The following changes since commit 3ccf6cd0e3e1dfd663814640b3b18b55715d7a75:

  Merge remote-tracking branch 'remotes/kraxel/tags/audio-20210617-pull-request' into staging (2021-06-18 09:54:42 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210619

for you to fetch changes up to 8169ec35eb766a12ad0ae898119060fde148ab61:

  util/oslib-win32: Fix fatal assertion in qemu_try_memalign (2021-06-19 11:09:11 -0700)

----------------------------------------------------------------
TCI cleanup and re-encoding
Fixes for #367 and #390.
Move TCGCond to tcg/tcg-cond.h.
Fix for win32 qemu_try_memalign.

----------------------------------------------------------------
Alessandro Di Federico (1):
      tcg: expose TCGCond manipulation routines

Richard Henderson (31):
      tcg: Combine dh_is_64bit and dh_is_signed to dh_typecode
      tcg: Add tcg_call_flags
      accel/tcg/plugin-gen: Drop inline markers
      plugins: Drop tcg_flags from struct qemu_plugin_dyn_cb
      accel/tcg: Add tcg call flags to plugins helpers
      tcg: Store the TCGHelperInfo in the TCGOp for call
      tcg: Add tcg_call_func
      tcg: Build ffi data structures for helpers
      tcg/tci: Improve tcg_target_call_clobber_regs
      tcg/tci: Move call-return regs to end of tcg_target_reg_alloc_order
      tcg/tci: Use ffi for calls
      tcg/tci: Reserve r13 for a temporary
      tcg/tci: Emit setcond before brcond
      tcg/tci: Remove tci_write_reg
      tcg/tci: Change encoding to uint32_t units
      tcg/tci: Implement goto_ptr
      tcg/tci: Implement movcond
      tcg/tci: Implement andc, orc, eqv, nand, nor
      tcg/tci: Implement extract, sextract
      tcg/tci: Implement clz, ctz, ctpop
      tcg/tci: Implement mulu2, muls2
      tcg/tci: Implement add2, sub2
      tcg/tci: Split out tci_qemu_ld, tci_qemu_st
      Revert "tcg/tci: Use exec/cpu_ldst.h interfaces"
      tcg/tci: Remove the qemu_ld/st_type macros
      tcg/tci: Use {set,clear}_helper_retaddr
      tests/tcg: Increase timeout for TCI
      accel/tcg: Probe the proper permissions for atomic ops
      tcg/sparc: Fix temp_allocate_frame vs sparc stack bias
      tcg: Allocate sufficient storage in temp_allocate_frame
      tcg: Restart when exhausting the stack frame

Stefan Weil (1):
      util/oslib-win32: Fix fatal assertion in qemu_try_memalign

 configure | 3 +
 accel/tcg/atomic_template.h | 24 +-
 accel/tcg/plugin-helpers.h | 5 +-
 include/exec/helper-head.h | 37 +-
 include/exec/helper-tcg.h | 34 +-
 include/qemu/plugin.h | 1 -
 include/tcg/tcg-cond.h | 101 ++
 include/tcg/tcg-opc.h | 4 +-
 include/tcg/tcg.h | 71 +-
 target/hppa/helper.h | 3 -
 target/i386/ops_sse_header.h | 3 -
 target/m68k/helper.h | 1 -
 target/ppc/helper.h | 3 -
 tcg/tcg-internal.h | 22 +
 tcg/tci/tcg-target-con-set.h | 1 +
 tcg/tci/tcg-target.h | 68 +-
 accel/tcg/cputlb.c | 95 +-
 accel/tcg/plugin-gen.c | 20 +-
 accel/tcg/user-exec.c | 8 +-
 plugins/core.c | 30 +-
 tcg/optimize.c | 3 +-
 tcg/tcg.c | 300 +--
 tcg/tci.c | 1203 ++++++++++----
 util/oslib-win32.c | 6 +-
 tcg/sparc/tcg-target.c.inc | 16 +-
 tcg/tci/tcg-target.c.inc | 550 ++++-----
 tcg/meson.build | 8 +-
 tcg/tci/README | 20 +-
 tests/docker/dockerfiles/alpine.docker | 1 +
 tests/docker/dockerfiles/centos8.docker | 1 +
 tests/docker/dockerfiles/debian10.docker | 1 +
 tests/docker/dockerfiles/fedora-i386-cross.docker | 1 +
 tests/docker/dockerfiles/fedora-win32-cross.docker | 1 +
 tests/docker/dockerfiles/fedora-win64-cross.docker | 1 +
 tests/docker/dockerfiles/fedora.docker | 1 +
 tests/docker/dockerfiles/ubuntu.docker | 1 +
 tests/docker/dockerfiles/ubuntu1804.docker | 1 +
 tests/docker/dockerfiles/ubuntu2004.docker | 1 +
 tests/tcg/Makefile.target | 6 +-
 39 files changed, 1454 insertions(+), 1202 deletions(-)
 create mode 100644 include/tcg/tcg-cond.h


Changes since v1:
  * Added QEMU_ERROR to wrap __attribute__((error)) -- patch 12.

r~

The following changes since commit 77f7c747193662edfadeeb3118d63eed0eac51a6:

  Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2018-10-17' into staging (2018-10-18 13:40:19 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20181018

for you to fetch changes up to 403f290c0603f35f2d09c982bf5549b6d0803ec1:

  cputlb: read CPUTLBEntry.addr_write atomically (2018-10-18 19:46:53 -0700)

----------------------------------------------------------------
Queued tcg patches.

----------------------------------------------------------------
Emilio G. Cota (10):
      tcg: access cpu->icount_decr.u16.high with atomics
      tcg: fix use of uninitialized variable under CONFIG_PROFILER
      tcg: plug holes in struct TCGProfile
      tcg: distribute tcg_time into TCG contexts
      target/alpha: remove tlb_flush from alpha_cpu_initfn
      target/unicore32: remove tlb_flush from uc32_init_fn
      exec: introduce tlb_init
      cputlb: fix assert_cpu_is_self macro
      cputlb: serialize tlb updates with env->tlb_lock
      cputlb: read CPUTLBEntry.addr_write atomically

Richard Henderson (11):
      tcg: Implement CPU_LOG_TB_NOCHAIN during expansion
      tcg: Add tlb_index and tlb_entry helpers
      tcg: Split CONFIG_ATOMIC128
      target/i386: Convert to HAVE_CMPXCHG128
      target/arm: Convert to HAVE_CMPXCHG128
      target/arm: Check HAVE_CMPXCHG128 at translate time
      target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
      target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
      target/s390x: Split do_cdsg, do_lpq, do_stpq
      target/s390x: Skip wout, cout helpers if op helper does not return
      target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate

 accel/tcg/atomic_template.h | 20 +++-
 accel/tcg/softmmu_template.h | 64 +++++-----
 include/exec/cpu-defs.h | 3 +
 include/exec/cpu_ldst.h | 30 ++++-
 include/exec/cpu_ldst_template.h | 25 ++--
 include/exec/exec-all.h | 8 ++
 include/qemu/atomic128.h | 153 ++++++++++++++++++++++++
 include/qemu/compiler.h | 11 ++
 include/qemu/timer.h | 1 -
 target/ppc/helper.h | 2 +-
 tcg/tcg.h | 20 ++--
 accel/tcg/cpu-exec.c | 2 +-
 accel/tcg/cputlb.c | 235 +++++++++++++++++++-----------------
 accel/tcg/tcg-all.c | 2 +-
 accel/tcg/translate-all.c | 2 +-
 accel/tcg/user-exec.c | 5 +-
 cpus.c | 3 +-
 exec.c | 1 +
 monitor.c | 13 +-
 qom/cpu.c | 2 +-
 target/alpha/cpu.c | 1 -
 target/arm/helper-a64.c | 251 +++++++++++++++++++--------------------
 target/arm/translate-a64.c | 38 +++---
 target/i386/mem_helper.c | 9 +-
 target/ppc/mem_helper.c | 33 ++++-
 target/ppc/translate.c | 115 +++++++++---------
 target/s390x/mem_helper.c | 202 +++++++++++++++----------------
 target/s390x/translate.c | 45 +++++--
 target/unicore32/cpu.c | 2 -
 tcg/tcg-op.c | 9 +-
 tcg/tcg.c | 25 +++-
 configure | 19 +++
 32 files changed, 839 insertions(+), 512 deletions(-)
 create mode 100644 include/qemu/atomic128.h
1
This operation is critical to staying within the interpretation
1
Rather than test NOCHAIN before linking, do not emit the
2
loop longer, which avoids the overhead of setup and teardown for
2
goto_tb opcode at all. We already do this for goto_ptr.
3
many TBs.
4
3
5
The check in tcg_prologue_init is disabled because TCI does
6
want to use NULL to indicate exit, as opposed to branching to
7
a real epilogue.
8
9
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
5
---
13
tcg/tci/tcg-target-con-set.h | 1 +
6
accel/tcg/cpu-exec.c | 2 +-
14
tcg/tci/tcg-target.h | 2 +-
7
tcg/tcg-op.c | 9 ++++++++-
15
tcg/tcg.c | 8 +++++++-
8
2 files changed, 9 insertions(+), 2 deletions(-)
16
tcg/tci.c | 19 +++++++++++++++++++
17
tcg/tci/tcg-target.c.inc | 16 ++++++++++++++++
18
5 files changed, 44 insertions(+), 2 deletions(-)
19
9
20
diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
10
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
21
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
22
--- a/tcg/tci/tcg-target-con-set.h
12
--- a/accel/tcg/cpu-exec.c
23
+++ b/tcg/tci/tcg-target-con-set.h
13
+++ b/accel/tcg/cpu-exec.c
24
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_find(CPUState *cpu,
25
* Each operand should be a sequence of constraint letters as defined by
26
* tcg-target-con-str.h; the constraint combination is inclusive or.
27
*/
28
+C_O0_I1(r)
29
C_O0_I2(r, r)
30
C_O0_I3(r, r, r)
31
C_O0_I4(r, r, r, r)
32
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tci/tcg-target.h
35
+++ b/tcg/tci/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@
37
#define TCG_TARGET_HAS_muls2_i32 0
38
#define TCG_TARGET_HAS_muluh_i32 0
39
#define TCG_TARGET_HAS_mulsh_i32 0
40
-#define TCG_TARGET_HAS_goto_ptr 0
41
+#define TCG_TARGET_HAS_goto_ptr 1
42
#define TCG_TARGET_HAS_direct_jump 0
43
#define TCG_TARGET_HAS_qemu_st8_i32 0
44
45
diff --git a/tcg/tcg.c b/tcg/tcg.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/tcg.c
48
+++ b/tcg/tcg.c
49
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
50
}
15
}
51
#endif
16
#endif
52
17
/* See if we can patch the calling TB. */
53
- /* Assert that goto_ptr is implemented completely. */
18
- if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
54
+#ifndef CONFIG_TCG_INTERPRETER
19
+ if (last_tb) {
55
+ /*
20
tb_add_jump(last_tb, tb_exit, tb);
56
+ * Assert that goto_ptr is implemented completely, setting an epilogue.
57
+ * For tci, we use NULL as the signal to return from the interpreter,
58
+ * so skip this check.
59
+ */
60
if (TCG_TARGET_HAS_goto_ptr) {
61
tcg_debug_assert(tcg_code_gen_epilogue != NULL);
62
}
21
}
63
+#endif
22
return tb;
23
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/tcg-op.c
26
+++ b/tcg/tcg-op.c
27
@@ -XXX,XX +XXX,XX @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
28
seen this numbered exit before, via tcg_gen_goto_tb. */
29
tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
30
#endif
31
+ /* When not chaining, exit without indicating a link. */
32
+ if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
33
+ val = 0;
34
+ }
35
} else {
36
/* This is an exit via the exitreq label. */
37
tcg_debug_assert(idx == TB_EXIT_REQUESTED);
38
@@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx)
39
tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
40
tcg_ctx->goto_tb_issue_mask |= 1 << idx;
41
#endif
42
- tcg_gen_op1i(INDEX_op_goto_tb, idx);
43
+ /* When not chaining, we simply fall through to the "fallback" exit. */
44
+ if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
45
+ tcg_gen_op1i(INDEX_op_goto_tb, idx);
46
+ }
64
}
47
}
65
48
66
void tcg_func_start(TCGContext *s)
49
void tcg_gen_lookup_and_goto_ptr(void)
67
diff --git a/tcg/tci.c b/tcg/tci.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/tcg/tci.c
70
+++ b/tcg/tci.c
71
@@ -XXX,XX +XXX,XX @@ static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0)
72
*l0 = diff ? (void *)tb_ptr + diff : NULL;
73
}
74
75
+static void tci_args_r(uint32_t insn, TCGReg *r0)
76
+{
77
+ *r0 = extract32(insn, 8, 4);
78
+}
79
+
80
static void tci_args_nl(uint32_t insn, const void *tb_ptr,
81
uint8_t *n0, void **l1)
82
{
83
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
84
tb_ptr = *(void **)ptr;
85
break;
86
87
+ case INDEX_op_goto_ptr:
88
+ tci_args_r(insn, &r0);
89
+ ptr = (void *)regs[r0];
90
+ if (!ptr) {
91
+ return 0;
92
+ }
93
+ tb_ptr = ptr;
94
+ break;
95
+
96
case INDEX_op_qemu_ld_i32:
97
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
98
tci_args_rrm(insn, &r0, &r1, &oi);
99
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
100
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
101
break;
102
103
+ case INDEX_op_goto_ptr:
104
+ tci_args_r(insn, &r0);
105
+ info->fprintf_func(info->stream, "%-12s %s", op_name, str_r(r0));
106
+ break;
107
+
108
case INDEX_op_call:
109
tci_args_nl(insn, tb_ptr, &len, &ptr);
110
info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr);
111
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
112
index XXXXXXX..XXXXXXX 100644
113
--- a/tcg/tci/tcg-target.c.inc
114
+++ b/tcg/tci/tcg-target.c.inc
115
@@ -XXX,XX +XXX,XX @@
116
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
117
{
118
switch (op) {
119
+ case INDEX_op_goto_ptr:
120
+ return C_O0_I1(r);
121
+
122
case INDEX_op_ld8u_i32:
123
case INDEX_op_ld8s_i32:
124
case INDEX_op_ld16u_i32:
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
126
tcg_out32(s, insn);
127
}
128
129
+static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0)
130
+{
131
+ tcg_insn_unit insn = 0;
132
+
133
+ insn = deposit32(insn, 0, 8, op);
134
+ insn = deposit32(insn, 8, 4, r0);
135
+ tcg_out32(s, insn);
136
+}
137
+
138
static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
139
{
140
tcg_out32(s, (uint8_t)op);
141
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
142
set_jmp_reset_offset(s, args[0]);
143
break;
144
145
+ case INDEX_op_goto_ptr:
146
+ tcg_out_op_r(s, opc, args[0]);
147
+ break;
148
+
149
case INDEX_op_br:
150
tcg_out_op_l(s, opc, arg_label(args[0]));
151
break;
152
--
50
--
153
2.25.1
51
2.17.2
154
52
155
53
This function should have been updated for vector types
when they were introduced.

Fixes: d2fd745fe8b
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/367
Cc: qemu-stable@nongnu.org
Tested-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s)
 
 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
 {
-    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
-        s->frame_end) {
-        tcg_abort();
+    size_t size, align;
+    intptr_t off;
+
+    switch (ts->type) {
+    case TCG_TYPE_I32:
+        size = align = 4;
+        break;
+    case TCG_TYPE_I64:
+    case TCG_TYPE_V64:
+        size = align = 8;
+        break;
+    case TCG_TYPE_V128:
+        size = align = 16;
+        break;
+    case TCG_TYPE_V256:
+        /* Note that we do not require aligned storage for V256. */
+        size = 32, align = 16;
+        break;
+    default:
+        g_assert_not_reached();
     }
-    ts->mem_offset = s->current_frame_offset;
+
+    assert(align <= TCG_TARGET_STACK_ALIGN);
+    off = ROUND_UP(s->current_frame_offset, align);
+    assert(off + size <= s->frame_end);
+    s->current_frame_offset = off + size;
+
+    ts->mem_offset = off;
 #if defined(__sparc__)
     ts->mem_offset += TCG_TARGET_STACK_BIAS;
 #endif
     ts->mem_base = s->frame_temp;
     ts->mem_allocated = 1;
-    s->current_frame_offset += sizeof(tcg_target_long);
 }
 
 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
--
2.25.1


From: "Emilio G. Cota" <cota@braap.org>

Consistently access u16.high with atomics to avoid
undefined behaviour in MTTCG.

Note that icount_decr.u16.low is only used in icount mode,
so regular accesses to it are OK.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-all.c | 2 +-
 accel/tcg/translate-all.c | 2 +-
 qom/cpu.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
     if (!qemu_cpu_is_self(cpu)) {
         qemu_cpu_kick(cpu);
     } else {
-        cpu->icount_decr.u16.high = -1;
+        atomic_set(&cpu->icount_decr.u16.high, -1);
         if (use_icount &&
             !cpu->can_do_io
             && (mask & ~old_mask) != 0) {
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ void cpu_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
     cpu->interrupt_request |= mask;
-    cpu->icount_decr.u16.high = -1;
+    atomic_set(&cpu->icount_decr.u16.high, -1);
 }
 
 /*
diff --git a/qom/cpu.c b/qom/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(CPUState *cpu)
     cpu->mem_io_pc = 0;
     cpu->mem_io_vaddr = 0;
     cpu->icount_extra = 0;
-    cpu->icount_decr.u32 = 0;
+    atomic_set(&cpu->icount_decr.u32, 0);
     cpu->can_do_io = 1;
     cpu->exception_index = -1;
     cpu->crash_occurred = false;
--
2.17.2
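As a stand-alone illustration of the frame-layout rule used by the temp_allocate_frame patch above, the sketch below applies the same power-of-two round-up idiom outside of QEMU. The macro, offsets and names here are illustrative assumptions only, not code from the patch.

#include <assert.h>
#include <stdint.h>

/* Round n up to a power-of-two alignment, as QEMU's ROUND_UP() does here. */
#define ROUND_UP(n, align) (((n) + (align) - 1) & ~((align) - 1))

int main(void)
{
    intptr_t frame_off = 0;

    /* A 4-byte I32 temp: already aligned, occupies [0, 4). */
    intptr_t off_i32 = ROUND_UP(frame_off, 4);
    frame_off = off_i32 + 4;
    assert(off_i32 == 0 && frame_off == 4);

    /* A 16-byte V128 temp: the cursor is first rounded up to 16. */
    intptr_t off_v128 = ROUND_UP(frame_off, 16);
    frame_off = off_v128 + 16;
    assert(off_v128 == 16 && frame_off == 32);

    return 0;
}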
Assume that we'll have fewer temps allocated after
restarting with a fewer number of instructions.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
 
     assert(align <= TCG_TARGET_STACK_ALIGN);
     off = ROUND_UP(s->current_frame_offset, align);
-    assert(off + size <= s->frame_end);
+
+    /* If we've exhausted the stack frame, restart with a smaller TB. */
+    if (off + size > s->frame_end) {
+        tcg_raise_tb_overflow(s);
+    }
     s->current_frame_offset = off + size;
 
     ts->mem_offset = off;
--
2.25.1


From: "Emilio G. Cota" <cota@braap.org>

We forgot to initialize n in commit 15fa08f845 ("tcg: Dynamically
allocate TCGOps", 2017-12-29).

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-3-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifdef CONFIG_PROFILER
     {
-        int n;
+        int n = 0;
 
         QTAILQ_FOREACH(op, &s->ops, link) {
             n++;
--
2.17.2
From: Stefan Weil <sw@weilnetz.de>

The function is called with alignment == 0 which caused an assertion.
Use the code from oslib-posix.c to fix that regression.

Fixes: ed6f53f9ca9
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20210611105846.347954-1-sw@weilnetz.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/oslib-win32.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -XXX,XX +XXX,XX @@ void *qemu_try_memalign(size_t alignment, size_t size)
     void *ptr;
 
     g_assert(size != 0);
-    g_assert(is_power_of_2(alignment));
+    if (alignment < sizeof(void *)) {
+        alignment = sizeof(void *);
+    } else {
+        g_assert(is_power_of_2(alignment));
+    }
     ptr = _aligned_malloc(size, alignment);
     trace_qemu_memalign(alignment, size, ptr);
     return ptr;
--
2.25.1


From: "Emilio G. Cota" <cota@braap.org>

This plugs two 4-byte holes in 64-bit.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-4-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGProfile {
     int64_t tb_count;
     int64_t op_count; /* total insn count */
     int op_count_max; /* max insn per TB */
-    int64_t temp_count;
     int temp_count_max;
+    int64_t temp_count;
     int64_t del_op_count;
     int64_t code_in_len;
     int64_t code_out_len;
--
2.17.2
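The two 4-byte holes named in the TCGProfile patch above come from placing a lone int between int64_t members; a small stand-alone comparison shows why moving the two ints next to each other shrinks the struct. The struct definitions below are hypothetical and reuse only the field names from the patch; they are not the QEMU declaration.

#include <stdint.h>
#include <stdio.h>

/* Each lone int is padded out to 8 bytes so the next int64_t stays aligned. */
struct with_holes {
    int64_t op_count;
    int     op_count_max;   /* 4 bytes used, 4 bytes padding */
    int64_t temp_count;
    int     temp_count_max; /* 4 bytes used, 4 bytes padding */
    int64_t del_op_count;
};

/* Adjacent ints share a single 8-byte slot, removing both holes. */
struct without_holes {
    int64_t op_count;
    int     op_count_max;
    int     temp_count_max;
    int64_t temp_count;
    int64_t del_op_count;
};

int main(void)
{
    /* On a typical LP64 host this prints 40 and 32. */
    printf("with holes: %zu, without: %zu\n",
           sizeof(struct with_holes), sizeof(struct without_holes));
    return 0;
}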
1
This requires adjusting where arguments are stored.
1
From: "Emilio G. Cota" <cota@braap.org>
2
Place them on the stack at left-aligned positions.
3
Adjust the stack frame to be at entirely positive offsets.
4
2
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
When we implemented per-vCPU TCG contexts, we forgot to also
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
distribute the tcg_time counter, which has remained as a global
5
accessed without any serialization, leading to potentially missed
6
counts.
7
8
Fix it by distributing the field over the TCG contexts, embedding
9
it into TCGProfile with a field called "cpu_exec_time", which is more
10
descriptive than "tcg_time". Add a function to query this value
11
directly, and for completeness, fill in the field in
12
tcg_profile_snapshot, even though its callers do not use it.
13
14
Signed-off-by: Emilio G. Cota <cota@braap.org>
15
Message-Id: <20181010144853.13005-5-cota@braap.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
16
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
17
---
9
include/tcg/tcg.h | 1 +
18
include/qemu/timer.h | 1 -
10
tcg/tci/tcg-target.h | 2 +-
19
tcg/tcg.h | 2 ++
11
tcg/tcg.c | 64 +++++++++++++-----
20
cpus.c | 3 ++-
12
tcg/tci.c | 142 ++++++++++++++++++++++-----------------
21
monitor.c | 13 ++++++++++---
13
tcg/tci/tcg-target.c.inc | 50 +++++++-------
22
tcg/tcg.c | 23 +++++++++++++++++++++++
14
5 files changed, 153 insertions(+), 106 deletions(-)
23
5 files changed, 37 insertions(+), 5 deletions(-)
15
24
16
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
25
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
17
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
18
--- a/include/tcg/tcg.h
27
--- a/include/qemu/timer.h
19
+++ b/include/tcg/tcg.h
28
+++ b/include/qemu/timer.h
29
@@ -XXX,XX +XXX,XX @@ static inline int64_t profile_getclock(void)
30
return get_clock();
31
}
32
33
-extern int64_t tcg_time;
34
extern int64_t dev_time;
35
#endif
36
37
diff --git a/tcg/tcg.h b/tcg/tcg.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/tcg/tcg.h
40
+++ b/tcg/tcg.h
41
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOp {
42
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
43
44
typedef struct TCGProfile {
45
+ int64_t cpu_exec_time;
46
int64_t tb_count1;
47
int64_t tb_count;
48
int64_t op_count; /* total insn count */
49
@@ -XXX,XX +XXX,XX @@ int tcg_check_temp_count(void);
50
#define tcg_check_temp_count() 0
51
#endif
52
53
+int64_t tcg_cpu_exec_time(void);
54
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
55
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);
56
57
diff --git a/cpus.c b/cpus.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/cpus.c
60
+++ b/cpus.c
61
@@ -XXX,XX +XXX,XX @@ static int tcg_cpu_exec(CPUState *cpu)
62
ret = cpu_exec(cpu);
63
cpu_exec_end(cpu);
64
#ifdef CONFIG_PROFILER
65
- tcg_time += profile_getclock() - ti;
66
+ atomic_set(&tcg_ctx->prof.cpu_exec_time,
67
+ tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
68
#endif
69
return ret;
70
}
71
diff --git a/monitor.c b/monitor.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/monitor.c
74
+++ b/monitor.c
20
@@ -XXX,XX +XXX,XX @@
75
@@ -XXX,XX +XXX,XX @@
21
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
76
#include "sysemu/cpus.h"
22
77
#include "sysemu/iothread.h"
23
#define CPU_TEMP_BUF_NLONGS 128
78
#include "qemu/cutils.h"
24
+#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
79
+#include "tcg/tcg.h"
25
80
26
/* Default target word size to pointer size. */
81
#if defined(TARGET_S390X)
27
#ifndef TCG_TARGET_REG_BITS
82
#include "hw/s390x/storage-keys.h"
28
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
83
@@ -XXX,XX +XXX,XX @@ static void hmp_info_numa(Monitor *mon, const QDict *qdict)
29
index XXXXXXX..XXXXXXX 100644
84
30
--- a/tcg/tci/tcg-target.h
85
#ifdef CONFIG_PROFILER
31
+++ b/tcg/tci/tcg-target.h
86
32
@@ -XXX,XX +XXX,XX @@ typedef enum {
87
-int64_t tcg_time;
33
88
int64_t dev_time;
34
/* Used for function call generation. */
89
35
#define TCG_TARGET_CALL_STACK_OFFSET 0
90
static void hmp_info_profile(Monitor *mon, const QDict *qdict)
36
-#define TCG_TARGET_STACK_ALIGN 16
91
{
37
+#define TCG_TARGET_STACK_ALIGN 8
92
+ static int64_t last_cpu_exec_time;
38
93
+ int64_t cpu_exec_time;
39
#define HAVE_TCG_QEMU_TB_EXEC
94
+ int64_t delta;
40
95
+
96
+ cpu_exec_time = tcg_cpu_exec_time();
97
+ delta = cpu_exec_time - last_cpu_exec_time;
98
+
99
monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n",
100
dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
101
monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n",
102
- tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND);
103
- tcg_time = 0;
104
+ delta, delta / (double)NANOSECONDS_PER_SECOND);
105
+ last_cpu_exec_time = cpu_exec_time;
106
dev_time = 0;
107
}
108
#else
41
diff --git a/tcg/tcg.c b/tcg/tcg.c
109
diff --git a/tcg/tcg.c b/tcg/tcg.c
42
index XXXXXXX..XXXXXXX 100644
110
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/tcg.c
111
--- a/tcg/tcg.c
44
+++ b/tcg/tcg.c
112
+++ b/tcg/tcg.c
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
46
intptr_t arg2);
47
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
48
TCGReg base, intptr_t ofs);
49
+#ifdef CONFIG_TCG_INTERPRETER
50
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
51
+ ffi_cif *cif);
52
+#else
53
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
54
+#endif
55
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
56
#ifdef TCG_TARGET_NEED_LDST_LABELS
57
static int tcg_out_ldst_finalize(TCGContext *s);
58
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
59
for (i = 0; i < nargs; i++) {
60
int argtype = extract32(typemask, (i + 1) * 3, 3);
61
bool is_64bit = (argtype & ~1) == dh_typecode_i64;
62
+ bool want_align = false;
63
+
64
+#if defined(CONFIG_TCG_INTERPRETER)
65
+ /*
66
+ * Align all arguments, so that they land in predictable places
67
+ * for passing off to ffi_call.
68
+ */
69
+ want_align = true;
70
+#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
71
+ /* Some targets want aligned 64 bit args */
72
+ want_align = is_64bit;
73
+#endif
74
+
75
+ if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
76
+ op->args[pi++] = TCG_CALL_DUMMY_ARG;
77
+ real_args++;
78
+ }
79
80
if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
81
-#ifdef TCG_TARGET_CALL_ALIGN_ARGS
82
- /* some targets want aligned 64 bit args */
83
- if (real_args & 1) {
84
- op->args[pi++] = TCG_CALL_DUMMY_ARG;
85
- real_args++;
86
- }
87
-#endif
88
- /* If stack grows up, then we will be placing successive
89
- arguments at lower addresses, which means we need to
90
- reverse the order compared to how we would normally
91
- treat either big or little-endian. For those arguments
92
- that will wind up in registers, this still works for
93
- HPPA (the only current STACK_GROWSUP target) since the
94
- argument registers are *also* allocated in decreasing
95
- order. If another such target is added, this logic may
96
- have to get more complicated to differentiate between
97
- stack arguments and register arguments. */
98
+ /*
99
+ * If stack grows up, then we will be placing successive
100
+ * arguments at lower addresses, which means we need to
101
+ * reverse the order compared to how we would normally
102
+ * treat either big or little-endian. For those arguments
103
+ * that will wind up in registers, this still works for
104
+ * HPPA (the only current STACK_GROWSUP target) since the
105
+ * argument registers are *also* allocated in decreasing
106
+ * order. If another such target is added, this logic may
107
+ * have to get more complicated to differentiate between
108
+ * stack arguments and register arguments.
109
+ */
110
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
111
op->args[pi++] = temp_arg(args[i] + 1);
112
op->args[pi++] = temp_arg(args[i]);
113
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
114
const int nb_oargs = TCGOP_CALLO(op);
115
const int nb_iargs = TCGOP_CALLI(op);
116
const TCGLifeData arg_life = op->life;
117
+ const TCGHelperInfo *info;
118
int flags, nb_regs, i;
119
TCGReg reg;
120
TCGArg arg;
121
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
122
TCGRegSet allocated_regs;
123
124
func_addr = tcg_call_func(op);
125
- flags = tcg_call_flags(op);
126
+ info = tcg_call_info(op);
127
+ flags = info->flags;
128
129
nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
130
if (nb_regs > nb_iargs) {
131
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
132
save_globals(s, allocated_regs);
133
}
134
135
+#ifdef CONFIG_TCG_INTERPRETER
136
+ {
137
+ gpointer hash = (gpointer)(uintptr_t)info->typemask;
138
+ ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
139
+ assert(cif != NULL);
140
+ tcg_out_call(s, func_addr, cif);
141
+ }
142
+#else
143
tcg_out_call(s, func_addr);
144
+#endif
145
146
/* assign output registers and emit moves if needed */
147
for(i = 0; i < nb_oargs; i++) {
148
diff --git a/tcg/tci.c b/tcg/tci.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/tcg/tci.c
151
+++ b/tcg/tci.c
152
@@ -XXX,XX +XXX,XX @@
113
@@ -XXX,XX +XXX,XX @@
153
*/
114
/* Define to jump the ELF file used to communicate with GDB. */
154
115
#undef DEBUG_JIT
155
#include "qemu/osdep.h"
116
156
+#include "qemu-common.h"
117
+#include "qemu/error-report.h"
157
+#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
118
#include "qemu/cutils.h"
158
+#include "exec/cpu_ldst.h"
119
#include "qemu/host-utils.h"
159
+#include "tcg/tcg-op.h"
120
#include "qemu/timer.h"
160
+#include "qemu/compiler.h"
121
@@ -XXX,XX +XXX,XX @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
161
+#include <ffi.h>
122
const TCGProfile *orig = &s->prof;
162
123
163
-/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
124
if (counters) {
164
- * Without assertions, the interpreter runs much faster. */
125
+ PROF_ADD(prof, orig, cpu_exec_time);
165
+
126
PROF_ADD(prof, orig, tb_count1);
166
+/*
127
PROF_ADD(prof, orig, tb_count);
167
+ * Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
128
PROF_ADD(prof, orig, op_count);
168
+ * Without assertions, the interpreter runs much faster.
129
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
169
+ */
130
prof.table_op_count[i]);
170
#if defined(CONFIG_DEBUG_TCG)
171
# define tci_assert(cond) assert(cond)
172
#else
173
# define tci_assert(cond) ((void)(cond))
174
#endif
175
176
-#include "qemu-common.h"
177
-#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
178
-#include "exec/cpu_ldst.h"
179
-#include "tcg/tcg-op.h"
180
-#include "qemu/compiler.h"
181
-
182
-#if MAX_OPC_PARAM_IARGS != 6
183
-# error Fix needed, number of supported input arguments changed!
184
-#endif
185
-#if TCG_TARGET_REG_BITS == 32
186
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
187
- tcg_target_ulong, tcg_target_ulong,
188
- tcg_target_ulong, tcg_target_ulong,
189
- tcg_target_ulong, tcg_target_ulong,
190
- tcg_target_ulong, tcg_target_ulong,
191
- tcg_target_ulong, tcg_target_ulong);
192
-#else
193
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
194
- tcg_target_ulong, tcg_target_ulong,
195
- tcg_target_ulong, tcg_target_ulong);
196
-#endif
197
-
198
__thread uintptr_t tci_tb_ptr;
199
200
-static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index)
201
-{
202
- tci_assert(index < TCG_TARGET_NB_REGS);
203
- return regs[index];
204
-}
205
-
206
static void
207
tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
208
{
209
@@ -XXX,XX +XXX,XX @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
210
* I = immediate (tcg_target_ulong)
211
* l = label or pointer
212
* m = immediate (TCGMemOpIdx)
213
+ * n = immediate (call return length)
214
* r = register
215
* s = signed ldst offset
216
*/
217
@@ -XXX,XX +XXX,XX @@ static void tci_args_l(const uint8_t **tb_ptr, void **l0)
218
check_size(start, tb_ptr);
219
}
220
221
+static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0,
222
+ void **l1, void **l2)
223
+{
224
+ const uint8_t *start = *tb_ptr;
225
+
226
+ *n0 = tci_read_b(tb_ptr);
227
+ *l1 = (void *)tci_read_label(tb_ptr);
228
+ *l2 = (void *)tci_read_label(tb_ptr);
229
+
230
+ check_size(start, tb_ptr);
231
+}
232
+
233
static void tci_args_rr(const uint8_t **tb_ptr,
234
TCGReg *r0, TCGReg *r1)
235
{
236
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
237
{
238
const uint8_t *tb_ptr = v_tb_ptr;
239
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
240
- long tcg_temps[CPU_TEMP_BUF_NLONGS];
241
- uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
242
+ uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
243
+ / sizeof(uint64_t)];
244
+ void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];
245
246
regs[TCG_AREG0] = (tcg_target_ulong)env;
247
- regs[TCG_REG_CALL_STACK] = sp_value;
248
+ regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
249
+ /* Other call_slots entries initialized at first use (see below). */
250
+ call_slots[0] = NULL;
251
tci_assert(tb_ptr);
252
253
for (;;) {
254
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
255
#endif
256
TCGMemOpIdx oi;
257
int32_t ofs;
258
- void *ptr;
259
+ void *ptr, *cif;
260
261
/* Skip opcode and size entry. */
262
tb_ptr += 2;
263
264
switch (opc) {
265
case INDEX_op_call:
266
- tci_args_l(&tb_ptr, &ptr);
267
+ /*
268
+ * Set up the ffi_avalue array once, delayed until now
269
+ * because many TB's do not make any calls. In tcg_gen_callN,
270
+ * we arranged for every real argument to be "left-aligned"
271
+ * in each 64-bit slot.
272
+ */
273
+ if (unlikely(call_slots[0] == NULL)) {
274
+ for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
275
+ call_slots[i] = &stack[i];
276
+ }
277
+ }
278
+
279
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
280
+
281
+ /* Helper functions may need to access the "return address" */
282
tci_tb_ptr = (uintptr_t)tb_ptr;
283
-#if TCG_TARGET_REG_BITS == 32
284
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
285
- tci_read_reg(regs, TCG_REG_R1),
286
- tci_read_reg(regs, TCG_REG_R2),
287
- tci_read_reg(regs, TCG_REG_R3),
288
- tci_read_reg(regs, TCG_REG_R4),
289
- tci_read_reg(regs, TCG_REG_R5),
290
- tci_read_reg(regs, TCG_REG_R6),
291
- tci_read_reg(regs, TCG_REG_R7),
292
- tci_read_reg(regs, TCG_REG_R8),
293
- tci_read_reg(regs, TCG_REG_R9),
294
- tci_read_reg(regs, TCG_REG_R10),
295
- tci_read_reg(regs, TCG_REG_R11));
296
- tci_write_reg(regs, TCG_REG_R0, tmp64);
297
- tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32);
298
-#else
299
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
300
- tci_read_reg(regs, TCG_REG_R1),
301
- tci_read_reg(regs, TCG_REG_R2),
302
- tci_read_reg(regs, TCG_REG_R3),
303
- tci_read_reg(regs, TCG_REG_R4),
304
- tci_read_reg(regs, TCG_REG_R5));
305
- tci_write_reg(regs, TCG_REG_R0, tmp64);
306
-#endif
307
+
308
+ ffi_call(cif, ptr, stack, call_slots);
309
+
310
+ /* Any result winds up "left-aligned" in the stack[0] slot. */
311
+ switch (len) {
312
+ case 0: /* void */
313
+ break;
314
+ case 1: /* uint32_t */
315
+ /*
316
+ * Note that libffi has an odd special case in that it will
317
+ * always widen an integral result to ffi_arg.
318
+ */
319
+ if (sizeof(ffi_arg) == 4) {
320
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
321
+ break;
322
+ }
323
+ /* fall through */
324
+ case 2: /* uint64_t */
325
+ if (TCG_TARGET_REG_BITS == 32) {
326
+ tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
327
+ } else {
328
+ regs[TCG_REG_R0] = stack[0];
329
+ }
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
break;
335
+
336
case INDEX_op_br:
337
tci_args_l(&tb_ptr, &ptr);
338
tb_ptr = ptr;
339
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
340
TCGCond c;
341
TCGMemOpIdx oi;
342
uint8_t pos, len;
343
- void *ptr;
344
+ void *ptr, *cif;
345
const uint8_t *tb_ptr;
346
347
status = info->read_memory_func(addr, buf, 2, info);
348
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
349
350
switch (op) {
351
case INDEX_op_br:
352
- case INDEX_op_call:
353
case INDEX_op_exit_tb:
354
case INDEX_op_goto_tb:
355
tci_args_l(&tb_ptr, &ptr);
356
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
357
break;
358
359
+ case INDEX_op_call:
360
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
361
+ info->fprintf_func(info->stream, "%-12s %d, %p, %p",
362
+ op_name, len, ptr, cif);
363
+ break;
364
+
365
case INDEX_op_brcond_i32:
366
case INDEX_op_brcond_i64:
367
tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr);
368
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
369
index XXXXXXX..XXXXXXX 100644
370
--- a/tcg/tci/tcg-target.c.inc
371
+++ b/tcg/tci/tcg-target.c.inc
372
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
373
# error Fix needed, number of supported input arguments changed!
374
#endif
375
376
-static const int tcg_target_call_iarg_regs[] = {
377
- TCG_REG_R0,
378
- TCG_REG_R1,
379
- TCG_REG_R2,
380
- TCG_REG_R3,
381
- TCG_REG_R4,
382
- TCG_REG_R5,
383
-#if TCG_TARGET_REG_BITS == 32
384
- /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
385
- TCG_REG_R6,
386
- TCG_REG_R7,
387
- TCG_REG_R8,
388
- TCG_REG_R9,
389
- TCG_REG_R10,
390
- TCG_REG_R11,
391
-#endif
392
-};
393
+/* No call arguments via registers. All will be stored on the "stack". */
394
+static const int tcg_target_call_iarg_regs[] = { };
395
396
static const int tcg_target_call_oarg_regs[] = {
397
TCG_REG_R0,
398
@@ -XXX,XX +XXX,XX @@ static void tci_out_label(TCGContext *s, TCGLabel *label)
399
static void stack_bounds_check(TCGReg base, target_long offset)
400
{
401
if (base == TCG_REG_CALL_STACK) {
402
- tcg_debug_assert(offset < 0);
403
- tcg_debug_assert(offset >= -(CPU_TEMP_BUF_NLONGS * sizeof(long)));
404
+ tcg_debug_assert(offset >= 0);
405
+ tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE +
406
+ TCG_STATIC_FRAME_SIZE));
407
}
131
}
408
}
132
}
409
133
+
410
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
134
+int64_t tcg_cpu_exec_time(void)
411
}
135
+{
136
+ unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
137
+ unsigned int i;
138
+ int64_t ret = 0;
139
+
140
+ for (i = 0; i < n_ctxs; i++) {
141
+ const TCGContext *s = atomic_read(&tcg_ctxs[i]);
142
+ const TCGProfile *prof = &s->prof;
143
+
144
+ ret += atomic_read(&prof->cpu_exec_time);
145
+ }
146
+ return ret;
147
+}
148
#else
149
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
150
{
151
cpu_fprintf(f, "[TCG profiler not compiled]\n");
412
}
152
}
413
414
-static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
415
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
416
+ ffi_cif *cif)
417
{
418
uint8_t *old_code_ptr = s->code_ptr;
419
+ uint8_t which;
420
+
153
+
421
+ if (cif->rtype == &ffi_type_void) {
154
+int64_t tcg_cpu_exec_time(void)
422
+ which = 0;
155
+{
423
+ } else if (cif->rtype->size == 4) {
156
+ error_report("%s: TCG profiler not compiled", __func__);
424
+ which = 1;
157
+ exit(EXIT_FAILURE);
425
+ } else {
158
+}
426
+ tcg_debug_assert(cif->rtype->size == 8);
159
#endif
427
+ which = 2;
160
428
+ }
161
429
tcg_out_op_t(s, INDEX_op_call);
430
- tcg_out_i(s, (uintptr_t)arg);
431
+ tcg_out8(s, which);
432
+ tcg_out_i(s, (uintptr_t)func);
433
+ tcg_out_i(s, (uintptr_t)cif);
434
+
435
old_code_ptr[1] = s->code_ptr - old_code_ptr;
436
}
437
438
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
439
s->reserved_regs = 0;
440
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
441
442
- /* We use negative offsets from "sp" so that we can distinguish
443
- stores that might pretend to be call arguments. */
444
- tcg_set_frame(s, TCG_REG_CALL_STACK,
445
- -CPU_TEMP_BUF_NLONGS * sizeof(long),
446
- CPU_TEMP_BUF_NLONGS * sizeof(long));
447
+ /* The call arguments come first, followed by the temp storage. */
448
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
449
+ TCG_STATIC_FRAME_SIZE);
450
}
451
452
/* Generate global QEMU prologue and epilogue code. */
453
--
162
--
454
2.25.1
163
2.17.2
455
164
456
165
1
From: Alessandro Di Federico <ale@rev.ng>
1
From: "Emilio G. Cota" <cota@braap.org>
2
2
3
This commit moves into a separate file routines used to manipulate
3
As far as I can tell tlb_flush does not need to be called
4
TCGCond. These will be employed by the idef-parser.
4
this early. tlb_flush is eventually called after the CPU
5
has been realized.
5
6
6
Signed-off-by: Alessandro Di Federico <ale@rev.ng>
7
This change paves the way to the introduction of tlb_init,
7
Signed-off-by: Paolo Montesel <babush@rev.ng>
8
which will be called from cpu_exec_realizefn.
8
Message-Id: <20210619093713.1845446-2-ale.qemu@rev.ng>
9
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Emilio G. Cota <cota@braap.org>
13
Message-Id: <20181009174557.16125-2-cota@braap.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
15
---
11
include/tcg/tcg-cond.h | 101 +++++++++++++++++++++++++++++++++++++++++
16
target/alpha/cpu.c | 1 -
12
include/tcg/tcg.h | 70 +---------------------------
17
1 file changed, 1 deletion(-)
13
2 files changed, 102 insertions(+), 69 deletions(-)
14
create mode 100644 include/tcg/tcg-cond.h
15
18
16
diff --git a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h
19
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
17
new file mode 100644
18
index XXXXXXX..XXXXXXX
19
--- /dev/null
20
+++ b/include/tcg/tcg-cond.h
21
@@ -XXX,XX +XXX,XX @@
22
+/*
23
+ * Tiny Code Generator for QEMU
24
+ *
25
+ * Copyright (c) 2008 Fabrice Bellard
26
+ *
27
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
28
+ * of this software and associated documentation files (the "Software"), to deal
29
+ * in the Software without restriction, including without limitation the rights
30
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31
+ * copies of the Software, and to permit persons to whom the Software is
32
+ * furnished to do so, subject to the following conditions:
33
+ *
34
+ * The above copyright notice and this permission notice shall be included in
35
+ * all copies or substantial portions of the Software.
36
+ *
37
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
40
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
43
+ * THE SOFTWARE.
44
+ */
45
+
46
+#ifndef TCG_COND_H
47
+#define TCG_COND_H
48
+
49
+/*
50
+ * Conditions. Note that these are laid out for easy manipulation by
51
+ * the functions below:
52
+ * bit 0 is used for inverting;
53
+ * bit 1 is signed,
54
+ * bit 2 is unsigned,
55
+ * bit 3 is used with bit 0 for swapping signed/unsigned.
56
+ */
57
+typedef enum {
58
+ /* non-signed */
59
+ TCG_COND_NEVER = 0 | 0 | 0 | 0,
60
+ TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
61
+ TCG_COND_EQ = 8 | 0 | 0 | 0,
62
+ TCG_COND_NE = 8 | 0 | 0 | 1,
63
+ /* signed */
64
+ TCG_COND_LT = 0 | 0 | 2 | 0,
65
+ TCG_COND_GE = 0 | 0 | 2 | 1,
66
+ TCG_COND_LE = 8 | 0 | 2 | 0,
67
+ TCG_COND_GT = 8 | 0 | 2 | 1,
68
+ /* unsigned */
69
+ TCG_COND_LTU = 0 | 4 | 0 | 0,
70
+ TCG_COND_GEU = 0 | 4 | 0 | 1,
71
+ TCG_COND_LEU = 8 | 4 | 0 | 0,
72
+ TCG_COND_GTU = 8 | 4 | 0 | 1,
73
+} TCGCond;
74
+
75
+/* Invert the sense of the comparison. */
76
+static inline TCGCond tcg_invert_cond(TCGCond c)
77
+{
78
+ return (TCGCond)(c ^ 1);
79
+}
80
+
81
+/* Swap the operands in a comparison. */
82
+static inline TCGCond tcg_swap_cond(TCGCond c)
83
+{
84
+ return c & 6 ? (TCGCond)(c ^ 9) : c;
85
+}
86
+
87
+/* Create an "unsigned" version of a "signed" comparison. */
88
+static inline TCGCond tcg_unsigned_cond(TCGCond c)
89
+{
90
+ return c & 2 ? (TCGCond)(c ^ 6) : c;
91
+}
92
+
93
+/* Create a "signed" version of an "unsigned" comparison. */
94
+static inline TCGCond tcg_signed_cond(TCGCond c)
95
+{
96
+ return c & 4 ? (TCGCond)(c ^ 6) : c;
97
+}
98
+
99
+/* Must a comparison be considered unsigned? */
100
+static inline bool is_unsigned_cond(TCGCond c)
101
+{
102
+ return (c & 4) != 0;
103
+}
104
+
105
+/*
106
+ * Create a "high" version of a double-word comparison.
107
+ * This removes equality from a LTE or GTE comparison.
108
+ */
109
+static inline TCGCond tcg_high_cond(TCGCond c)
110
+{
111
+ switch (c) {
112
+ case TCG_COND_GE:
113
+ case TCG_COND_LE:
114
+ case TCG_COND_GEU:
115
+ case TCG_COND_LEU:
116
+ return (TCGCond)(c ^ 8);
117
+ default:
118
+ return c;
119
+ }
120
+}
121
+
122
+#endif /* TCG_COND_H */
123
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
124
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
125
--- a/include/tcg/tcg.h
21
--- a/target/alpha/cpu.c
126
+++ b/include/tcg/tcg.h
22
+++ b/target/alpha/cpu.c
127
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj)
128
#include "tcg/tcg-mo.h"
24
CPUAlphaState *env = &cpu->env;
129
#include "tcg-target.h"
25
130
#include "qemu/int128.h"
26
cs->env_ptr = env;
131
+#include "tcg/tcg-cond.h"
27
- tlb_flush(cs);
132
28
133
/* XXX: make safe guess about sizes */
29
env->lock_addr = -1;
134
#define MAX_OP_PER_INSTR 266
30
#if defined(CONFIG_USER_ONLY)
135
@@ -XXX,XX +XXX,XX @@ typedef TCGv_ptr TCGv_env;
136
/* Used to align parameters. See the comment before tcgv_i32_temp. */
137
#define TCG_CALL_DUMMY_ARG ((TCGArg)0)
138
139
-/* Conditions. Note that these are laid out for easy manipulation by
140
- the functions below:
141
- bit 0 is used for inverting;
142
- bit 1 is signed,
143
- bit 2 is unsigned,
144
- bit 3 is used with bit 0 for swapping signed/unsigned. */
145
-typedef enum {
146
- /* non-signed */
147
- TCG_COND_NEVER = 0 | 0 | 0 | 0,
148
- TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
149
- TCG_COND_EQ = 8 | 0 | 0 | 0,
150
- TCG_COND_NE = 8 | 0 | 0 | 1,
151
- /* signed */
152
- TCG_COND_LT = 0 | 0 | 2 | 0,
153
- TCG_COND_GE = 0 | 0 | 2 | 1,
154
- TCG_COND_LE = 8 | 0 | 2 | 0,
155
- TCG_COND_GT = 8 | 0 | 2 | 1,
156
- /* unsigned */
157
- TCG_COND_LTU = 0 | 4 | 0 | 0,
158
- TCG_COND_GEU = 0 | 4 | 0 | 1,
159
- TCG_COND_LEU = 8 | 4 | 0 | 0,
160
- TCG_COND_GTU = 8 | 4 | 0 | 1,
161
-} TCGCond;
162
-
163
-/* Invert the sense of the comparison. */
164
-static inline TCGCond tcg_invert_cond(TCGCond c)
165
-{
166
- return (TCGCond)(c ^ 1);
167
-}
168
-
169
-/* Swap the operands in a comparison. */
170
-static inline TCGCond tcg_swap_cond(TCGCond c)
171
-{
172
- return c & 6 ? (TCGCond)(c ^ 9) : c;
173
-}
174
-
175
-/* Create an "unsigned" version of a "signed" comparison. */
176
-static inline TCGCond tcg_unsigned_cond(TCGCond c)
177
-{
178
- return c & 2 ? (TCGCond)(c ^ 6) : c;
179
-}
180
-
181
-/* Create a "signed" version of an "unsigned" comparison. */
182
-static inline TCGCond tcg_signed_cond(TCGCond c)
183
-{
184
- return c & 4 ? (TCGCond)(c ^ 6) : c;
185
-}
186
-
187
-/* Must a comparison be considered unsigned? */
188
-static inline bool is_unsigned_cond(TCGCond c)
189
-{
190
- return (c & 4) != 0;
191
-}
192
-
193
-/* Create a "high" version of a double-word comparison.
194
- This removes equality from a LTE or GTE comparison. */
195
-static inline TCGCond tcg_high_cond(TCGCond c)
196
-{
197
- switch (c) {
198
- case TCG_COND_GE:
199
- case TCG_COND_LE:
200
- case TCG_COND_GEU:
201
- case TCG_COND_LEU:
202
- return (TCGCond)(c ^ 8);
203
- default:
204
- return c;
205
- }
206
-}
207
-
208
typedef enum TCGTempVal {
209
TEMP_VAL_DEAD,
210
TEMP_VAL_REG,
211
--
31
--
212
2.25.1
32
2.17.2
213
33
214
34
1
These macros are only used in one place. By expanding,
1
From: "Emilio G. Cota" <cota@braap.org>
2
we get to apply some common-subexpression elimination
3
and create some local variables.
4
2
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
As far as I can tell tlb_flush does not need to be called
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
this early. tlb_flush is eventually called after the CPU
5
has been realized.
6
7
This change paves the way to the introduction of tlb_init,
8
which will be called from cpu_exec_realizefn.
9
10
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Emilio G. Cota <cota@braap.org>
14
Message-Id: <20181009174557.16125-3-cota@braap.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
16
---
9
tcg/tci.c | 165 +++++++++++++++++++++++++++++++++---------------------
17
target/unicore32/cpu.c | 2 --
10
1 file changed, 100 insertions(+), 65 deletions(-)
18
1 file changed, 2 deletions(-)
11
19
12
diff --git a/tcg/tci.c b/tcg/tci.c
20
diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c
13
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tci.c
22
--- a/target/unicore32/cpu.c
15
+++ b/tcg/tci.c
23
+++ b/target/unicore32/cpu.c
16
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
24
@@ -XXX,XX +XXX,XX @@ static void uc32_cpu_initfn(Object *obj)
17
return result;
25
env->uncached_asr = ASR_MODE_PRIV;
26
env->regs[31] = 0x03000000;
27
#endif
28
-
29
- tlb_flush(cs);
18
}
30
}
19
31
20
-#ifdef CONFIG_SOFTMMU
32
static const VMStateDescription vmstate_uc32_cpu = {
21
-# define qemu_ld_ub \
22
- helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
23
-# define qemu_ld_leuw \
24
- helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
25
-# define qemu_ld_leul \
26
- helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
27
-# define qemu_ld_leq \
28
- helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
29
-# define qemu_ld_beuw \
30
- helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
31
-# define qemu_ld_beul \
32
- helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
33
-# define qemu_ld_beq \
34
- helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
35
-# define qemu_st_b(X) \
36
- helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
37
-# define qemu_st_lew(X) \
38
- helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
39
-# define qemu_st_lel(X) \
40
- helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
41
-# define qemu_st_leq(X) \
42
- helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
43
-# define qemu_st_bew(X) \
44
- helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
45
-# define qemu_st_bel(X) \
46
- helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
47
-# define qemu_st_beq(X) \
48
- helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
49
-#else
50
-# define qemu_ld_ub ldub_p(g2h(env_cpu(env), taddr))
51
-# define qemu_ld_leuw lduw_le_p(g2h(env_cpu(env), taddr))
52
-# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(env_cpu(env), taddr))
53
-# define qemu_ld_leq ldq_le_p(g2h(env_cpu(env), taddr))
54
-# define qemu_ld_beuw lduw_be_p(g2h(env_cpu(env), taddr))
55
-# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(env_cpu(env), taddr))
56
-# define qemu_ld_beq ldq_be_p(g2h(env_cpu(env), taddr))
57
-# define qemu_st_b(X) stb_p(g2h(env_cpu(env), taddr), X)
58
-# define qemu_st_lew(X) stw_le_p(g2h(env_cpu(env), taddr), X)
59
-# define qemu_st_lel(X) stl_le_p(g2h(env_cpu(env), taddr), X)
60
-# define qemu_st_leq(X) stq_le_p(g2h(env_cpu(env), taddr), X)
61
-# define qemu_st_bew(X) stw_be_p(g2h(env_cpu(env), taddr), X)
62
-# define qemu_st_bel(X) stl_be_p(g2h(env_cpu(env), taddr), X)
63
-# define qemu_st_beq(X) stq_be_p(g2h(env_cpu(env), taddr), X)
64
-#endif
65
-
66
static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
67
TCGMemOpIdx oi, const void *tb_ptr)
68
{
69
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
70
71
+#ifdef CONFIG_SOFTMMU
72
+ uintptr_t ra = (uintptr_t)tb_ptr;
73
+
74
switch (mop) {
75
case MO_UB:
76
- return qemu_ld_ub;
77
+ return helper_ret_ldub_mmu(env, taddr, oi, ra);
78
case MO_SB:
79
- return (int8_t)qemu_ld_ub;
80
+ return helper_ret_ldsb_mmu(env, taddr, oi, ra);
81
case MO_LEUW:
82
- return qemu_ld_leuw;
83
+ return helper_le_lduw_mmu(env, taddr, oi, ra);
84
case MO_LESW:
85
- return (int16_t)qemu_ld_leuw;
86
+ return helper_le_ldsw_mmu(env, taddr, oi, ra);
87
case MO_LEUL:
88
- return qemu_ld_leul;
89
+ return helper_le_ldul_mmu(env, taddr, oi, ra);
90
case MO_LESL:
91
- return (int32_t)qemu_ld_leul;
92
+ return helper_le_ldsl_mmu(env, taddr, oi, ra);
93
case MO_LEQ:
94
- return qemu_ld_leq;
95
+ return helper_le_ldq_mmu(env, taddr, oi, ra);
96
case MO_BEUW:
97
- return qemu_ld_beuw;
98
+ return helper_be_lduw_mmu(env, taddr, oi, ra);
99
case MO_BESW:
100
- return (int16_t)qemu_ld_beuw;
101
+ return helper_be_ldsw_mmu(env, taddr, oi, ra);
102
case MO_BEUL:
103
- return qemu_ld_beul;
104
+ return helper_be_ldul_mmu(env, taddr, oi, ra);
105
case MO_BESL:
106
- return (int32_t)qemu_ld_beul;
107
+ return helper_be_ldsl_mmu(env, taddr, oi, ra);
108
case MO_BEQ:
109
- return qemu_ld_beq;
110
+ return helper_be_ldq_mmu(env, taddr, oi, ra);
111
default:
112
g_assert_not_reached();
113
}
114
+#else
115
+ void *haddr = g2h(env_cpu(env), taddr);
116
+ uint64_t ret;
117
+
118
+ switch (mop) {
119
+ case MO_UB:
120
+ ret = ldub_p(haddr);
121
+ break;
122
+ case MO_SB:
123
+ ret = ldsb_p(haddr);
124
+ break;
125
+ case MO_LEUW:
126
+ ret = lduw_le_p(haddr);
127
+ break;
128
+ case MO_LESW:
129
+ ret = ldsw_le_p(haddr);
130
+ break;
131
+ case MO_LEUL:
132
+ ret = (uint32_t)ldl_le_p(haddr);
133
+ break;
134
+ case MO_LESL:
135
+ ret = (int32_t)ldl_le_p(haddr);
136
+ break;
137
+ case MO_LEQ:
138
+ ret = ldq_le_p(haddr);
139
+ break;
140
+ case MO_BEUW:
141
+ ret = lduw_be_p(haddr);
142
+ break;
143
+ case MO_BESW:
144
+ ret = ldsw_be_p(haddr);
145
+ break;
146
+ case MO_BEUL:
147
+ ret = (uint32_t)ldl_be_p(haddr);
148
+ break;
149
+ case MO_BESL:
150
+ ret = (int32_t)ldl_be_p(haddr);
151
+ break;
152
+ case MO_BEQ:
153
+ ret = ldq_be_p(haddr);
154
+ break;
155
+ default:
156
+ g_assert_not_reached();
157
+ }
158
+ return ret;
159
+#endif
160
}
161
162
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
163
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
164
{
165
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
166
167
+#ifdef CONFIG_SOFTMMU
168
+ uintptr_t ra = (uintptr_t)tb_ptr;
169
+
170
switch (mop) {
171
case MO_UB:
172
- qemu_st_b(val);
173
+ helper_ret_stb_mmu(env, taddr, val, oi, ra);
174
break;
175
case MO_LEUW:
176
- qemu_st_lew(val);
177
+ helper_le_stw_mmu(env, taddr, val, oi, ra);
178
break;
179
case MO_LEUL:
180
- qemu_st_lel(val);
181
+ helper_le_stl_mmu(env, taddr, val, oi, ra);
182
break;
183
case MO_LEQ:
184
- qemu_st_leq(val);
185
+ helper_le_stq_mmu(env, taddr, val, oi, ra);
186
break;
187
case MO_BEUW:
188
- qemu_st_bew(val);
189
+ helper_be_stw_mmu(env, taddr, val, oi, ra);
190
break;
191
case MO_BEUL:
192
- qemu_st_bel(val);
193
+ helper_be_stl_mmu(env, taddr, val, oi, ra);
194
break;
195
case MO_BEQ:
196
- qemu_st_beq(val);
197
+ helper_be_stq_mmu(env, taddr, val, oi, ra);
198
break;
199
default:
200
g_assert_not_reached();
201
}
202
+#else
203
+ void *haddr = g2h(env_cpu(env), taddr);
204
+
205
+ switch (mop) {
206
+ case MO_UB:
207
+ stb_p(haddr, val);
208
+ break;
209
+ case MO_LEUW:
210
+ stw_le_p(haddr, val);
211
+ break;
212
+ case MO_LEUL:
213
+ stl_le_p(haddr, val);
214
+ break;
215
+ case MO_LEQ:
216
+ stq_le_p(haddr, val);
217
+ break;
218
+ case MO_BEUW:
219
+ stw_be_p(haddr, val);
220
+ break;
221
+ case MO_BEUL:
222
+ stl_be_p(haddr, val);
223
+ break;
224
+ case MO_BEQ:
225
+ stq_be_p(haddr, val);
226
+ break;
227
+ default:
228
+ g_assert_not_reached();
229
+ }
230
+#endif
231
}
232
233
#if TCG_TARGET_REG_BITS == 64
234
--
33
--
235
2.25.1
34
2.17.2
236
35
237
36
1
We can share this code between 32-bit and 64-bit loads and stores.
1
From: "Emilio G. Cota" <cota@braap.org>
2
2
3
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Paves the way for the addition of a per-TLB lock.
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Emilio G. Cota <cota@braap.org>
8
Message-Id: <20181009174557.16125-4-cota@braap.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
10
---
7
tcg/tci.c | 183 +++++++++++++++++++++---------------------------------
11
include/exec/exec-all.h | 8 ++++++++
8
1 file changed, 71 insertions(+), 112 deletions(-)
12
accel/tcg/cputlb.c | 4 ++++
13
exec.c | 1 +
14
3 files changed, 13 insertions(+)
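
As background for the new tci_qemu_ld()/tci_qemu_st() added below: both key their switch on the MemOp bits packed into the TCGMemOpIdx operand. A minimal illustration of that decoding, using QEMU's existing accessors (the MO_LESW value and mmu_idx 1 are only examples, not taken from the patch):

    static void memop_decode_example(void)
    {
        TCGMemOpIdx oi = make_memop_idx(MO_LESW, 1);  /* 16-bit LE signed load, mmu_idx 1 */
        MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
        unsigned mmu_idx = get_mmuidx(oi);

        /* mop == MO_LESW, so tci_qemu_ld() would take the MO_LESW case and
         * sign-extend the 16-bit result; mmu_idx selects the softmmu TLB. */
        (void)mop;
        (void)mmu_idx;
    }
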
9
15
10
diff --git a/tcg/tci.c b/tcg/tci.c
16
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
11
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tci.c
18
--- a/include/exec/exec-all.h
13
+++ b/tcg/tci.c
19
+++ b/include/exec/exec-all.h
14
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
20
@@ -XXX,XX +XXX,XX @@ void cpu_address_space_init(CPUState *cpu, int asidx,
15
#define qemu_st_beq(X) \
21
16
cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
22
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
17
23
/* cputlb.c */
18
+static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
24
+/**
19
+ TCGMemOpIdx oi, const void *tb_ptr)
25
+ * tlb_init - initialize a CPU's TLB
26
+ * @cpu: CPU whose TLB should be initialized
27
+ */
28
+void tlb_init(CPUState *cpu);
29
/**
30
* tlb_flush_page:
31
* @cpu: CPU whose TLB should be flushed
32
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
33
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
34
uintptr_t retaddr);
35
#else
36
+static inline void tlb_init(CPUState *cpu)
20
+{
37
+{
21
+ MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
38
+}
22
+
39
static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
23
+ switch (mop) {
40
{
24
+ case MO_UB:
41
}
25
+ return qemu_ld_ub;
42
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
26
+ case MO_SB:
43
index XXXXXXX..XXXXXXX 100644
27
+ return (int8_t)qemu_ld_ub;
44
--- a/accel/tcg/cputlb.c
28
+ case MO_LEUW:
45
+++ b/accel/tcg/cputlb.c
29
+ return qemu_ld_leuw;
46
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
30
+ case MO_LESW:
47
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
31
+ return (int16_t)qemu_ld_leuw;
48
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
32
+ case MO_LEUL:
49
33
+ return qemu_ld_leul;
50
+void tlb_init(CPUState *cpu)
34
+ case MO_LESL:
51
+{
35
+ return (int32_t)qemu_ld_leul;
36
+ case MO_LEQ:
37
+ return qemu_ld_leq;
38
+ case MO_BEUW:
39
+ return qemu_ld_beuw;
40
+ case MO_BESW:
41
+ return (int16_t)qemu_ld_beuw;
42
+ case MO_BEUL:
43
+ return qemu_ld_beul;
44
+ case MO_BESL:
45
+ return (int32_t)qemu_ld_beul;
46
+ case MO_BEQ:
47
+ return qemu_ld_beq;
48
+ default:
49
+ g_assert_not_reached();
50
+ }
51
+}
52
+}
52
+
53
+
53
+static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
54
/* flush_all_helper: run fn across all cpus
54
+ TCGMemOpIdx oi, const void *tb_ptr)
55
*
55
+{
56
* If the wait flag is set then the src cpu's helper will be queued as
56
+ MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
57
diff --git a/exec.c b/exec.c
57
+
58
index XXXXXXX..XXXXXXX 100644
58
+ switch (mop) {
59
--- a/exec.c
59
+ case MO_UB:
60
+++ b/exec.c
60
+ qemu_st_b(val);
61
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
61
+ break;
62
tcg_target_initialized = true;
62
+ case MO_LEUW:
63
cc->tcg_initialize();
63
+ qemu_st_lew(val);
64
}
64
+ break;
65
+ tlb_init(cpu);
65
+ case MO_LEUL:
66
66
+ qemu_st_lel(val);
67
#ifndef CONFIG_USER_ONLY
67
+ break;
68
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
68
+ case MO_LEQ:
69
+ qemu_st_leq(val);
70
+ break;
71
+ case MO_BEUW:
72
+ qemu_st_bew(val);
73
+ break;
74
+ case MO_BEUL:
75
+ qemu_st_bel(val);
76
+ break;
77
+ case MO_BEQ:
78
+ qemu_st_beq(val);
79
+ break;
80
+ default:
81
+ g_assert_not_reached();
82
+ }
83
+}
84
+
85
#if TCG_TARGET_REG_BITS == 64
86
# define CASE_32_64(x) \
87
case glue(glue(INDEX_op_, x), _i64): \
88
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
89
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
90
taddr = tci_uint64(regs[r2], regs[r1]);
91
}
92
- switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
93
- case MO_UB:
94
- tmp32 = qemu_ld_ub;
95
- break;
96
- case MO_SB:
97
- tmp32 = (int8_t)qemu_ld_ub;
98
- break;
99
- case MO_LEUW:
100
- tmp32 = qemu_ld_leuw;
101
- break;
102
- case MO_LESW:
103
- tmp32 = (int16_t)qemu_ld_leuw;
104
- break;
105
- case MO_LEUL:
106
- tmp32 = qemu_ld_leul;
107
- break;
108
- case MO_BEUW:
109
- tmp32 = qemu_ld_beuw;
110
- break;
111
- case MO_BESW:
112
- tmp32 = (int16_t)qemu_ld_beuw;
113
- break;
114
- case MO_BEUL:
115
- tmp32 = qemu_ld_beul;
116
- break;
117
- default:
118
- g_assert_not_reached();
119
- }
120
+ tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
121
regs[r0] = tmp32;
122
break;
123
124
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
125
taddr = tci_uint64(regs[r3], regs[r2]);
126
oi = regs[r4];
127
}
128
- switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
129
- case MO_UB:
130
- tmp64 = qemu_ld_ub;
131
- break;
132
- case MO_SB:
133
- tmp64 = (int8_t)qemu_ld_ub;
134
- break;
135
- case MO_LEUW:
136
- tmp64 = qemu_ld_leuw;
137
- break;
138
- case MO_LESW:
139
- tmp64 = (int16_t)qemu_ld_leuw;
140
- break;
141
- case MO_LEUL:
142
- tmp64 = qemu_ld_leul;
143
- break;
144
- case MO_LESL:
145
- tmp64 = (int32_t)qemu_ld_leul;
146
- break;
147
- case MO_LEQ:
148
- tmp64 = qemu_ld_leq;
149
- break;
150
- case MO_BEUW:
151
- tmp64 = qemu_ld_beuw;
152
- break;
153
- case MO_BESW:
154
- tmp64 = (int16_t)qemu_ld_beuw;
155
- break;
156
- case MO_BEUL:
157
- tmp64 = qemu_ld_beul;
158
- break;
159
- case MO_BESL:
160
- tmp64 = (int32_t)qemu_ld_beul;
161
- break;
162
- case MO_BEQ:
163
- tmp64 = qemu_ld_beq;
164
- break;
165
- default:
166
- g_assert_not_reached();
167
- }
168
+ tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
169
if (TCG_TARGET_REG_BITS == 32) {
170
tci_write_reg64(regs, r1, r0, tmp64);
171
} else {
172
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
173
taddr = tci_uint64(regs[r2], regs[r1]);
174
}
175
tmp32 = regs[r0];
176
- switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) {
177
- case MO_UB:
178
- qemu_st_b(tmp32);
179
- break;
180
- case MO_LEUW:
181
- qemu_st_lew(tmp32);
182
- break;
183
- case MO_LEUL:
184
- qemu_st_lel(tmp32);
185
- break;
186
- case MO_BEUW:
187
- qemu_st_bew(tmp32);
188
- break;
189
- case MO_BEUL:
190
- qemu_st_bel(tmp32);
191
- break;
192
- default:
193
- g_assert_not_reached();
194
- }
195
+ tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
196
break;
197
198
case INDEX_op_qemu_st_i64:
199
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
200
}
201
tmp64 = tci_uint64(regs[r1], regs[r0]);
202
}
203
- switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) {
204
- case MO_UB:
205
- qemu_st_b(tmp64);
206
- break;
207
- case MO_LEUW:
208
- qemu_st_lew(tmp64);
209
- break;
210
- case MO_LEUL:
211
- qemu_st_lel(tmp64);
212
- break;
213
- case MO_LEQ:
214
- qemu_st_leq(tmp64);
215
- break;
216
- case MO_BEUW:
217
- qemu_st_bew(tmp64);
218
- break;
219
- case MO_BEUL:
220
- qemu_st_bel(tmp64);
221
- break;
222
- case MO_BEQ:
223
- qemu_st_beq(tmp64);
224
- break;
225
- default:
226
- g_assert_not_reached();
227
- }
228
+ tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
229
break;
230
231
case INDEX_op_mb:
232
--
69
--
233
2.25.1
70
2.17.2
234
71
235
72
1
Wrap guest memory operations for tci like we do for cpu_ld*_data.
1
From: "Emilio G. Cota" <cota@braap.org>
2
2
3
We cannot actually use the cpu_ldst.h interface without duplicating
3
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
the memory trace operations performed within, which will already
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
have been expanded into the tcg opcode stream.
5
Signed-off-by: Emilio G. Cota <cota@braap.org>
6
6
Message-Id: <20181009174557.16125-5-cota@braap.org>
7
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
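
In the user-only halves below, the direct host access is bracketed by set_helper_retaddr()/clear_helper_retaddr() so that a guest memory fault taken inside the access can be attributed to the translation block. A condensed sketch of the pattern (the function name is illustrative, not part of the patch):

    static uint64_t tci_user_ld_sketch(CPUArchState *env, target_ulong taddr,
                                       const void *tb_ptr)
    {
        uintptr_t ra = (uintptr_t)tb_ptr;
        void *haddr = g2h(env_cpu(env), taddr);
        uint64_t ret;

        set_helper_retaddr(ra);     /* record where to unwind if the access faults */
        ret = ldq_le_p(haddr);      /* any of the ld*_p/st*_p accessors */
        clear_helper_retaddr();
        return ret;
    }
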
10
---
8
---
11
tcg/tci.c | 10 ++++++----
9
accel/tcg/cputlb.c | 4 ++--
12
1 file changed, 6 insertions(+), 4 deletions(-)
10
1 file changed, 2 insertions(+), 2 deletions(-)
13
11
14
diff --git a/tcg/tci.c b/tcg/tci.c
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tci.c
14
--- a/accel/tcg/cputlb.c
17
+++ b/tcg/tci.c
15
+++ b/accel/tcg/cputlb.c
18
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
16
@@ -XXX,XX +XXX,XX @@
19
TCGMemOpIdx oi, const void *tb_ptr)
17
} \
20
{
18
} while (0)
21
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
19
22
-
20
-#define assert_cpu_is_self(this_cpu) do { \
23
-#ifdef CONFIG_SOFTMMU
21
+#define assert_cpu_is_self(cpu) do { \
24
uintptr_t ra = (uintptr_t)tb_ptr;
22
if (DEBUG_TLB_GATE) { \
25
23
- g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \
26
+#ifdef CONFIG_SOFTMMU
24
+ g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
27
switch (mop) {
25
} \
28
case MO_UB:
26
} while (0)
29
return helper_ret_ldub_mmu(env, taddr, oi, ra);
30
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
31
void *haddr = g2h(env_cpu(env), taddr);
32
uint64_t ret;
33
34
+ set_helper_retaddr(ra);
35
switch (mop) {
36
case MO_UB:
37
ret = ldub_p(haddr);
38
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
39
default:
40
g_assert_not_reached();
41
}
42
+ clear_helper_retaddr();
43
return ret;
44
#endif
45
}
46
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
47
TCGMemOpIdx oi, const void *tb_ptr)
48
{
49
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
50
-
51
-#ifdef CONFIG_SOFTMMU
52
uintptr_t ra = (uintptr_t)tb_ptr;
53
54
+#ifdef CONFIG_SOFTMMU
55
switch (mop) {
56
case MO_UB:
57
helper_ret_stb_mmu(env, taddr, val, oi, ra);
58
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
59
#else
60
void *haddr = g2h(env_cpu(env), taddr);
61
62
+ set_helper_retaddr(ra);
63
switch (mop) {
64
case MO_UB:
65
stb_p(haddr, val);
66
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
67
default:
68
g_assert_not_reached();
69
}
70
+ clear_helper_retaddr();
71
#endif
72
}
73
27
74
--
28
--
75
2.25.1
29
2.17.2
76
30
77
31
1
This will give us both flags and typemask for use later.
1
From: "Emilio G. Cota" <cota@braap.org>
2
2
3
We also fix a dumping bug, wherein calls generated for plugins
3
Currently we rely on atomic operations for cross-CPU invalidations.
4
fail tcg_find_helper and print (null) instead of either a name
4
There are two cases that these atomics miss: cross-CPU invalidations
5
or the raw function pointer.
5
can race with either (1) vCPU threads flushing their TLB, which
6
6
happens via memset, or (2) vCPUs calling tlb_reset_dirty on their TLB,
7
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
which updates .addr_write with a regular store. This results in
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
undefined behaviour, since we're mixing regular and atomic ops
9
on concurrent accesses.
10
11
Fix it by using tlb_lock, a per-vCPU lock. All updaters of tlb_table
12
and the corresponding victim cache now hold the lock.
13
The readers that do not hold tlb_lock must use atomic reads when
14
reading .addr_write, since this field can be updated by other threads;
15
the conversion to atomic reads is done in the next patch.
16
17
Note that an alternative fix would be to expand the use of atomic ops.
18
However, in the case of TLB flushes this would have a huge performance
19
impact, since (1) TLB flushes can happen very frequently and (2) we
20
currently use a full memory barrier to flush each TLB entry, and a TLB
21
has many entries. Instead, acquiring the lock is barely slower than a
22
full memory barrier since it is uncontended, and with a single lock
23
acquisition we can flush the entire TLB.
24
25
Tested-by: Alex Bennée <alex.bennee@linaro.org>
26
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
27
Signed-off-by: Emilio G. Cota <cota@braap.org>
28
Message-Id: <20181009174557.16125-6-cota@braap.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
29
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
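
In outline, the rule established below is: every writer of tlb_table/tlb_v_table holds the new per-vCPU tlb_lock, while a reader on another thread must use an atomic read of addr_write. A minimal sketch of the two sides (function names are illustrative; the real changes are in the diff):

    /* Writer: the owning vCPU, or work queued to it, holds the lock. */
    static void tlb_flush_all_sketch(CPUArchState *env)
    {
        qemu_spin_lock(&env->tlb_lock);
        memset(env->tlb_table, -1, sizeof(env->tlb_table));
        memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
        qemu_spin_unlock(&env->tlb_lock);
    }

    /* Cross-vCPU reader: cannot take the lock, so addr_write is read
     * atomically (readers are converted in the next patch). */
    static target_ulong tlb_read_addr_write_sketch(CPUTLBEntry *entry)
    {
        return atomic_read(&entry->addr_write);
    }
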
10
---
30
---
11
tcg/tcg-internal.h | 14 ++++++++++++-
31
include/exec/cpu-defs.h | 3 +
12
tcg/tcg.c | 49 ++++++++++++++++++++--------------------------
32
accel/tcg/cputlb.c | 155 ++++++++++++++++++++++------------------
13
2 files changed, 34 insertions(+), 29 deletions(-)
33
2 files changed, 87 insertions(+), 71 deletions(-)
14
34
15
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
35
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
16
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/tcg-internal.h
37
--- a/include/exec/cpu-defs.h
18
+++ b/tcg/tcg-internal.h
38
+++ b/include/exec/cpu-defs.h
19
@@ -XXX,XX +XXX,XX @@
39
@@ -XXX,XX +XXX,XX @@
20
40
#endif
21
#define TCG_HIGHWATER 1024
41
22
42
#include "qemu/host-utils.h"
23
+typedef struct TCGHelperInfo {
43
+#include "qemu/thread.h"
24
+ void *func;
44
#include "qemu/queue.h"
25
+ const char *name;
45
#ifdef CONFIG_TCG
26
+ unsigned flags;
46
#include "tcg-target.h"
27
+ unsigned typemask;
47
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
28
+} TCGHelperInfo;
48
29
+
49
#define CPU_COMMON_TLB \
30
extern TCGContext tcg_init_ctx;
50
/* The meaning of the MMU modes is defined in the target code. */ \
31
extern TCGContext **tcg_ctxs;
51
+ /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \
32
extern unsigned int tcg_cur_ctxs;
52
+ QemuSpin tlb_lock; \
33
@@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s);
53
CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \
34
void tcg_region_initial_alloc(TCGContext *s);
54
CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
35
void tcg_region_prologue_set(TCGContext *s);
55
CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \
36
56
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
37
+static inline const TCGHelperInfo *tcg_call_info(TCGOp *op)
38
+{
39
+ return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
40
+}
41
+
42
static inline unsigned tcg_call_flags(TCGOp *op)
43
{
44
- return op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
45
+ return tcg_call_info(op)->flags;
46
}
47
48
#endif /* TCG_INTERNAL_H */
49
diff --git a/tcg/tcg.c b/tcg/tcg.c
50
index XXXXXXX..XXXXXXX 100644
57
index XXXXXXX..XXXXXXX 100644
51
--- a/tcg/tcg.c
58
--- a/accel/tcg/cputlb.c
52
+++ b/tcg/tcg.c
59
+++ b/accel/tcg/cputlb.c
53
@@ -XXX,XX +XXX,XX @@ void tcg_pool_reset(TCGContext *s)
60
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
54
s->pool_current = NULL;
61
55
}
62
void tlb_init(CPUState *cpu)
56
63
{
57
-typedef struct TCGHelperInfo {
64
+ CPUArchState *env = cpu->env_ptr;
58
- void *func;
65
+
59
- const char *name;
66
+ qemu_spin_init(&env->tlb_lock);
60
- unsigned flags;
67
}
61
- unsigned typemask;
68
62
-} TCGHelperInfo;
69
/* flush_all_helper: run fn across all cpus
70
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_nocheck(CPUState *cpu)
71
atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
72
tlb_debug("(count: %zu)\n", tlb_flush_count());
73
74
+ /*
75
+ * tlb_table/tlb_v_table updates from any thread must hold tlb_lock.
76
+ * However, updates from the owner thread (as is the case here; see the
77
+ * above assert_cpu_is_self) do not need atomic_set because all reads
78
+ * that do not hold the lock are performed by the same owner thread.
79
+ */
80
+ qemu_spin_lock(&env->tlb_lock);
81
memset(env->tlb_table, -1, sizeof(env->tlb_table));
82
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
83
+ qemu_spin_unlock(&env->tlb_lock);
84
+
85
cpu_tb_jmp_cache_clear(cpu);
86
87
env->vtlb_index = 0;
88
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
89
90
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
91
92
+ qemu_spin_lock(&env->tlb_lock);
93
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
94
95
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
96
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
97
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
98
}
99
}
100
+ qemu_spin_unlock(&env->tlb_lock);
101
102
cpu_tb_jmp_cache_clear(cpu);
103
104
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
105
tlb_hit_page(tlb_entry->addr_code, page);
106
}
107
108
-static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page)
109
+/* Called with tlb_lock held */
110
+static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
111
+ target_ulong page)
112
{
113
if (tlb_hit_page_anyprot(tlb_entry, page)) {
114
memset(tlb_entry, -1, sizeof(*tlb_entry));
115
}
116
}
117
118
-static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx,
119
- target_ulong page)
120
+/* Called with tlb_lock held */
121
+static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
122
+ target_ulong page)
123
{
124
int k;
125
+
126
+ assert_cpu_is_self(ENV_GET_CPU(env));
127
for (k = 0; k < CPU_VTLB_SIZE; k++) {
128
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page);
129
+ tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
134
135
addr &= TARGET_PAGE_MASK;
136
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
137
+ qemu_spin_lock(&env->tlb_lock);
138
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
139
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
140
- tlb_flush_vtlb_page(env, mmu_idx, addr);
141
+ tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr);
142
+ tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
143
}
144
+ qemu_spin_unlock(&env->tlb_lock);
145
146
tb_flush_jmp_cache(cpu, addr);
147
}
148
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
149
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
150
page, addr, mmu_idx_bitmap);
151
152
+ qemu_spin_lock(&env->tlb_lock);
153
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
154
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
155
- tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
156
- tlb_flush_vtlb_page(env, mmu_idx, addr);
157
+ tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr);
158
+ tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
159
}
160
}
161
+ qemu_spin_unlock(&env->tlb_lock);
162
163
tb_flush_jmp_cache(cpu, addr);
164
}
165
@@ -XXX,XX +XXX,XX @@ void tlb_unprotect_code(ram_addr_t ram_addr)
166
* most usual is detecting writes to code regions which may invalidate
167
* generated code.
168
*
169
- * Because we want other vCPUs to respond to changes straight away we
170
- * update the te->addr_write field atomically. If the TLB entry has
171
- * been changed by the vCPU in the mean time we skip the update.
172
+ * Other vCPUs might be reading their TLBs during guest execution, so we update
173
+ * te->addr_write with atomic_set. We don't need to worry about this for
174
+ * oversized guests as MTTCG is disabled for them.
175
*
176
- * As this function uses atomic accesses we also need to ensure
177
- * updates to tlb_entries follow the same access rules. We don't need
178
- * to worry about this for oversized guests as MTTCG is disabled for
179
- * them.
180
+ * Called with tlb_lock held.
181
*/
63
-
182
-
64
#include "exec/helper-proto.h"
183
-static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
65
184
- uintptr_t length)
66
static const TCGHelperInfo all_helpers[] = {
185
+static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
67
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
186
+ uintptr_t start, uintptr_t length)
68
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
187
{
69
{
188
-#if TCG_OVERSIZED_GUEST
70
int i, real_args, nb_rets, pi;
189
uintptr_t addr = tlb_entry->addr_write;
71
- unsigned typemask, flags;
190
72
- TCGHelperInfo *info;
191
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
73
+ unsigned typemask;
192
addr &= TARGET_PAGE_MASK;
74
+ const TCGHelperInfo *info;
193
addr += tlb_entry->addend;
75
TCGOp *op;
194
if ((addr - start) < length) {
76
195
+#if TCG_OVERSIZED_GUEST
77
info = g_hash_table_lookup(helper_table, (gpointer)func);
196
tlb_entry->addr_write |= TLB_NOTDIRTY;
78
- flags = info->flags;
79
typemask = info->typemask;
80
81
#ifdef CONFIG_PLUGIN
82
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
83
real_args++;
84
}
85
op->args[pi++] = (uintptr_t)func;
86
- op->args[pi++] = flags;
87
+ op->args[pi++] = (uintptr_t)info;
88
TCGOP_CALLI(op) = real_args;
89
90
/* Make sure the fields didn't overflow. */
91
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str(TCGContext *s, char *buf,
92
return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
93
}
94
95
-/* Find helper name. */
96
-static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
97
-{
98
- const char *ret = NULL;
99
- if (helper_table) {
100
- TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
101
- if (info) {
102
- ret = info->name;
103
- }
197
- }
104
- }
198
- }
105
- return ret;
199
#else
106
-}
200
- /* paired with atomic_mb_set in tlb_set_page_with_attrs */
201
- uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
202
- uintptr_t addr = orig_addr;
107
-
203
-
108
static const char * const cond_name[] =
204
- if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
109
{
205
- addr &= TARGET_PAGE_MASK;
110
[TCG_COND_NEVER] = "never",
206
- addr += atomic_read(&tlb_entry->addend);
111
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
207
- if ((addr - start) < length) {
112
col += qemu_log(" " TARGET_FMT_lx, a);
208
- uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
113
}
209
- atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
114
} else if (c == INDEX_op_call) {
210
+ atomic_set(&tlb_entry->addr_write,
115
+ const TCGHelperInfo *info = tcg_call_info(op);
211
+ tlb_entry->addr_write | TLB_NOTDIRTY);
116
+ void *func;
212
+#endif
117
+
213
}
118
/* variable number of arguments */
214
}
119
nb_oargs = TCGOP_CALLO(op);
215
-#endif
120
nb_iargs = TCGOP_CALLI(op);
216
}
121
nb_cargs = def->nb_cargs;
217
122
218
-/* For atomic correctness when running MTTCG we need to use the right
123
- /* function name, flags, out args */
219
- * primitives when copying entries */
124
- col += qemu_log(" %s %s,$0x%x,$%d", def->name,
220
-static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
125
- tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
221
- bool atomic_set)
126
- tcg_call_flags(op), nb_oargs);
222
+/*
127
+ col += qemu_log(" %s ", def->name);
223
+ * Called with tlb_lock held.
128
+
224
+ * Called only from the vCPU context, i.e. the TLB's owner thread.
129
+ /*
225
+ */
130
+ * Print the function name from TCGHelperInfo, if available.
226
+static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
131
+ * Note that plugins have a template function for the info,
227
{
132
+ * but the actual function pointer comes from the plugin.
228
-#if TCG_OVERSIZED_GUEST
133
+ */
229
*d = *s;
134
+ func = (void *)(uintptr_t)op->args[nb_oargs + nb_iargs];
230
-#else
135
+ if (func == info->func) {
231
- if (atomic_set) {
136
+ col += qemu_log("%s", info->name);
232
- d->addr_read = s->addr_read;
137
+ } else {
233
- d->addr_code = s->addr_code;
138
+ col += qemu_log("plugin(%p)", func);
234
- atomic_set(&d->addend, atomic_read(&s->addend));
139
+ }
235
- /* Pairs with flag setting in tlb_reset_dirty_range */
140
+
236
- atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
141
+ col += qemu_log("$0x%x,$%d", info->flags, nb_oargs);
237
- } else {
142
for (i = 0; i < nb_oargs; i++) {
238
- d->addr_read = s->addr_read;
143
col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
239
- d->addr_write = atomic_read(&s->addr_write);
144
op->args[i]));
240
- d->addr_code = s->addr_code;
241
- d->addend = atomic_read(&s->addend);
242
- }
243
-#endif
244
}
245
246
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
247
- * the target vCPU). As such care needs to be taken that we don't
248
- * dangerously race with another vCPU update. The only thing actually
249
- * updated is the target TLB entry ->addr_write flags.
250
+ * the target vCPU).
251
+ * We must take tlb_lock to avoid racing with another vCPU update. The only
252
+ * thing actually updated is the target TLB entry ->addr_write flags.
253
*/
254
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
255
{
256
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
257
int mmu_idx;
258
259
env = cpu->env_ptr;
260
+ qemu_spin_lock(&env->tlb_lock);
261
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
262
unsigned int i;
263
264
for (i = 0; i < CPU_TLB_SIZE; i++) {
265
- tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
266
- start1, length);
267
+ tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
268
+ length);
269
}
270
271
for (i = 0; i < CPU_VTLB_SIZE; i++) {
272
- tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
273
- start1, length);
274
+ tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
275
+ length);
276
}
277
}
278
+ qemu_spin_unlock(&env->tlb_lock);
279
}
280
281
-static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
282
+/* Called with tlb_lock held */
283
+static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
284
+ target_ulong vaddr)
285
{
286
if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
287
tlb_entry->addr_write = vaddr;
288
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
289
290
vaddr &= TARGET_PAGE_MASK;
291
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
292
+ qemu_spin_lock(&env->tlb_lock);
293
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
294
- tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
295
+ tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr);
296
}
297
298
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
299
int k;
300
for (k = 0; k < CPU_VTLB_SIZE; k++) {
301
- tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
302
+ tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
303
}
304
}
305
+ qemu_spin_unlock(&env->tlb_lock);
306
}
307
308
/* Our TLB does not support large pages, so remember the area covered by
309
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
310
addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
311
}
312
313
- /* Make sure there's no cached translation for the new page. */
314
- tlb_flush_vtlb_page(env, mmu_idx, vaddr_page);
315
-
316
code_address = address;
317
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
318
paddr_page, xlat, prot, &address);
319
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
320
index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
321
te = &env->tlb_table[mmu_idx][index];
322
323
+ /*
324
+ * Hold the TLB lock for the rest of the function. We could acquire/release
325
+ * the lock several times in the function, but it is faster to amortize the
326
+ * acquisition cost by acquiring it just once. Note that this leads to
327
+ * a longer critical section, but this is not a concern since the TLB lock
328
+ * is unlikely to be contended.
329
+ */
330
+ qemu_spin_lock(&env->tlb_lock);
331
+
332
+ /* Make sure there's no cached translation for the new page. */
333
+ tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
334
+
335
/*
336
* Only evict the old entry to the victim tlb if it's for a
337
* different page; otherwise just overwrite the stale data.
338
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
339
CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
340
341
/* Evict the old entry into the victim tlb. */
342
- copy_tlb_helper(tv, te, true);
343
+ copy_tlb_helper_locked(tv, te);
344
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
345
}
346
347
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
348
}
349
}
350
351
- /* Pairs with flag setting in tlb_reset_dirty_range */
352
- copy_tlb_helper(te, &tn, true);
353
- /* atomic_mb_set(&te->addr_write, write_address); */
354
+ copy_tlb_helper_locked(te, &tn);
355
+ qemu_spin_unlock(&env->tlb_lock);
356
}
357
358
/* Add a new TLB entry, but without specifying the memory
359
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
360
size_t elt_ofs, target_ulong page)
361
{
362
size_t vidx;
363
+
364
+ assert_cpu_is_self(ENV_GET_CPU(env));
365
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
366
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
367
target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
368
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
369
/* Found entry in victim tlb, swap tlb and iotlb. */
370
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
371
372
- copy_tlb_helper(&tmptlb, tlb, false);
373
- copy_tlb_helper(tlb, vtlb, true);
374
- copy_tlb_helper(vtlb, &tmptlb, true);
375
+ qemu_spin_lock(&env->tlb_lock);
376
+ copy_tlb_helper_locked(&tmptlb, tlb);
377
+ copy_tlb_helper_locked(tlb, vtlb);
378
+ copy_tlb_helper_locked(vtlb, &tmptlb);
379
+ qemu_spin_unlock(&env->tlb_lock);
380
381
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
382
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
145
--
383
--
146
2.25.1
384
2.17.2
147
385
148
386
1
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Isolate the computation of an index from an address into a
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
helper before we change that function.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
[ cota: convert tlb_vaddr_to_host; use atomic_read on addr_write ]
7
Signed-off-by: Emilio G. Cota <cota@braap.org>
8
Message-Id: <20181009175129.17888-2-cota@braap.org>
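
The conversion below is mechanical; at a typical call site it reads as follows (sketch only, using the two helpers this patch adds to cpu_ldst.h):

    /* Open-coded form used throughout before this patch: */
    uintptr_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *entry = &env->tlb_table[mmu_idx][index];

    /* Equivalent form after the patch: */
    entry = tlb_entry(env, mmu_idx, addr);  /* &tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)] */
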
4
---
9
---
5
tcg/tci/tcg-target.h | 8 ++++----
10
accel/tcg/softmmu_template.h | 64 +++++++++++++++++---------------
6
tcg/tci.c | 42 ++++++++++++++++++++++++++++++++++++++++
11
include/exec/cpu_ldst.h | 19 ++++++++--
7
tcg/tci/tcg-target.c.inc | 32 ++++++++++++++++++++++++++++++
12
include/exec/cpu_ldst_template.h | 25 +++++++------
8
3 files changed, 78 insertions(+), 4 deletions(-)
13
accel/tcg/cputlb.c | 60 ++++++++++++++----------------
14
4 files changed, 90 insertions(+), 78 deletions(-)
9
15
10
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
16
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
11
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tci/tcg-target.h
18
--- a/accel/tcg/softmmu_template.h
13
+++ b/tcg/tci/tcg-target.h
19
+++ b/accel/tcg/softmmu_template.h
14
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
15
#define TCG_TARGET_HAS_ext16u_i32 1
21
WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
16
#define TCG_TARGET_HAS_andc_i32 1
22
TCGMemOpIdx oi, uintptr_t retaddr)
17
#define TCG_TARGET_HAS_deposit_i32 1
23
{
18
-#define TCG_TARGET_HAS_extract_i32 0
24
- unsigned mmu_idx = get_mmuidx(oi);
19
-#define TCG_TARGET_HAS_sextract_i32 0
25
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
20
+#define TCG_TARGET_HAS_extract_i32 1
26
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
21
+#define TCG_TARGET_HAS_sextract_i32 1
27
+ uintptr_t mmu_idx = get_mmuidx(oi);
22
#define TCG_TARGET_HAS_extract2_i32 0
28
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
23
#define TCG_TARGET_HAS_eqv_i32 1
29
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
24
#define TCG_TARGET_HAS_nand_i32 1
30
+ target_ulong tlb_addr = entry->ADDR_READ;
25
@@ -XXX,XX +XXX,XX @@
31
unsigned a_bits = get_alignment_bits(get_memop(oi));
26
#define TCG_TARGET_HAS_bswap32_i64 1
32
uintptr_t haddr;
27
#define TCG_TARGET_HAS_bswap64_i64 1
33
DATA_TYPE res;
28
#define TCG_TARGET_HAS_deposit_i64 1
34
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
29
-#define TCG_TARGET_HAS_extract_i64 0
35
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
30
-#define TCG_TARGET_HAS_sextract_i64 0
36
mmu_idx, retaddr);
31
+#define TCG_TARGET_HAS_extract_i64 1
37
}
32
+#define TCG_TARGET_HAS_sextract_i64 1
38
- tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
33
#define TCG_TARGET_HAS_extract2_i64 0
39
+ tlb_addr = entry->ADDR_READ;
34
#define TCG_TARGET_HAS_div_i64 1
40
}
35
#define TCG_TARGET_HAS_rem_i64 1
41
36
diff --git a/tcg/tci.c b/tcg/tci.c
42
/* Handle an IO access. */
43
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
44
return res;
45
}
46
47
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
48
+ haddr = addr + entry->addend;
49
#if DATA_SIZE == 1
50
res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
51
#else
52
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
53
WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
54
TCGMemOpIdx oi, uintptr_t retaddr)
55
{
56
- unsigned mmu_idx = get_mmuidx(oi);
57
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
58
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
59
+ uintptr_t mmu_idx = get_mmuidx(oi);
60
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
61
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
62
+ target_ulong tlb_addr = entry->ADDR_READ;
63
unsigned a_bits = get_alignment_bits(get_memop(oi));
64
uintptr_t haddr;
65
DATA_TYPE res;
66
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
67
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
68
mmu_idx, retaddr);
69
}
70
- tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
71
+ tlb_addr = entry->ADDR_READ;
72
}
73
74
/* Handle an IO access. */
75
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
76
return res;
77
}
78
79
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
80
+ haddr = addr + entry->addend;
81
res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
82
return res;
83
}
84
@@ -XXX,XX +XXX,XX @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
85
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
86
TCGMemOpIdx oi, uintptr_t retaddr)
87
{
88
- unsigned mmu_idx = get_mmuidx(oi);
89
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
90
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
91
+ uintptr_t mmu_idx = get_mmuidx(oi);
92
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
93
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
94
+ target_ulong tlb_addr = entry->addr_write;
95
unsigned a_bits = get_alignment_bits(get_memop(oi));
96
uintptr_t haddr;
97
98
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
99
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
100
mmu_idx, retaddr);
101
}
102
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
103
+ tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
104
}
105
106
/* Handle an IO access. */
107
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
108
if (DATA_SIZE > 1
109
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
110
>= TARGET_PAGE_SIZE)) {
111
- int i, index2;
112
- target_ulong page2, tlb_addr2;
113
+ int i;
114
+ target_ulong page2;
115
+ CPUTLBEntry *entry2;
116
do_unaligned_access:
117
/* Ensure the second page is in the TLB. Note that the first page
118
is already guaranteed to be filled, and that the second page
119
cannot evict the first. */
120
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
121
- index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
122
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
123
- if (!tlb_hit_page(tlb_addr2, page2)
124
+ entry2 = tlb_entry(env, mmu_idx, page2);
125
+ if (!tlb_hit_page(entry2->addr_write, page2)
126
&& !VICTIM_TLB_HIT(addr_write, page2)) {
127
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
128
mmu_idx, retaddr);
129
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
130
return;
131
}
132
133
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
134
+ haddr = addr + entry->addend;
135
#if DATA_SIZE == 1
136
glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
137
#else
138
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
139
void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
140
TCGMemOpIdx oi, uintptr_t retaddr)
141
{
142
- unsigned mmu_idx = get_mmuidx(oi);
143
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
144
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
145
+ uintptr_t mmu_idx = get_mmuidx(oi);
146
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
147
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
148
+ target_ulong tlb_addr = entry->addr_write;
149
unsigned a_bits = get_alignment_bits(get_memop(oi));
150
uintptr_t haddr;
151
152
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
153
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
154
mmu_idx, retaddr);
155
}
156
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
157
+ tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
158
}
159
160
/* Handle an IO access. */
161
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
162
if (DATA_SIZE > 1
163
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
164
>= TARGET_PAGE_SIZE)) {
165
- int i, index2;
166
- target_ulong page2, tlb_addr2;
167
+ int i;
168
+ target_ulong page2;
169
+ CPUTLBEntry *entry2;
170
do_unaligned_access:
171
/* Ensure the second page is in the TLB. Note that the first page
172
is already guaranteed to be filled, and that the second page
173
cannot evict the first. */
174
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
175
- index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
176
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
177
- if (!tlb_hit_page(tlb_addr2, page2)
178
+ entry2 = tlb_entry(env, mmu_idx, page2);
179
+ if (!tlb_hit_page(entry2->addr_write, page2)
180
&& !VICTIM_TLB_HIT(addr_write, page2)) {
181
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
182
mmu_idx, retaddr);
183
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
184
return;
185
}
186
187
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
188
+ haddr = addr + entry->addend;
189
glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
190
}
191
#endif /* DATA_SIZE > 1 */
192
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
37
index XXXXXXX..XXXXXXX 100644
193
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/tci.c
194
--- a/include/exec/cpu_ldst.h
39
+++ b/tcg/tci.c
195
+++ b/include/exec/cpu_ldst.h
40
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2)
196
@@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr;
41
*i2 = sextract32(insn, 16, 16);
197
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
42
}
198
#include "tcg.h"
43
199
44
+static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
200
+/* Find the TLB index corresponding to the mmu_idx + address pair. */
45
+ uint8_t *i2, uint8_t *i3)
201
+static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
202
+ target_ulong addr)
46
+{
203
+{
47
+ *r0 = extract32(insn, 8, 4);
204
+ return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
48
+ *r1 = extract32(insn, 12, 4);
49
+ *i2 = extract32(insn, 16, 6);
50
+ *i3 = extract32(insn, 22, 6);
51
+}
205
+}
52
+
206
+
53
static void tci_args_rrrc(uint32_t insn,
207
+/* Find the TLB entry corresponding to the mmu_idx + address pair. */
54
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
208
+static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
55
{
209
+ target_ulong addr)
56
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
57
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
58
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
59
break;
60
+#endif
61
+#if TCG_TARGET_HAS_extract_i32
62
+ case INDEX_op_extract_i32:
63
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
64
+ regs[r0] = extract32(regs[r1], pos, len);
65
+ break;
66
+#endif
67
+#if TCG_TARGET_HAS_sextract_i32
68
+ case INDEX_op_sextract_i32:
69
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
70
+ regs[r0] = sextract32(regs[r1], pos, len);
71
+ break;
72
#endif
73
case INDEX_op_brcond_i32:
74
tci_args_rl(insn, tb_ptr, &r0, &ptr);
75
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
76
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
77
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
78
break;
79
+#endif
80
+#if TCG_TARGET_HAS_extract_i64
81
+ case INDEX_op_extract_i64:
82
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
83
+ regs[r0] = extract64(regs[r1], pos, len);
84
+ break;
85
+#endif
86
+#if TCG_TARGET_HAS_sextract_i64
87
+ case INDEX_op_sextract_i64:
88
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
89
+ regs[r0] = sextract64(regs[r1], pos, len);
90
+ break;
91
#endif
92
case INDEX_op_brcond_i64:
93
tci_args_rl(insn, tb_ptr, &r0, &ptr);
94
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
95
op_name, str_r(r0), str_r(r1), str_r(r2), pos, len);
96
break;
97
98
+ case INDEX_op_extract_i32:
99
+ case INDEX_op_extract_i64:
100
+ case INDEX_op_sextract_i32:
101
+ case INDEX_op_sextract_i64:
102
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
103
+ info->fprintf_func(info->stream, "%-12s %s,%s,%d,%d",
104
+ op_name, str_r(r0), str_r(r1), pos, len);
105
+ break;
106
+
107
case INDEX_op_movcond_i32:
108
case INDEX_op_movcond_i64:
109
case INDEX_op_setcond2_i32:
110
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
111
index XXXXXXX..XXXXXXX 100644
112
--- a/tcg/tci/tcg-target.c.inc
113
+++ b/tcg/tci/tcg-target.c.inc
114
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
115
case INDEX_op_bswap32_i32:
116
case INDEX_op_bswap32_i64:
117
case INDEX_op_bswap64_i64:
118
+ case INDEX_op_extract_i32:
119
+ case INDEX_op_extract_i64:
120
+ case INDEX_op_sextract_i32:
121
+ case INDEX_op_sextract_i64:
122
return C_O1_I1(r, r);
123
124
case INDEX_op_st8_i32:
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
126
tcg_out32(s, insn);
127
}
128
129
+static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
130
+ TCGReg r1, uint8_t b2, uint8_t b3)
131
+{
210
+{
132
+ tcg_insn_unit insn = 0;
211
+ return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
133
+
134
+ tcg_debug_assert(b2 == extract32(b2, 0, 6));
135
+ tcg_debug_assert(b3 == extract32(b3, 0, 6));
136
+ insn = deposit32(insn, 0, 8, op);
137
+ insn = deposit32(insn, 8, 4, r0);
138
+ insn = deposit32(insn, 12, 4, r1);
139
+ insn = deposit32(insn, 16, 6, b2);
140
+ insn = deposit32(insn, 22, 6, b3);
141
+ tcg_out32(s, insn);
142
+}
212
+}
143
+
213
+
144
static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
214
#ifdef MMU_MODE0_SUFFIX
145
TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
215
#define CPU_MMU_INDEX 0
146
{
216
#define MEMSUFFIX MMU_MODE0_SUFFIX
147
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
217
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
148
}
218
#if defined(CONFIG_USER_ONLY)
149
break;
219
return g2h(addr);
150
220
#else
151
+ CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */
221
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
152
+ CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
222
- CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
153
+ {
223
+ CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr);
154
+ TCGArg pos = args[2], len = args[3];
224
abi_ptr tlb_addr;
155
+ TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32;
225
uintptr_t haddr;
156
+
226
157
+ tcg_debug_assert(pos < max);
227
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
158
+ tcg_debug_assert(pos + len <= max);
228
return NULL;
159
+
229
}
160
+ tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len);
230
161
+ }
231
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
162
+ break;
232
+ haddr = addr + tlbentry->addend;
163
+
233
return (void *)haddr;
164
CASE_32_64(brcond)
234
#endif /* defined(CONFIG_USER_ONLY) */
165
tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32
235
}
166
? INDEX_op_setcond_i32 : INDEX_op_setcond_i64),
236
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
237
index XXXXXXX..XXXXXXX 100644
238
--- a/include/exec/cpu_ldst_template.h
239
+++ b/include/exec/cpu_ldst_template.h
240
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
241
target_ulong ptr,
242
uintptr_t retaddr)
243
{
244
- int page_index;
245
+ CPUTLBEntry *entry;
246
RES_TYPE res;
247
target_ulong addr;
248
int mmu_idx;
249
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
250
#endif
251
252
addr = ptr;
253
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
254
mmu_idx = CPU_MMU_INDEX;
255
- if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
256
+ entry = tlb_entry(env, mmu_idx, addr);
257
+ if (unlikely(entry->ADDR_READ !=
258
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
259
oi = make_memop_idx(SHIFT, mmu_idx);
260
res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
261
oi, retaddr);
262
} else {
263
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
264
+ uintptr_t hostaddr = addr + entry->addend;
265
res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr);
266
}
267
return res;
268
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
269
target_ulong ptr,
270
uintptr_t retaddr)
271
{
272
- int res, page_index;
273
+ CPUTLBEntry *entry;
274
+ int res;
275
target_ulong addr;
276
int mmu_idx;
277
TCGMemOpIdx oi;
278
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
279
#endif
280
281
addr = ptr;
282
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
283
mmu_idx = CPU_MMU_INDEX;
284
- if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
285
+ entry = tlb_entry(env, mmu_idx, addr);
286
+ if (unlikely(entry->ADDR_READ !=
287
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
288
oi = make_memop_idx(SHIFT, mmu_idx);
289
res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
290
MMUSUFFIX)(env, addr, oi, retaddr);
291
} else {
292
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
293
+ uintptr_t hostaddr = addr + entry->addend;
294
res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr);
295
}
296
return res;
297
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
298
target_ulong ptr,
299
RES_TYPE v, uintptr_t retaddr)
300
{
301
- int page_index;
302
+ CPUTLBEntry *entry;
303
target_ulong addr;
304
int mmu_idx;
305
TCGMemOpIdx oi;
306
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
307
#endif
308
309
addr = ptr;
310
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
311
mmu_idx = CPU_MMU_INDEX;
312
- if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
313
+ entry = tlb_entry(env, mmu_idx, addr);
314
+ if (unlikely(entry->addr_write !=
315
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
316
oi = make_memop_idx(SHIFT, mmu_idx);
317
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
318
retaddr);
319
} else {
320
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
321
+ uintptr_t hostaddr = addr + entry->addend;
322
glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v);
323
}
324
}
325
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
326
index XXXXXXX..XXXXXXX 100644
327
--- a/accel/tcg/cputlb.c
328
+++ b/accel/tcg/cputlb.c
329
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
330
{
331
CPUArchState *env = cpu->env_ptr;
332
target_ulong addr = (target_ulong) data.target_ptr;
333
- int i;
334
int mmu_idx;
335
336
assert_cpu_is_self(cpu);
337
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
338
}
339
340
addr &= TARGET_PAGE_MASK;
341
- i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
342
qemu_spin_lock(&env->tlb_lock);
343
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
344
- tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr);
345
+ tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
346
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
347
}
348
qemu_spin_unlock(&env->tlb_lock);
349
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
350
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
351
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
352
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
353
- int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
354
int mmu_idx;
355
356
assert_cpu_is_self(cpu);
357
358
- tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
359
- page, addr, mmu_idx_bitmap);
360
+ tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
361
+ addr, mmu_idx_bitmap);
362
363
qemu_spin_lock(&env->tlb_lock);
364
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
365
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
366
- tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr);
367
+ tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
368
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
369
}
370
}
371
@@ -XXX,XX +XXX,XX @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
372
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
373
{
374
CPUArchState *env = cpu->env_ptr;
375
- int i;
376
int mmu_idx;
377
378
assert_cpu_is_self(cpu);
379
380
vaddr &= TARGET_PAGE_MASK;
381
- i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
382
qemu_spin_lock(&env->tlb_lock);
383
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
384
- tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr);
385
+ tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
386
}
387
388
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
389
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
390
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
391
paddr_page, xlat, prot, &address);
392
393
- index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
394
- te = &env->tlb_table[mmu_idx][index];
395
+ index = tlb_index(env, mmu_idx, vaddr_page);
396
+ te = tlb_entry(env, mmu_idx, vaddr_page);
397
398
/*
399
* Hold the TLB lock for the rest of the function. We could acquire/release
400
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
401
* repeat the MMU check here. This tlb_fill() call might
402
* longjump out if this access should cause a guest exception.
403
*/
404
- int index;
405
+ CPUTLBEntry *entry;
406
target_ulong tlb_addr;
407
408
tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
409
410
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
411
- tlb_addr = env->tlb_table[mmu_idx][index].addr_read;
412
+ entry = tlb_entry(env, mmu_idx, addr);
413
+ tlb_addr = entry->addr_read;
414
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
415
/* RAM access */
416
- uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
417
+ uintptr_t haddr = addr + entry->addend;
418
419
return ldn_p((void *)haddr, size);
420
}
421
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
422
* repeat the MMU check here. This tlb_fill() call might
423
* longjump out if this access should cause a guest exception.
424
*/
425
- int index;
426
+ CPUTLBEntry *entry;
427
target_ulong tlb_addr;
428
429
tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
430
431
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
432
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
433
+ entry = tlb_entry(env, mmu_idx, addr);
434
+ tlb_addr = entry->addr_write;
435
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
436
/* RAM access */
437
- uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
438
+ uintptr_t haddr = addr + entry->addend;
439
440
stn_p((void *)haddr, size, val);
441
return;
442
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
443
*/
444
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
445
{
446
- int mmu_idx, index;
447
+ uintptr_t mmu_idx = cpu_mmu_index(env, true);
448
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
449
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
450
void *p;
451
452
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
453
- mmu_idx = cpu_mmu_index(env, true);
454
- if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) {
455
+ if (unlikely(!tlb_hit(entry->addr_code, addr))) {
456
if (!VICTIM_TLB_HIT(addr_code, addr)) {
457
tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
458
}
459
- assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr));
460
+ assert(tlb_hit(entry->addr_code, addr));
461
}
462
463
- if (unlikely(env->tlb_table[mmu_idx][index].addr_code &
464
- (TLB_RECHECK | TLB_MMIO))) {
465
+ if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
466
/*
467
* Return -1 if we can't translate and execute from an entire
468
* page of RAM here, which will cause us to execute by loading
469
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
470
return -1;
471
}
472
473
- p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
474
+ p = (void *)((uintptr_t)addr + entry->addend);
475
return qemu_ram_addr_from_host_nofail(p);
476
}
477
478
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
479
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
480
uintptr_t retaddr)
481
{
482
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
483
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
484
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
485
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
486
487
- if (!tlb_hit(tlb_addr, addr)) {
488
+ if (!tlb_hit(entry->addr_write, addr)) {
489
/* TLB entry is for a different page */
490
if (!VICTIM_TLB_HIT(addr_write, addr)) {
491
tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
492
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
493
NotDirtyInfo *ndi)
494
{
495
size_t mmu_idx = get_mmuidx(oi);
496
- size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
497
- CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
498
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
499
+ CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
500
target_ulong tlb_addr = tlbe->addr_write;
501
TCGMemOp mop = get_memop(oi);
502
int a_bits = get_alignment_bits(mop);
167
--
503
--
168
2.25.1
504
2.17.2
169
505
170
506
diff view generated by jsdifflib
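The hunks above replace the open-coded TLB indexing with tlb_index() and tlb_entry() helpers. A minimal sketch of what such helpers would look like, inferred from the call sites above rather than copied from the patch that introduces them:

static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
                                  target_ulong addr)
{
    /* The same computation the call sites used to open-code. */
    return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
}

static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
                                     target_ulong addr)
{
    return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
}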
1
We had a single ATOMIC_MMU_LOOKUP macro that probed for
1
GCC7+ will no longer advertise support for 16-byte __atomic operations
2
read+write on all atomic ops. This is incorrect for
2
if only cmpxchg is supported, as for x86_64. Fortunately, x86_64 still
3
plain atomic load and atomic store.
3
has support for __sync_compare_and_swap_16 and we can make use of that.
4
AArch64 does not have, nor has it ever had, such support, so open-code it.
4
5
5
For user-only, we rely on the host page permissions.
6
Reviewed-by: Emilio G. Cota <cota@braap.org>
6
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/390
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
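To illustrate the fallback described above: even when a compiler no longer advertises 16-byte __atomic support, it may still expose __sync_val_compare_and_swap_16. A sketch of the configure probe this patch adds (see the configure hunk below; on x86_64 it may additionally need -mcx16 depending on compiler defaults):

int main(void)
{
    /* We only care whether this compiles and links, not what it returns. */
    unsigned __int128 x = 0, y = 0;
    __sync_val_compare_and_swap_16(&x, y, x);
    return 0;
}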
---
8
---
11
accel/tcg/atomic_template.h | 24 +++++-----
9
accel/tcg/atomic_template.h | 20 ++++-
12
accel/tcg/cputlb.c | 95 ++++++++++++++++++++++++++-----------
10
include/qemu/atomic128.h | 153 ++++++++++++++++++++++++++++++++++++
13
accel/tcg/user-exec.c | 8 ++--
11
include/qemu/compiler.h | 11 +++
14
3 files changed, 83 insertions(+), 44 deletions(-)
12
tcg/tcg.h | 16 ++--
13
accel/tcg/cputlb.c | 3 +-
14
accel/tcg/user-exec.c | 5 +-
15
configure | 19 +++++
16
7 files changed, 213 insertions(+), 14 deletions(-)
17
create mode 100644 include/qemu/atomic128.h
15
18
16
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
19
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
17
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
18
--- a/accel/tcg/atomic_template.h
21
--- a/accel/tcg/atomic_template.h
19
+++ b/accel/tcg/atomic_template.h
22
+++ b/accel/tcg/atomic_template.h
20
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
23
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
21
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
22
{
23
ATOMIC_MMU_DECLS;
24
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
25
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW;
26
DATA_TYPE ret;
24
DATA_TYPE ret;
27
uint16_t info = trace_mem_build_info(SHIFT, false, 0, false,
25
28
ATOMIC_MMU_IDX);
26
ATOMIC_TRACE_RMW;
29
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
27
+#if DATA_SIZE == 16
28
+ ret = atomic16_cmpxchg(haddr, cmpv, newv);
29
+#else
30
ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
31
+#endif
32
ATOMIC_MMU_CLEANUP;
33
return ret;
34
}
35
36
#if DATA_SIZE >= 16
37
+#if HAVE_ATOMIC128
30
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
38
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
31
{
39
{
32
ATOMIC_MMU_DECLS;
40
ATOMIC_MMU_DECLS;
33
- DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
41
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
34
+ DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R;
42
35
uint16_t info = trace_mem_build_info(SHIFT, false, 0, false,
43
ATOMIC_TRACE_LD;
36
ATOMIC_MMU_IDX);
44
- __atomic_load(haddr, &val, __ATOMIC_RELAXED);
37
45
+ val = atomic16_read(haddr);
46
ATOMIC_MMU_CLEANUP;
47
return val;
48
}
38
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
49
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
39
ABI_TYPE val EXTRA_ARGS)
50
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
40
{
51
41
ATOMIC_MMU_DECLS;
52
ATOMIC_TRACE_ST;
42
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
53
- __atomic_store(haddr, &val, __ATOMIC_RELAXED);
43
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W;
54
+ atomic16_set(haddr, val);
44
uint16_t info = trace_mem_build_info(SHIFT, false, 0, true,
55
ATOMIC_MMU_CLEANUP;
45
ATOMIC_MMU_IDX);
56
}
46
57
+#endif
47
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
58
#else
59
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
48
ABI_TYPE val EXTRA_ARGS)
60
ABI_TYPE val EXTRA_ARGS)
49
{
61
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
50
ATOMIC_MMU_DECLS;
51
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
52
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW;
53
DATA_TYPE ret;
62
DATA_TYPE ret;
54
uint16_t info = trace_mem_build_info(SHIFT, false, 0, false,
63
55
ATOMIC_MMU_IDX);
64
ATOMIC_TRACE_RMW;
56
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
65
+#if DATA_SIZE == 16
57
ABI_TYPE val EXTRA_ARGS) \
66
+ ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
58
{ \
67
+#else
59
ATOMIC_MMU_DECLS; \
68
ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
60
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
69
+#endif
61
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \
70
ATOMIC_MMU_CLEANUP;
62
DATA_TYPE ret; \
71
return BSWAP(ret);
63
uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \
72
}
64
ATOMIC_MMU_IDX); \
73
65
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
74
#if DATA_SIZE >= 16
66
ABI_TYPE xval EXTRA_ARGS) \
75
+#if HAVE_ATOMIC128
67
{ \
68
ATOMIC_MMU_DECLS; \
69
- XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
70
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \
71
XDATA_TYPE cmp, old, new, val = xval; \
72
uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \
73
ATOMIC_MMU_IDX); \
74
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
75
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
76
{
77
ATOMIC_MMU_DECLS;
78
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
79
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW;
80
DATA_TYPE ret;
81
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false,
82
ATOMIC_MMU_IDX);
83
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
84
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
76
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
85
{
77
{
86
ATOMIC_MMU_DECLS;
78
ATOMIC_MMU_DECLS;
87
- DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
79
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
88
+ DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R;
80
89
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false,
81
ATOMIC_TRACE_LD;
90
ATOMIC_MMU_IDX);
82
- __atomic_load(haddr, &val, __ATOMIC_RELAXED);
91
83
+ val = atomic16_read(haddr);
84
ATOMIC_MMU_CLEANUP;
85
return BSWAP(val);
86
}
92
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
87
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
93
ABI_TYPE val EXTRA_ARGS)
88
94
{
89
ATOMIC_TRACE_ST;
95
ATOMIC_MMU_DECLS;
90
val = BSWAP(val);
96
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
91
- __atomic_store(haddr, &val, __ATOMIC_RELAXED);
97
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W;
92
+ atomic16_set(haddr, val);
98
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, true,
93
ATOMIC_MMU_CLEANUP;
99
ATOMIC_MMU_IDX);
94
}
100
95
+#endif
101
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
96
#else
97
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
102
ABI_TYPE val EXTRA_ARGS)
98
ABI_TYPE val EXTRA_ARGS)
103
{
99
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
104
ATOMIC_MMU_DECLS;
100
new file mode 100644
105
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
101
index XXXXXXX..XXXXXXX
106
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW;
102
--- /dev/null
107
ABI_TYPE ret;
103
+++ b/include/qemu/atomic128.h
108
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false,
104
@@ -XXX,XX +XXX,XX @@
109
ATOMIC_MMU_IDX);
105
+/*
110
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
106
+ * Simple interface for 128-bit atomic operations.
111
ABI_TYPE val EXTRA_ARGS) \
107
+ *
112
{ \
108
+ * Copyright (C) 2018 Linaro, Ltd.
113
ATOMIC_MMU_DECLS; \
109
+ *
114
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
110
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
115
+ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \
111
+ * See the COPYING file in the top-level directory.
116
DATA_TYPE ret; \
112
+ *
117
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \
113
+ * See docs/devel/atomics.txt for discussion about the guarantees each
118
false, ATOMIC_MMU_IDX); \
114
+ * atomic primitive is meant to provide.
119
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
115
+ */
120
ABI_TYPE xval EXTRA_ARGS) \
116
+
121
{ \
117
+#ifndef QEMU_ATOMIC128_H
122
ATOMIC_MMU_DECLS; \
118
+#define QEMU_ATOMIC128_H
123
- XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
119
+
124
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \
120
+/*
125
XDATA_TYPE ldo, ldn, old, new, val = xval; \
121
+ * GCC is a house divided about supporting large atomic operations.
126
uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \
122
+ *
127
false, ATOMIC_MMU_IDX); \
123
+ * For hosts that only have large compare-and-swap, a legalistic reading
124
+ * of the C++ standard means that one cannot implement __atomic_read on
125
+ * read-only memory, and thus all atomic operations must synchronize
126
+ * through libatomic.
127
+ *
128
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878
129
+ *
130
+ * This interpretation is not especially helpful for QEMU.
131
+ * For softmmu, all RAM is always read/write from the hypervisor.
132
+ * For user-only, if the guest doesn't implement such an __atomic_read
133
+ * then the host need not worry about it either.
134
+ *
135
+ * Moreover, using libatomic is not an option, because its interface is
136
+ * built for std::atomic<T>, and requires that *all* accesses to such an
137
+ * object go through the library. In our case we do not have an object
138
+ * in the C/C++ sense, but a view of memory as seen by the guest.
139
+ * The guest may issue a large atomic operation and then access those
140
+ * pieces using word-sized accesses. From the hypervisor, we have no
141
+ * way to connect those two actions.
142
+ *
143
+ * Therefore, special case each platform.
144
+ */
145
+
146
+#if defined(CONFIG_ATOMIC128)
147
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
148
+{
149
+ return atomic_cmpxchg__nocheck(ptr, cmp, new);
150
+}
151
+# define HAVE_CMPXCHG128 1
152
+#elif defined(CONFIG_CMPXCHG128)
153
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
154
+{
155
+ return __sync_val_compare_and_swap_16(ptr, cmp, new);
156
+}
157
+# define HAVE_CMPXCHG128 1
158
+#elif defined(__aarch64__)
159
+/* Through gcc 8, aarch64 has no support for 128-bit at all. */
160
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
161
+{
162
+ uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
163
+ uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
164
+ uint64_t oldl, oldh;
165
+ uint32_t tmp;
166
+
167
+ asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
168
+ "cmp %[oldl], %[cmpl]\n\t"
169
+ "ccmp %[oldh], %[cmph], #0, eq\n\t"
170
+ "b.ne 1f\n\t"
171
+ "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
172
+ "cbnz %w[tmp], 0b\n"
173
+ "1:"
174
+ : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
175
+ [oldl] "=&r"(oldl), [oldh] "=r"(oldh)
176
+ : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
177
+ [newl] "r"(newl), [newh] "r"(newh)
178
+ : "memory", "cc");
179
+
180
+ return int128_make128(oldl, oldh);
181
+}
182
+# define HAVE_CMPXCHG128 1
183
+#else
184
+/* Fallback definition that must be optimized away, or error. */
185
+Int128 QEMU_ERROR("unsupported atomic")
186
+ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
187
+# define HAVE_CMPXCHG128 0
188
+#endif /* Some definition for HAVE_CMPXCHG128 */
189
+
190
+
191
+#if defined(CONFIG_ATOMIC128)
192
+static inline Int128 atomic16_read(Int128 *ptr)
193
+{
194
+ return atomic_read__nocheck(ptr);
195
+}
196
+
197
+static inline void atomic16_set(Int128 *ptr, Int128 val)
198
+{
199
+ atomic_set__nocheck(ptr, val);
200
+}
201
+
202
+# define HAVE_ATOMIC128 1
203
+#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
204
+/* We can do better than cmpxchg for AArch64. */
205
+static inline Int128 atomic16_read(Int128 *ptr)
206
+{
207
+ uint64_t l, h;
208
+ uint32_t tmp;
209
+
210
+ /* The load must be paired with the store to guarantee not tearing. */
211
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
212
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
213
+ "cbnz %w[tmp], 0b"
214
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
215
+
216
+ return int128_make128(l, h);
217
+}
218
+
219
+static inline void atomic16_set(Int128 *ptr, Int128 val)
220
+{
221
+ uint64_t l = int128_getlo(val), h = int128_gethi(val);
222
+ uint64_t t1, t2;
223
+
224
+ /* Load into temporaries to acquire the exclusive access lock. */
225
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
226
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
227
+ "cbnz %w[t1], 0b"
228
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
229
+ : [l] "r"(l), [h] "r"(h));
230
+}
231
+
232
+# define HAVE_ATOMIC128 1
233
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
234
+static inline Int128 atomic16_read(Int128 *ptr)
235
+{
236
+ /* Maybe replace 0 with 0, returning the old value. */
237
+ return atomic16_cmpxchg(ptr, 0, 0);
238
+}
239
+
240
+static inline void atomic16_set(Int128 *ptr, Int128 val)
241
+{
242
+ Int128 old = *ptr, cmp;
243
+ do {
244
+ cmp = old;
245
+ old = atomic16_cmpxchg(ptr, cmp, val);
246
+ } while (old != cmp);
247
+}
248
+
249
+# define HAVE_ATOMIC128 1
250
+#else
251
+/* Fallback definitions that must be optimized away, or error. */
252
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
253
+void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
254
+# define HAVE_ATOMIC128 0
255
+#endif /* Some definition for HAVE_ATOMIC128 */
256
+
257
+#endif /* QEMU_ATOMIC128_H */
258
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
259
index XXXXXXX..XXXXXXX 100644
260
--- a/include/qemu/compiler.h
261
+++ b/include/qemu/compiler.h
262
@@ -XXX,XX +XXX,XX @@
263
# define QEMU_FLATTEN
264
#endif
265
266
+/*
267
+ * If __attribute__((error)) is present, use it to produce an error at
268
+ * compile time. Otherwise, one must wait for the linker to diagnose
269
+ * the missing symbol.
270
+ */
271
+#if __has_attribute(error)
272
+# define QEMU_ERROR(X) __attribute__((error(X)))
273
+#else
274
+# define QEMU_ERROR(X)
275
+#endif
276
+
277
/* Implement C11 _Generic via GCC builtins. Example:
278
*
279
* QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x)
280
diff --git a/tcg/tcg.h b/tcg/tcg.h
281
index XXXXXXX..XXXXXXX 100644
282
--- a/tcg/tcg.h
283
+++ b/tcg/tcg.h
284
@@ -XXX,XX +XXX,XX @@
285
#include "qemu/queue.h"
286
#include "tcg-mo.h"
287
#include "tcg-target.h"
288
+#include "qemu/int128.h"
289
290
/* XXX: make safe guess about sizes */
291
#define MAX_OP_PER_INSTR 266
292
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(xchg)
293
#undef GEN_ATOMIC_HELPER
294
#endif /* CONFIG_SOFTMMU */
295
296
-#ifdef CONFIG_ATOMIC128
297
-#include "qemu/int128.h"
298
-
299
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
300
- However, use the same format as the others, for use by the backends. */
301
+/*
302
+ * These aren't really a "proper" helpers because TCG cannot manage Int128.
303
+ * However, use the same format as the others, for use by the backends.
304
+ *
305
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
306
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
307
+ * as defined by <qemu/atomic128.h>.
308
+ */
309
Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
310
Int128 cmpv, Int128 newv,
311
TCGMemOpIdx oi, uintptr_t retaddr);
312
@@ -XXX,XX +XXX,XX @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
313
void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
314
TCGMemOpIdx oi, uintptr_t retaddr);
315
316
-#endif /* CONFIG_ATOMIC128 */
317
-
318
#endif /* TCG_H */
128
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
319
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
129
index XXXXXXX..XXXXXXX 100644
320
index XXXXXXX..XXXXXXX 100644
130
--- a/accel/tcg/cputlb.c
321
--- a/accel/tcg/cputlb.c
131
+++ b/accel/tcg/cputlb.c
322
+++ b/accel/tcg/cputlb.c
132
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
323
@@ -XXX,XX +XXX,XX @@
133
324
#include "exec/log.h"
325
#include "exec/helper-proto.h"
326
#include "qemu/atomic.h"
327
+#include "qemu/atomic128.h"
328
329
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
330
/* #define DEBUG_TLB */
331
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
332
#include "atomic_template.h"
134
#endif
333
#endif
135
334
136
-/* Probe for a read-modify-write atomic operation. Do not allow unaligned
335
-#ifdef CONFIG_ATOMIC128
137
- * operations, or io operations to proceed. Return the host address. */
336
+#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
138
+/*
337
#define DATA_SIZE 16
139
+ * Probe for an atomic operation. Do not allow unaligned operations,
140
+ * or io operations to proceed. Return the host address.
141
+ *
142
+ * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
143
+ */
144
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
145
- TCGMemOpIdx oi, uintptr_t retaddr)
146
+ TCGMemOpIdx oi, int size, int prot,
147
+ uintptr_t retaddr)
148
{
149
size_t mmu_idx = get_mmuidx(oi);
150
- uintptr_t index = tlb_index(env, mmu_idx, addr);
151
- CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
152
- target_ulong tlb_addr = tlb_addr_write(tlbe);
153
MemOp mop = get_memop(oi);
154
int a_bits = get_alignment_bits(mop);
155
- int s_bits = mop & MO_SIZE;
156
+ uintptr_t index;
157
+ CPUTLBEntry *tlbe;
158
+ target_ulong tlb_addr;
159
void *hostaddr;
160
161
/* Adjust the given return address. */
162
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
163
}
164
165
/* Enforce qemu required alignment. */
166
- if (unlikely(addr & ((1 << s_bits) - 1))) {
167
+ if (unlikely(addr & (size - 1))) {
168
/* We get here if guest alignment was not requested,
169
or was not enforced by cpu_unaligned_access above.
170
We might widen the access and emulate, but for now
171
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
172
goto stop_the_world;
173
}
174
175
+ index = tlb_index(env, mmu_idx, addr);
176
+ tlbe = tlb_entry(env, mmu_idx, addr);
177
+
178
/* Check TLB entry and enforce page permissions. */
179
- if (!tlb_hit(tlb_addr, addr)) {
180
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
181
- tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
182
- mmu_idx, retaddr);
183
- index = tlb_index(env, mmu_idx, addr);
184
- tlbe = tlb_entry(env, mmu_idx, addr);
185
+ if (prot & PAGE_WRITE) {
186
+ tlb_addr = tlb_addr_write(tlbe);
187
+ if (!tlb_hit(tlb_addr, addr)) {
188
+ if (!VICTIM_TLB_HIT(addr_write, addr)) {
189
+ tlb_fill(env_cpu(env), addr, size,
190
+ MMU_DATA_STORE, mmu_idx, retaddr);
191
+ index = tlb_index(env, mmu_idx, addr);
192
+ tlbe = tlb_entry(env, mmu_idx, addr);
193
+ }
194
+ tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
195
+ }
196
+
197
+ /* Let the guest notice RMW on a write-only page. */
198
+ if ((prot & PAGE_READ) &&
199
+ unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
200
+ tlb_fill(env_cpu(env), addr, size,
201
+ MMU_DATA_LOAD, mmu_idx, retaddr);
202
+ /*
203
+ * Since we don't support reads and writes to different addresses,
204
+ * and we do have the proper page loaded for write, this shouldn't
205
+ * ever return. But just in case, handle via stop-the-world.
206
+ */
207
+ goto stop_the_world;
208
+ }
209
+ } else /* if (prot & PAGE_READ) */ {
210
+ tlb_addr = tlbe->addr_read;
211
+ if (!tlb_hit(tlb_addr, addr)) {
212
+ if (!VICTIM_TLB_HIT(addr_write, addr)) {
213
+ tlb_fill(env_cpu(env), addr, size,
214
+ MMU_DATA_LOAD, mmu_idx, retaddr);
215
+ index = tlb_index(env, mmu_idx, addr);
216
+ tlbe = tlb_entry(env, mmu_idx, addr);
217
+ }
218
+ tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK;
219
}
220
- tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
221
}
222
223
/* Notice an IO access or a needs-MMU-lookup access */
224
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
225
goto stop_the_world;
226
}
227
228
- /* Let the guest notice RMW on a write-only page. */
229
- if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
230
- tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
231
- mmu_idx, retaddr);
232
- /* Since we don't support reads and writes to different addresses,
233
- and we do have the proper page loaded for write, this shouldn't
234
- ever return. But just in case, handle via stop-the-world. */
235
- goto stop_the_world;
236
- }
237
-
238
hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
239
240
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
241
- notdirty_write(env_cpu(env), addr, 1 << s_bits,
242
+ notdirty_write(env_cpu(env), addr, size,
243
&env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
244
}
245
246
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
247
#define ATOMIC_NAME(X) \
248
HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
249
#define ATOMIC_MMU_DECLS
250
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
251
+#define ATOMIC_MMU_LOOKUP_RW \
252
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, retaddr)
253
+#define ATOMIC_MMU_LOOKUP_R \
254
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, retaddr)
255
+#define ATOMIC_MMU_LOOKUP_W \
256
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, retaddr)
257
#define ATOMIC_MMU_CLEANUP
258
#define ATOMIC_MMU_IDX get_mmuidx(oi)
259
260
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
261
262
#undef EXTRA_ARGS
263
#undef ATOMIC_NAME
264
-#undef ATOMIC_MMU_LOOKUP
265
+#undef ATOMIC_MMU_LOOKUP_RW
266
+#undef ATOMIC_MMU_LOOKUP_R
267
+#undef ATOMIC_MMU_LOOKUP_W
268
+
269
#define EXTRA_ARGS , TCGMemOpIdx oi
270
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
271
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
272
+#define ATOMIC_MMU_LOOKUP_RW \
273
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, GETPC())
274
+#define ATOMIC_MMU_LOOKUP_R \
275
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, GETPC())
276
+#define ATOMIC_MMU_LOOKUP_W \
277
+ atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, GETPC())
278
279
#define DATA_SIZE 1
280
#include "atomic_template.h"
338
#include "atomic_template.h"
339
#endif
281
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
340
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
282
index XXXXXXX..XXXXXXX 100644
341
index XXXXXXX..XXXXXXX 100644
283
--- a/accel/tcg/user-exec.c
342
--- a/accel/tcg/user-exec.c
284
+++ b/accel/tcg/user-exec.c
343
+++ b/accel/tcg/user-exec.c
344
@@ -XXX,XX +XXX,XX @@
345
#include "exec/cpu_ldst.h"
346
#include "translate-all.h"
347
#include "exec/helper-proto.h"
348
+#include "qemu/atomic128.h"
349
350
#undef EAX
351
#undef ECX
285
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
352
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
286
353
/* The following is only callable from other helpers, and matches up
287
/* Macro to call the above, with local variables from the use context. */
354
with the softmmu version. */
288
#define ATOMIC_MMU_DECLS do {} while (0)
355
289
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
356
-#ifdef CONFIG_ATOMIC128
290
+#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
357
+#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
291
+#define ATOMIC_MMU_LOOKUP_R ATOMIC_MMU_LOOKUP_RW
292
+#define ATOMIC_MMU_LOOKUP_W ATOMIC_MMU_LOOKUP_RW
293
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
294
#define ATOMIC_MMU_IDX MMU_USER_IDX
295
296
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
297
358
298
#undef EXTRA_ARGS
359
#undef EXTRA_ARGS
299
#undef ATOMIC_NAME
360
#undef ATOMIC_NAME
300
-#undef ATOMIC_MMU_LOOKUP
361
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
301
+#undef ATOMIC_MMU_LOOKUP_RW
302
303
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
304
#define ATOMIC_NAME(X) \
305
HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
306
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr)
307
+#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr)
308
362
309
#define DATA_SIZE 16
363
#define DATA_SIZE 16
310
#include "atomic_template.h"
364
#include "atomic_template.h"
365
-#endif /* CONFIG_ATOMIC128 */
366
+#endif
367
diff --git a/configure b/configure
368
index XXXXXXX..XXXXXXX 100755
369
--- a/configure
370
+++ b/configure
371
@@ -XXX,XX +XXX,XX @@ EOF
372
fi
373
fi
374
375
+cmpxchg128=no
376
+if test "$int128" = yes -a "$atomic128" = no; then
377
+ cat > $TMPC << EOF
378
+int main(void)
379
+{
380
+ unsigned __int128 x = 0, y = 0;
381
+ __sync_val_compare_and_swap_16(&x, y, x);
382
+ return 0;
383
+}
384
+EOF
385
+ if compile_prog "" "" ; then
386
+ cmpxchg128=yes
387
+ fi
388
+fi
389
+
390
#########################################
391
# See if 64-bit atomic operations are supported.
392
# Note that without __atomic builtins, we can only
393
@@ -XXX,XX +XXX,XX @@ if test "$atomic128" = "yes" ; then
394
echo "CONFIG_ATOMIC128=y" >> $config_host_mak
395
fi
396
397
+if test "$cmpxchg128" = "yes" ; then
398
+ echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
399
+fi
400
+
401
if test "$atomic64" = "yes" ; then
402
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
403
fi
311
--
404
--
312
2.25.1
405
2.17.2
313
406
314
407
diff view generated by jsdifflib
1
As noted by qemu-plugins.h, plugins can neither read nor write
1
Reviewed-by: Emilio G. Cota <cota@braap.org>
2
guest registers.
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
3
4
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
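For background, TCG_CALL_NO_RWG marks a helper as neither reading nor writing TCG globals (the guest register state), so the optimizer does not have to sync and discard globals around the call. A hypothetical declaration in the same style as the change below, with a made-up helper name:

/* Hypothetical helper: same shape as the ones below, flagged as not
   touching guest registers. */
DEF_HELPER_FLAGS_2(example_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr)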
---
4
---
8
accel/tcg/plugin-helpers.h | 4 ++--
5
target/i386/mem_helper.c | 9 ++++-----
9
1 file changed, 2 insertions(+), 2 deletions(-)
6
1 file changed, 4 insertions(+), 5 deletions(-)
10
7
11
diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h
8
diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
12
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/plugin-helpers.h
10
--- a/target/i386/mem_helper.c
14
+++ b/accel/tcg/plugin-helpers.h
11
+++ b/target/i386/mem_helper.c
15
@@ -XXX,XX +XXX,XX @@
12
@@ -XXX,XX +XXX,XX @@
16
#ifdef CONFIG_PLUGIN
13
#include "exec/exec-all.h"
17
-DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr)
14
#include "exec/cpu_ldst.h"
18
-DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr)
15
#include "qemu/int128.h"
19
+DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr)
16
+#include "qemu/atomic128.h"
20
+DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG, void, i32, i32, i64, ptr)
17
#include "tcg.h"
18
19
void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
20
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
21
22
if ((a0 & 0xf) != 0) {
23
raise_exception_ra(env, EXCP0D_GPF, ra);
24
- } else {
25
-#ifndef CONFIG_ATOMIC128
26
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
27
-#else
28
+ } else if (HAVE_CMPXCHG128) {
29
int eflags = cpu_cc_compute_all(env, CC_OP);
30
31
Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
32
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
33
eflags &= ~CC_Z;
34
}
35
CC_SRC = eflags;
36
-#endif
37
+ } else {
38
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
39
}
40
}
21
#endif
41
#endif
22
--
42
--
23
2.25.1
43
2.17.2
24
44
25
45
diff view generated by jsdifflib
1
When this opcode is not available in the backend, the tcg middle-end
1
Reviewed-by: Emilio G. Cota <cota@braap.org>
2
will expand it into a series of 5 opcodes, so implementing this
3
saves bytecode space.
4
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
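To make the saving concrete, here is a hedged sketch of one way movcond can be lowered when the backend lacks it: a setcond result is widened into a mask that blends the two sources, roughly five operations (the exact middle-end expansion may differ):

/* movcond semantics: dest = (c1 COND c2) ? v1 : v2, with EQ as the
   example condition. */
static inline uint32_t movcond_expanded(uint32_t c1, uint32_t c2,
                                        uint32_t v1, uint32_t v2)
{
    uint32_t t = (c1 == c2);   /* 1: setcond */
    uint32_t m = -t;           /* 2: neg, 0 -> 0x00000000, 1 -> 0xffffffff */
    uint32_t a = v1 & m;       /* 3: and  */
    uint32_t b = v2 & ~m;      /* 4: andc */
    return a | b;              /* 5: or   */
}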
---
3
---
9
tcg/tci/tcg-target.h | 4 ++--
4
target/arm/helper-a64.c | 259 +++++++++++++++++++++-------------------
10
tcg/tci.c | 16 +++++++++++++++-
5
1 file changed, 133 insertions(+), 126 deletions(-)
11
tcg/tci/tcg-target.c.inc | 10 +++++++---
12
3 files changed, 24 insertions(+), 6 deletions(-)
13
6
14
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
7
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
15
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tci/tcg-target.h
9
--- a/target/arm/helper-a64.c
17
+++ b/tcg/tci/tcg-target.h
10
+++ b/target/arm/helper-a64.c
18
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@
19
#define TCG_TARGET_HAS_not_i32 1
12
#include "exec/exec-all.h"
20
#define TCG_TARGET_HAS_orc_i32 0
13
#include "exec/cpu_ldst.h"
21
#define TCG_TARGET_HAS_rot_i32 1
14
#include "qemu/int128.h"
22
-#define TCG_TARGET_HAS_movcond_i32 0
15
+#include "qemu/atomic128.h"
23
+#define TCG_TARGET_HAS_movcond_i32 1
16
#include "tcg.h"
24
#define TCG_TARGET_HAS_muls2_i32 0
17
#include "fpu/softfloat.h"
25
#define TCG_TARGET_HAS_muluh_i32 0
18
#include <zlib.h> /* For crc32 */
26
#define TCG_TARGET_HAS_mulsh_i32 0
19
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
27
@@ -XXX,XX +XXX,XX @@
20
return crc32c(acc, buf, bytes) ^ 0xffffffff;
28
#define TCG_TARGET_HAS_not_i64 1
21
}
29
#define TCG_TARGET_HAS_orc_i64 0
22
30
#define TCG_TARGET_HAS_rot_i64 1
23
-/* Returns 0 on success; 1 otherwise. */
31
-#define TCG_TARGET_HAS_movcond_i64 0
24
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
32
+#define TCG_TARGET_HAS_movcond_i64 1
25
- uint64_t new_lo, uint64_t new_hi,
33
#define TCG_TARGET_HAS_muls2_i64 0
26
- bool parallel, uintptr_t ra)
34
#define TCG_TARGET_HAS_add2_i32 0
27
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
35
#define TCG_TARGET_HAS_sub2_i32 0
28
+ uint64_t new_lo, uint64_t new_hi)
36
diff --git a/tcg/tci.c b/tcg/tci.c
29
{
37
index XXXXXXX..XXXXXXX 100644
30
- Int128 oldv, cmpv, newv;
38
--- a/tcg/tci.c
31
+ Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
39
+++ b/tcg/tci.c
32
+ Int128 newv = int128_make128(new_lo, new_hi);
40
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(uint32_t insn,
33
+ Int128 oldv;
41
*r2 = extract32(insn, 16, 4);
34
+ uintptr_t ra = GETPC();
42
*r3 = extract32(insn, 20, 4);
35
+ uint64_t o0, o1;
43
}
36
bool success;
37
38
- cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
39
- newv = int128_make128(new_lo, new_hi);
40
-
41
- if (parallel) {
42
-#ifndef CONFIG_ATOMIC128
43
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
44
-#else
45
- int mem_idx = cpu_mmu_index(env, false);
46
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
47
- oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
48
- success = int128_eq(oldv, cmpv);
49
-#endif
50
- } else {
51
- uint64_t o0, o1;
52
-
53
#ifdef CONFIG_USER_ONLY
54
- /* ??? Enforce alignment. */
55
- uint64_t *haddr = g2h(addr);
56
+ /* ??? Enforce alignment. */
57
+ uint64_t *haddr = g2h(addr);
58
59
- helper_retaddr = ra;
60
- o0 = ldq_le_p(haddr + 0);
61
- o1 = ldq_le_p(haddr + 1);
62
- oldv = int128_make128(o0, o1);
63
+ helper_retaddr = ra;
64
+ o0 = ldq_le_p(haddr + 0);
65
+ o1 = ldq_le_p(haddr + 1);
66
+ oldv = int128_make128(o0, o1);
67
68
- success = int128_eq(oldv, cmpv);
69
- if (success) {
70
- stq_le_p(haddr + 0, int128_getlo(newv));
71
- stq_le_p(haddr + 1, int128_gethi(newv));
72
- }
73
- helper_retaddr = 0;
74
-#else
75
- int mem_idx = cpu_mmu_index(env, false);
76
- TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
77
- TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
78
-
79
- o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
80
- o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
81
- oldv = int128_make128(o0, o1);
82
-
83
- success = int128_eq(oldv, cmpv);
84
- if (success) {
85
- helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
86
- helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
87
- }
88
-#endif
89
+ success = int128_eq(oldv, cmpv);
90
+ if (success) {
91
+ stq_le_p(haddr + 0, int128_getlo(newv));
92
+ stq_le_p(haddr + 1, int128_gethi(newv));
93
}
94
+ helper_retaddr = 0;
95
+#else
96
+ int mem_idx = cpu_mmu_index(env, false);
97
+ TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
98
+ TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
99
+
100
+ o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
101
+ o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
102
+ oldv = int128_make128(o0, o1);
103
+
104
+ success = int128_eq(oldv, cmpv);
105
+ if (success) {
106
+ helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
107
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
108
+ }
44
+#endif
109
+#endif
45
110
46
static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
111
return !success;
47
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
112
}
48
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
113
49
*c5 = extract32(insn, 28, 4);
114
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
50
}
115
- uint64_t new_lo, uint64_t new_hi)
51
116
-{
52
+#if TCG_TARGET_REG_BITS == 32
117
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
53
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
118
-}
54
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
119
-
55
{
120
uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
56
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
121
uint64_t new_lo, uint64_t new_hi)
57
tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
122
-{
58
regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
123
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
59
break;
124
-}
60
+ case INDEX_op_movcond_i32:
125
-
61
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
126
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
62
+ tmp32 = tci_compare32(regs[r1], regs[r2], condition);
127
- uint64_t new_lo, uint64_t new_hi,
63
+ regs[r0] = regs[tmp32 ? r3 : r4];
128
- bool parallel, uintptr_t ra)
64
+ break;
129
{
65
#if TCG_TARGET_REG_BITS == 32
130
Int128 oldv, cmpv, newv;
66
case INDEX_op_setcond2_i32:
131
+ uintptr_t ra = GETPC();
67
tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
132
bool success;
68
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
133
+ int mem_idx;
69
tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
134
+ TCGMemOpIdx oi;
70
regs[r0] = tci_compare64(regs[r1], regs[r2], condition);
135
71
break;
136
- /* high and low need to be switched here because this is not actually a
72
+ case INDEX_op_movcond_i64:
137
- * 128bit store but two doublewords stored consecutively
73
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
138
- */
74
+ tmp32 = tci_compare64(regs[r1], regs[r2], condition);
139
- cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
75
+ regs[r0] = regs[tmp32 ? r3 : r4];
140
- newv = int128_make128(new_hi, new_lo);
76
+ break;
141
-
77
#endif
142
- if (parallel) {
78
CASE_32_64(mov)
143
-#ifndef CONFIG_ATOMIC128
79
tci_args_rr(insn, &r0, &r1);
144
+ if (!HAVE_CMPXCHG128) {
80
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
145
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
81
op_name, str_r(r0), str_r(r1), str_r(r2), pos, len);
146
-#else
82
break;
147
- int mem_idx = cpu_mmu_index(env, false);
83
148
- TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
84
-#if TCG_TARGET_REG_BITS == 32
149
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
85
+ case INDEX_op_movcond_i32:
150
- success = int128_eq(oldv, cmpv);
86
+ case INDEX_op_movcond_i64:
151
-#endif
87
case INDEX_op_setcond2_i32:
152
- } else {
88
tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c);
153
- uint64_t o0, o1;
89
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
154
-
90
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
155
-#ifdef CONFIG_USER_ONLY
91
str_r(r3), str_r(r4), str_c(c));
156
- /* ??? Enforce alignment. */
92
break;
157
- uint64_t *haddr = g2h(addr);
93
158
-
94
+#if TCG_TARGET_REG_BITS == 32
159
- helper_retaddr = ra;
95
case INDEX_op_mulu2_i32:
160
- o1 = ldq_be_p(haddr + 0);
96
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
161
- o0 = ldq_be_p(haddr + 1);
97
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
162
- oldv = int128_make128(o0, o1);
98
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
163
-
99
index XXXXXXX..XXXXXXX 100644
164
- success = int128_eq(oldv, cmpv);
100
--- a/tcg/tci/tcg-target.c.inc
165
- if (success) {
101
+++ b/tcg/tci/tcg-target.c.inc
166
- stq_be_p(haddr + 0, int128_gethi(newv));
102
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
167
- stq_be_p(haddr + 1, int128_getlo(newv));
103
return C_O0_I4(r, r, r, r);
168
- }
104
case INDEX_op_mulu2_i32:
169
- helper_retaddr = 0;
105
return C_O2_I2(r, r, r, r);
170
-#else
171
- int mem_idx = cpu_mmu_index(env, false);
172
- TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
173
- TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
174
-
175
- o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
176
- o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
177
- oldv = int128_make128(o0, o1);
178
-
179
- success = int128_eq(oldv, cmpv);
180
- if (success) {
181
- helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
182
- helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
183
- }
184
-#endif
185
}
186
187
+ mem_idx = cpu_mmu_index(env, false);
188
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
189
+
190
+ cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
191
+ newv = int128_make128(new_lo, new_hi);
192
+ oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
193
+
194
+ success = int128_eq(oldv, cmpv);
195
return !success;
196
}
197
198
uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
199
uint64_t new_lo, uint64_t new_hi)
200
{
201
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
202
+ /*
203
+ * High and low need to be switched here because this is not actually a
204
+ * 128bit store but two doublewords stored consecutively
205
+ */
206
+ Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
207
+ Int128 newv = int128_make128(new_lo, new_hi);
208
+ Int128 oldv;
209
+ uintptr_t ra = GETPC();
210
+ uint64_t o0, o1;
211
+ bool success;
212
+
213
+#ifdef CONFIG_USER_ONLY
214
+ /* ??? Enforce alignment. */
215
+ uint64_t *haddr = g2h(addr);
216
+
217
+ helper_retaddr = ra;
218
+ o1 = ldq_be_p(haddr + 0);
219
+ o0 = ldq_be_p(haddr + 1);
220
+ oldv = int128_make128(o0, o1);
221
+
222
+ success = int128_eq(oldv, cmpv);
223
+ if (success) {
224
+ stq_be_p(haddr + 0, int128_gethi(newv));
225
+ stq_be_p(haddr + 1, int128_getlo(newv));
226
+ }
227
+ helper_retaddr = 0;
228
+#else
229
+ int mem_idx = cpu_mmu_index(env, false);
230
+ TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
231
+ TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
232
+
233
+ o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
234
+ o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
235
+ oldv = int128_make128(o0, o1);
236
+
237
+ success = int128_eq(oldv, cmpv);
238
+ if (success) {
239
+ helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
240
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
241
+ }
106
+#endif
242
+#endif
107
+
243
+
108
+ case INDEX_op_movcond_i32:
244
+ return !success;
109
+ case INDEX_op_movcond_i64:
245
}
110
case INDEX_op_setcond2_i32:
246
111
return C_O1_I4(r, r, r, r, r);
247
uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
112
-#endif
248
- uint64_t new_lo, uint64_t new_hi)
113
249
+ uint64_t new_lo, uint64_t new_hi)
114
case INDEX_op_qemu_ld_i32:
250
{
115
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
251
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
116
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
252
+ Int128 oldv, cmpv, newv;
117
insn = deposit32(insn, 20, 4, r3);
253
+ uintptr_t ra = GETPC();
118
tcg_out32(s, insn);
254
+ bool success;
119
}
255
+ int mem_idx;
120
+#endif
256
+ TCGMemOpIdx oi;
121
257
+
122
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
258
+ if (!HAVE_CMPXCHG128) {
123
TCGReg r0, TCGReg r1, TCGReg r2,
259
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
260
+ }
125
tcg_out32(s, insn);
261
+
126
}
262
+ mem_idx = cpu_mmu_index(env, false);
127
263
+ oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
128
+#if TCG_TARGET_REG_BITS == 32
264
+
129
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
265
+ /*
130
TCGReg r0, TCGReg r1, TCGReg r2,
266
+ * High and low need to be switched here because this is not actually a
131
TCGReg r3, TCGReg r4, TCGReg r5)
267
+ * 128bit store but two doublewords stored consecutively
132
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
268
+ */
133
tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]);
269
+ cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
134
break;
270
+ newv = int128_make128(new_hi, new_lo);
135
271
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
136
-#if TCG_TARGET_REG_BITS == 32
272
+
137
+ CASE_32_64(movcond)
273
+ success = int128_eq(oldv, cmpv);
138
case INDEX_op_setcond2_i32:
274
+ return !success;
139
tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2],
275
}
140
args[3], args[4], args[5]);
276
141
break;
277
/* Writes back the old data into Rs. */
142
-#endif
278
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
143
279
uint64_t new_lo, uint64_t new_hi)
144
CASE_32_64(ld8u)
280
{
145
CASE_32_64(ld8s)
281
- uintptr_t ra = GETPC();
282
-#ifndef CONFIG_ATOMIC128
283
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
284
-#else
285
Int128 oldv, cmpv, newv;
286
+ uintptr_t ra = GETPC();
287
+ int mem_idx;
288
+ TCGMemOpIdx oi;
289
+
290
+ if (!HAVE_CMPXCHG128) {
291
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
292
+ }
293
+
294
+ mem_idx = cpu_mmu_index(env, false);
295
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
296
297
cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
298
newv = int128_make128(new_lo, new_hi);
299
-
300
- int mem_idx = cpu_mmu_index(env, false);
301
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
302
oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
303
304
env->xregs[rs] = int128_getlo(oldv);
305
env->xregs[rs + 1] = int128_gethi(oldv);
306
-#endif
307
}
308
309
void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
310
uint64_t new_hi, uint64_t new_lo)
311
{
312
- uintptr_t ra = GETPC();
313
-#ifndef CONFIG_ATOMIC128
314
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
315
-#else
316
Int128 oldv, cmpv, newv;
317
+ uintptr_t ra = GETPC();
318
+ int mem_idx;
319
+ TCGMemOpIdx oi;
320
+
321
+ if (!HAVE_CMPXCHG128) {
322
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
323
+ }
324
+
325
+ mem_idx = cpu_mmu_index(env, false);
326
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
327
328
cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
329
newv = int128_make128(new_lo, new_hi);
330
-
331
- int mem_idx = cpu_mmu_index(env, false);
332
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
333
oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
334
335
env->xregs[rs + 1] = int128_getlo(oldv);
336
env->xregs[rs] = int128_gethi(oldv);
337
-#endif
338
}
339
340
/*
146
--
341
--
147
2.25.1
342
2.17.2
148
343
149
344
diff view generated by jsdifflib
1
Inline it into its one caller, tci_write_reg64.
1
Reviewed-by: Emilio G. Cota <cota@braap.org>
2
Drop the asserts that are redundant with tcg_read_r.
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
3
4
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
4
---
8
tcg/tci.c | 13 ++-----------
5
target/arm/helper-a64.c | 16 ++++------------
9
1 file changed, 2 insertions(+), 11 deletions(-)
6
target/arm/translate-a64.c | 38 ++++++++++++++++++++++----------------
7
2 files changed, 26 insertions(+), 28 deletions(-)
10
8
11
diff --git a/tcg/tci.c b/tcg/tci.c
9
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tci.c
11
--- a/target/arm/helper-a64.c
14
+++ b/tcg/tci.c
12
+++ b/target/arm/helper-a64.c
13
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
14
int mem_idx;
15
TCGMemOpIdx oi;
16
17
- if (!HAVE_CMPXCHG128) {
18
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
19
- }
20
+ assert(HAVE_CMPXCHG128);
21
22
mem_idx = cpu_mmu_index(env, false);
23
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
24
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
25
int mem_idx;
26
TCGMemOpIdx oi;
27
28
- if (!HAVE_CMPXCHG128) {
29
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
30
- }
31
+ assert(HAVE_CMPXCHG128);
32
33
mem_idx = cpu_mmu_index(env, false);
34
oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
35
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
36
int mem_idx;
37
TCGMemOpIdx oi;
38
39
- if (!HAVE_CMPXCHG128) {
40
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
41
- }
42
+ assert(HAVE_CMPXCHG128);
43
44
mem_idx = cpu_mmu_index(env, false);
45
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
46
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
47
int mem_idx;
48
TCGMemOpIdx oi;
49
50
- if (!HAVE_CMPXCHG128) {
51
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
52
- }
53
+ assert(HAVE_CMPXCHG128);
54
55
mem_idx = cpu_mmu_index(env, false);
56
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
57
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate-a64.c
60
+++ b/target/arm/translate-a64.c
15
@@ -XXX,XX +XXX,XX @@
61
@@ -XXX,XX +XXX,XX @@
16
62
17
__thread uintptr_t tci_tb_ptr;
63
#include "trace-tcg.h"
18
64
#include "translate-a64.h"
19
-static void
65
+#include "qemu/atomic128.h"
20
-tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
66
21
-{
67
static TCGv_i64 cpu_X[32];
22
- tci_assert(index < TCG_TARGET_NB_REGS);
68
static TCGv_i64 cpu_pc;
23
- tci_assert(index != TCG_AREG0);
69
@@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
24
- tci_assert(index != TCG_REG_CALL_STACK);
70
get_mem_index(s),
25
- regs[index] = value;
71
MO_64 | MO_ALIGN | s->be_data);
26
-}
72
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
73
- } else if (s->be_data == MO_LE) {
74
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
75
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
76
+ if (!HAVE_CMPXCHG128) {
77
+ gen_helper_exit_atomic(cpu_env);
78
+ s->base.is_jmp = DISAS_NORETURN;
79
+ } else if (s->be_data == MO_LE) {
80
gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
81
cpu_exclusive_addr,
82
cpu_reg(s, rt),
83
cpu_reg(s, rt2));
84
} else {
85
- gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
86
- cpu_reg(s, rt), cpu_reg(s, rt2));
87
- }
88
- } else {
89
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
90
gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
91
cpu_exclusive_addr,
92
cpu_reg(s, rt),
93
cpu_reg(s, rt2));
94
- } else {
95
- gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
96
- cpu_reg(s, rt), cpu_reg(s, rt2));
97
}
98
+ } else if (s->be_data == MO_LE) {
99
+ gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
100
+ cpu_reg(s, rt), cpu_reg(s, rt2));
101
+ } else {
102
+ gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
103
+ cpu_reg(s, rt), cpu_reg(s, rt2));
104
}
105
} else {
106
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
107
@@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
108
}
109
tcg_temp_free_i64(cmp);
110
} else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
111
- TCGv_i32 tcg_rs = tcg_const_i32(rs);
27
-
112
-
28
static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
113
- if (s->be_data == MO_LE) {
29
uint32_t low_index, uint64_t value)
114
- gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
30
{
115
+ if (HAVE_CMPXCHG128) {
31
- tci_write_reg(regs, low_index, value);
116
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
32
- tci_write_reg(regs, high_index, value >> 32);
117
+ if (s->be_data == MO_LE) {
33
+ regs[low_index] = value;
118
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
34
+ regs[high_index] = value >> 32;
119
+ } else {
35
}
120
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
36
121
+ }
37
/* Create a 64 bit value from two 32 bit values. */
122
+ tcg_temp_free_i32(tcg_rs);
123
} else {
124
- gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
125
+ gen_helper_exit_atomic(cpu_env);
126
+ s->base.is_jmp = DISAS_NORETURN;
127
}
128
- tcg_temp_free_i32(tcg_rs);
129
} else {
130
TCGv_i64 d1 = tcg_temp_new_i64();
131
TCGv_i64 d2 = tcg_temp_new_i64();
38
--
132
--
39
2.25.1
133
2.17.2
40
134
41
135
diff view generated by jsdifflib
1
We will shortly be interested in distinguishing pointers
1
Reviewed-by: Emilio G. Cota <cota@braap.org>
2
from integers in the helper's declaration, as well as a
3
true void return. We currently have two parallel 1-bit
4
fields; merge them and expand to a 3-bit field.
5
6
Our current maximum is 7 helper arguments; with the return
7
value, that makes 8 * 3 = 24 bits used within the uint32_t typemask.
8
9
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
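As a worked example of the packing described above, using the dh_typecode values added below (i32 = 2, i64 = 4, ptr = 6): slot 0 is the return value and each slot takes 3 bits, so 7 arguments plus the return fit in the low 24 bits of the uint32_t. A self-contained sketch with hypothetical names:

#include <stdint.h>

#define TYPECODE_I32  2
#define TYPECODE_I64  4
#define TYPECODE_PTR  6

/* The code for slot n occupies bits [3n, 3n+2]. */
#define TYPEMASK(code, n)  ((uint32_t)(code) << ((n) * 3))

/* void fn(i64, ptr): slot 0 stays 0 (void), slot 1 is i64, slot 2 is ptr. */
static const uint32_t example_typemask =
    TYPEMASK(TYPECODE_I64, 1) | TYPEMASK(TYPECODE_PTR, 2);

/* Recover the code for slot n. */
static inline unsigned typecode_at(uint32_t typemask, unsigned n)
{
    return (typemask >> (n * 3)) & 7;
}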
---
3
---
13
include/exec/helper-head.h | 37 +++++--------------
4
target/ppc/helper.h | 2 +-
14
include/exec/helper-tcg.h | 34 ++++++++---------
5
target/ppc/mem_helper.c | 33 ++++++++++--
15
target/hppa/helper.h | 3 --
6
target/ppc/translate.c | 115 +++++++++++++++++++++-------------------
16
target/i386/ops_sse_header.h | 3 --
7
3 files changed, 88 insertions(+), 62 deletions(-)
17
target/m68k/helper.h | 1 -
18
target/ppc/helper.h | 3 --
19
tcg/tcg.c | 71 +++++++++++++++++++++---------------
20
7 files changed, 67 insertions(+), 85 deletions(-)
21
8
22
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/exec/helper-head.h
25
+++ b/include/exec/helper-head.h
26
@@ -XXX,XX +XXX,XX @@
27
#define dh_retvar_ptr tcgv_ptr_temp(retval)
28
#define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
29
30
-#define dh_is_64bit_void 0
31
-#define dh_is_64bit_noreturn 0
32
-#define dh_is_64bit_i32 0
33
-#define dh_is_64bit_i64 1
34
-#define dh_is_64bit_ptr (sizeof(void *) == 8)
35
-#define dh_is_64bit_cptr dh_is_64bit_ptr
36
-#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t))
37
-
38
-#define dh_is_signed_void 0
39
-#define dh_is_signed_noreturn 0
40
-#define dh_is_signed_i32 0
41
-#define dh_is_signed_s32 1
42
-#define dh_is_signed_i64 0
43
-#define dh_is_signed_s64 1
44
-#define dh_is_signed_f16 0
45
-#define dh_is_signed_f32 0
46
-#define dh_is_signed_f64 0
47
-#define dh_is_signed_tl 0
48
-#define dh_is_signed_int 1
49
-/* ??? This is highly specific to the host cpu. There are even special
50
- extension instructions that may be required, e.g. ia64's addp4. But
51
- for now we don't support any 64-bit targets with 32-bit pointers. */
52
-#define dh_is_signed_ptr 0
53
-#define dh_is_signed_cptr dh_is_signed_ptr
54
-#define dh_is_signed_env dh_is_signed_ptr
55
-#define dh_is_signed(t) dh_is_signed_##t
56
+#define dh_typecode_void 0
57
+#define dh_typecode_noreturn 0
58
+#define dh_typecode_i32 2
59
+#define dh_typecode_s32 3
60
+#define dh_typecode_i64 4
61
+#define dh_typecode_s64 5
62
+#define dh_typecode_ptr 6
63
+#define dh_typecode(t) glue(dh_typecode_, dh_alias(t))
64
65
#define dh_callflag_i32 0
66
#define dh_callflag_s32 0
67
@@ -XXX,XX +XXX,XX @@
68
#define dh_callflag_noreturn TCG_CALL_NO_RETURN
69
#define dh_callflag(t) glue(dh_callflag_, dh_alias(t))
70
71
-#define dh_sizemask(t, n) \
72
- ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1)))
73
+#define dh_typemask(t, n) (dh_typecode(t) << (n * 3))
74
75
#define dh_arg(t, n) \
76
glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n))
77
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/exec/helper-tcg.h
80
+++ b/include/exec/helper-tcg.h
81
@@ -XXX,XX +XXX,XX @@
82
#define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \
83
{ .func = HELPER(NAME), .name = str(NAME), \
84
.flags = FLAGS | dh_callflag(ret), \
85
- .sizemask = dh_sizemask(ret, 0) },
86
+ .typemask = dh_typemask(ret, 0) },
87
88
#define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \
89
{ .func = HELPER(NAME), .name = str(NAME), \
90
.flags = FLAGS | dh_callflag(ret), \
91
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) },
92
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) },
93
94
#define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \
95
{ .func = HELPER(NAME), .name = str(NAME), \
96
.flags = FLAGS | dh_callflag(ret), \
97
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
98
- | dh_sizemask(t2, 2) },
99
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
100
+ | dh_typemask(t2, 2) },
101
102
#define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \
103
{ .func = HELPER(NAME), .name = str(NAME), \
104
.flags = FLAGS | dh_callflag(ret), \
105
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
106
- | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) },
107
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
108
+ | dh_typemask(t2, 2) | dh_typemask(t3, 3) },
109
110
#define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \
111
{ .func = HELPER(NAME), .name = str(NAME), \
112
.flags = FLAGS | dh_callflag(ret), \
113
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
114
- | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) },
115
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
116
+ | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) },
117
118
#define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \
119
{ .func = HELPER(NAME), .name = str(NAME), \
120
.flags = FLAGS | dh_callflag(ret), \
121
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
122
- | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
123
- | dh_sizemask(t5, 5) },
124
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
125
+ | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \
126
+ | dh_typemask(t5, 5) },
127
128
#define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \
129
{ .func = HELPER(NAME), .name = str(NAME), \
130
.flags = FLAGS | dh_callflag(ret), \
131
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
132
- | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
133
- | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) },
134
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
135
+ | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \
136
+ | dh_typemask(t5, 5) | dh_typemask(t6, 6) },
137
138
#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \
139
{ .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
140
- .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
141
- | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
142
- | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) },
143
+ .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \
144
+ | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \
145
+ | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7) },
146
147
#include "helper.h"
148
#include "trace/generated-helpers.h"
149
diff --git a/target/hppa/helper.h b/target/hppa/helper.h
150
index XXXXXXX..XXXXXXX 100644
151
--- a/target/hppa/helper.h
152
+++ b/target/hppa/helper.h
153
@@ -XXX,XX +XXX,XX @@
154
#if TARGET_REGISTER_BITS == 64
155
# define dh_alias_tr i64
156
-# define dh_is_64bit_tr 1
157
#else
158
# define dh_alias_tr i32
159
-# define dh_is_64bit_tr 0
160
#endif
161
#define dh_ctype_tr target_ureg
162
-#define dh_is_signed_tr 0
163
164
DEF_HELPER_2(excp, noreturn, env, int)
165
DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tr)
166
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
167
index XXXXXXX..XXXXXXX 100644
168
--- a/target/i386/ops_sse_header.h
169
+++ b/target/i386/ops_sse_header.h
170
@@ -XXX,XX +XXX,XX @@
171
#define dh_ctype_Reg Reg *
172
#define dh_ctype_ZMMReg ZMMReg *
173
#define dh_ctype_MMXReg MMXReg *
174
-#define dh_is_signed_Reg dh_is_signed_ptr
175
-#define dh_is_signed_ZMMReg dh_is_signed_ptr
176
-#define dh_is_signed_MMXReg dh_is_signed_ptr
177
178
DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg)
179
DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg)
180
diff --git a/target/m68k/helper.h b/target/m68k/helper.h
181
index XXXXXXX..XXXXXXX 100644
182
--- a/target/m68k/helper.h
183
+++ b/target/m68k/helper.h
184
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(cas2l_parallel, void, env, i32, i32, i32)
185
186
#define dh_alias_fp ptr
187
#define dh_ctype_fp FPReg *
188
-#define dh_is_signed_fp dh_is_signed_ptr
189
190
DEF_HELPER_3(exts32, void, env, fp, s32)
191
DEF_HELPER_3(extf32, void, env, fp, f32)
192
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
9
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
193
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
194
--- a/target/ppc/helper.h
11
--- a/target/ppc/helper.h
195
+++ b/target/ppc/helper.h
12
+++ b/target/ppc/helper.h
196
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
13
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
197
14
DEF_HELPER_1(tbegin, void, env)
198
#define dh_alias_avr ptr
15
DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
199
#define dh_ctype_avr ppc_avr_t *
16
200
-#define dh_is_signed_avr dh_is_signed_ptr
17
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
201
18
+#ifdef TARGET_PPC64
202
#define dh_alias_vsr ptr
19
DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
203
#define dh_ctype_vsr ppc_vsr_t *
20
DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
204
-#define dh_is_signed_vsr dh_is_signed_ptr
21
DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
205
22
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
206
DEF_HELPER_3(vavgub, void, avr, avr, avr)
207
DEF_HELPER_3(vavguh, void, avr, avr, avr)
208
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(store_601_batu, void, env, i32, tl)
209
210
#define dh_alias_fprp ptr
211
#define dh_ctype_fprp ppc_fprp_t *
212
-#define dh_is_signed_fprp dh_is_signed_ptr
213
214
DEF_HELPER_4(dadd, void, env, fprp, fprp, fprp)
215
DEF_HELPER_4(daddq, void, env, fprp, fprp, fprp)
216
diff --git a/tcg/tcg.c b/tcg/tcg.c
217
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
218
--- a/tcg/tcg.c
24
--- a/target/ppc/mem_helper.c
219
+++ b/tcg/tcg.c
25
+++ b/target/ppc/mem_helper.c
220
@@ -XXX,XX +XXX,XX @@ typedef struct TCGHelperInfo {
26
@@ -XXX,XX +XXX,XX @@
221
void *func;
27
#include "exec/cpu_ldst.h"
222
const char *name;
28
#include "tcg.h"
223
unsigned flags;
29
#include "internal.h"
224
- unsigned sizemask;
30
+#include "qemu/atomic128.h"
225
+ unsigned typemask;
31
226
} TCGHelperInfo;
32
//#define DEBUG_OP
227
33
228
#include "exec/helper-proto.h"
34
@@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
229
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
35
return i;
230
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
36
}
231
{
37
232
int i, real_args, nb_rets, pi;
38
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
233
- unsigned sizemask, flags;
39
+#ifdef TARGET_PPC64
234
+ unsigned typemask, flags;
40
uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
235
TCGHelperInfo *info;
41
uint32_t opidx)
236
TCGOp *op;
42
{
237
43
- Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
238
info = g_hash_table_lookup(helper_table, (gpointer)func);
44
+ Int128 ret;
239
flags = info->flags;
45
+
240
- sizemask = info->sizemask;
46
+ /* We will have raised EXCP_ATOMIC from the translator. */
241
+ typemask = info->typemask;
47
+ assert(HAVE_ATOMIC128);
242
48
+ ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
243
#ifdef CONFIG_PLUGIN
49
env->retxh = int128_gethi(ret);
244
/* detect non-plugin helpers */
50
return int128_getlo(ret);
245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
51
}
246
&& !defined(CONFIG_TCG_INTERPRETER)
52
@@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
247
/* We have 64-bit values in one register, but need to pass as two
53
uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
248
separate parameters. Split them. */
54
uint32_t opidx)
249
- int orig_sizemask = sizemask;
55
{
250
+ int orig_typemask = typemask;
56
- Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
251
int orig_nargs = nargs;
57
+ Int128 ret;
252
TCGv_i64 retl, reth;
58
+
253
TCGTemp *split_args[MAX_OPC_PARAM];
59
+ /* We will have raised EXCP_ATOMIC from the translator. */
254
60
+ assert(HAVE_ATOMIC128);
255
retl = NULL;
61
+ ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
256
reth = NULL;
62
env->retxh = int128_gethi(ret);
257
- if (sizemask != 0) {
63
return int128_getlo(ret);
258
- for (i = real_args = 0; i < nargs; ++i) {
64
}
259
- int is_64bit = sizemask & (1 << (i+1)*2);
65
@@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
260
- if (is_64bit) {
66
void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
261
- TCGv_i64 orig = temp_tcgv_i64(args[i]);
67
uint64_t lo, uint64_t hi, uint32_t opidx)
262
- TCGv_i32 h = tcg_temp_new_i32();
68
{
263
- TCGv_i32 l = tcg_temp_new_i32();
69
- Int128 val = int128_make128(lo, hi);
264
- tcg_gen_extr_i64_i32(l, h, orig);
70
+ Int128 val;
265
- split_args[real_args++] = tcgv_i32_temp(h);
71
+
266
- split_args[real_args++] = tcgv_i32_temp(l);
72
+ /* We will have raised EXCP_ATOMIC from the translator. */
267
- } else {
73
+ assert(HAVE_ATOMIC128);
268
- split_args[real_args++] = args[i];
74
+ val = int128_make128(lo, hi);
269
- }
75
helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
270
+ typemask = 0;
76
}
271
+ for (i = real_args = 0; i < nargs; ++i) {
77
272
+ int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
78
void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
273
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
79
uint64_t lo, uint64_t hi, uint32_t opidx)
274
+
80
{
275
+ if (is_64bit) {
81
- Int128 val = int128_make128(lo, hi);
276
+ TCGv_i64 orig = temp_tcgv_i64(args[i]);
82
+ Int128 val;
277
+ TCGv_i32 h = tcg_temp_new_i32();
83
+
278
+ TCGv_i32 l = tcg_temp_new_i32();
84
+ /* We will have raised EXCP_ATOMIC from the translator. */
279
+ tcg_gen_extr_i64_i32(l, h, orig);
85
+ assert(HAVE_ATOMIC128);
280
+ split_args[real_args++] = tcgv_i32_temp(h);
86
+ val = int128_make128(lo, hi);
281
+ typemask |= dh_typecode_i32 << (real_args * 3);
87
helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
282
+ split_args[real_args++] = tcgv_i32_temp(l);
88
}
283
+ typemask |= dh_typecode_i32 << (real_args * 3);
89
284
+ } else {
90
@@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
285
+ split_args[real_args++] = args[i];
91
{
286
+ typemask |= argtype << (real_args * 3);
92
bool success = false;
93
94
+ /* We will have raised EXCP_ATOMIC from the translator. */
95
+ assert(HAVE_CMPXCHG128);
96
+
97
if (likely(addr == env->reserve_addr)) {
98
Int128 oldv, cmpv, newv;
99
100
@@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
101
{
102
bool success = false;
103
104
+ /* We will have raised EXCP_ATOMIC from the translator. */
105
+ assert(HAVE_CMPXCHG128);
106
+
107
if (likely(addr == env->reserve_addr)) {
108
Int128 oldv, cmpv, newv;
109
110
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/target/ppc/translate.c
113
+++ b/target/ppc/translate.c
114
@@ -XXX,XX +XXX,XX @@
115
#include "trace-tcg.h"
116
#include "exec/translator.h"
117
#include "exec/log.h"
118
+#include "qemu/atomic128.h"
119
120
121
#define CPU_SINGLE_STEP 0x1
122
@@ -XXX,XX +XXX,XX @@ static void gen_lq(DisasContext *ctx)
123
hi = cpu_gpr[rd];
124
125
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
126
-#ifdef CONFIG_ATOMIC128
127
- TCGv_i32 oi = tcg_temp_new_i32();
128
- if (ctx->le_mode) {
129
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
130
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
131
+ if (HAVE_ATOMIC128) {
132
+ TCGv_i32 oi = tcg_temp_new_i32();
133
+ if (ctx->le_mode) {
134
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
135
+ gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
136
+ } else {
137
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
138
+ gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
139
+ }
140
+ tcg_temp_free_i32(oi);
141
+ tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
142
} else {
143
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
144
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
145
+ /* Restart with exclusive lock. */
146
+ gen_helper_exit_atomic(cpu_env);
147
+ ctx->base.is_jmp = DISAS_NORETURN;
287
}
148
}
288
- nargs = real_args;
149
- tcg_temp_free_i32(oi);
289
- args = split_args;
150
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
290
- sizemask = 0;
151
-#else
291
}
152
- /* Restart with exclusive lock. */
292
+ nargs = real_args;
153
- gen_helper_exit_atomic(cpu_env);
293
+ args = split_args;
154
- ctx->base.is_jmp = DISAS_NORETURN;
294
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
155
-#endif
295
for (i = 0; i < nargs; ++i) {
156
} else if (ctx->le_mode) {
296
- int is_64bit = sizemask & (1 << (i+1)*2);
157
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
297
- int is_signed = sizemask & (2 << (i+1)*2);
158
gen_addr_add(ctx, EA, EA, 8);
298
- if (!is_64bit) {
159
@@ -XXX,XX +XXX,XX @@ static void gen_std(DisasContext *ctx)
299
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
160
hi = cpu_gpr[rs];
300
+ bool is_32bit = (argtype & ~1) == dh_typecode_i32;
161
301
+ bool is_signed = argtype & 1;
162
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
302
+
163
-#ifdef CONFIG_ATOMIC128
303
+ if (is_32bit) {
164
- TCGv_i32 oi = tcg_temp_new_i32();
304
TCGv_i64 temp = tcg_temp_new_i64();
165
- if (ctx->le_mode) {
305
TCGv_i64 orig = temp_tcgv_i64(args[i]);
166
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
306
if (is_signed) {
167
- gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
307
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
168
+ if (HAVE_ATOMIC128) {
308
if (ret != NULL) {
169
+ TCGv_i32 oi = tcg_temp_new_i32();
309
#if defined(__sparc__) && !defined(__arch64__) \
170
+ if (ctx->le_mode) {
310
&& !defined(CONFIG_TCG_INTERPRETER)
171
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
311
- if (orig_sizemask & 1) {
172
+ gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
312
+ if ((typemask & 6) == dh_typecode_i64) {
173
+ } else {
313
/* The 32-bit ABI is going to return the 64-bit value in
174
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
314
the %o0/%o1 register pair. Prepare for this by using
175
+ gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
315
two return temporaries, and reassemble below. */
176
+ }
316
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
177
+ tcg_temp_free_i32(oi);
317
nb_rets = 1;
178
} else {
179
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
180
- gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
181
+ /* Restart with exclusive lock. */
182
+ gen_helper_exit_atomic(cpu_env);
183
+ ctx->base.is_jmp = DISAS_NORETURN;
184
}
185
- tcg_temp_free_i32(oi);
186
-#else
187
- /* Restart with exclusive lock. */
188
- gen_helper_exit_atomic(cpu_env);
189
- ctx->base.is_jmp = DISAS_NORETURN;
190
-#endif
191
} else if (ctx->le_mode) {
192
tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
193
gen_addr_add(ctx, EA, EA, 8);
194
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
195
hi = cpu_gpr[rd];
196
197
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
198
-#ifdef CONFIG_ATOMIC128
199
- TCGv_i32 oi = tcg_temp_new_i32();
200
- if (ctx->le_mode) {
201
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
202
- ctx->mem_idx));
203
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
204
+ if (HAVE_ATOMIC128) {
205
+ TCGv_i32 oi = tcg_temp_new_i32();
206
+ if (ctx->le_mode) {
207
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
208
+ ctx->mem_idx));
209
+ gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
210
+ } else {
211
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
212
+ ctx->mem_idx));
213
+ gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
214
+ }
215
+ tcg_temp_free_i32(oi);
216
+ tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
217
} else {
218
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
219
- ctx->mem_idx));
220
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
221
+ /* Restart with exclusive lock. */
222
+ gen_helper_exit_atomic(cpu_env);
223
+ ctx->base.is_jmp = DISAS_NORETURN;
224
+ tcg_temp_free(EA);
225
+ return;
318
}
226
}
319
#else
227
- tcg_temp_free_i32(oi);
320
- if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
228
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
321
+ if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
229
-#else
322
#ifdef HOST_WORDS_BIGENDIAN
230
- /* Restart with exclusive lock. */
323
op->args[pi++] = temp_arg(ret + 1);
231
- gen_helper_exit_atomic(cpu_env);
324
op->args[pi++] = temp_arg(ret);
232
- ctx->base.is_jmp = DISAS_NORETURN;
325
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
233
- tcg_temp_free(EA);
326
234
- return;
327
real_args = 0;
235
-#endif
328
for (i = 0; i < nargs; i++) {
236
} else if (ctx->le_mode) {
329
- int is_64bit = sizemask & (1 << (i+1)*2);
237
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
330
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
238
tcg_gen_mov_tl(cpu_reserve, EA);
331
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
239
@@ -XXX,XX +XXX,XX @@ static void gen_stqcx_(DisasContext *ctx)
332
+
240
hi = cpu_gpr[rs];
333
if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
241
334
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
242
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
335
/* some targets want aligned 64 bit args */
243
- TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
336
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
244
-#ifdef CONFIG_ATOMIC128
337
&& !defined(CONFIG_TCG_INTERPRETER)
245
- if (ctx->le_mode) {
338
/* Free all of the parts we allocated above. */
246
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
339
for (i = real_args = 0; i < orig_nargs; ++i) {
247
+ if (HAVE_CMPXCHG128) {
340
- int is_64bit = orig_sizemask & (1 << (i+1)*2);
248
+ TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
341
+ int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
249
+ if (ctx->le_mode) {
342
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
250
+ gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
343
+
251
+ EA, lo, hi, oi);
344
if (is_64bit) {
252
+ } else {
345
tcg_temp_free_internal(args[real_args++]);
253
+ gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
346
tcg_temp_free_internal(args[real_args++]);
254
+ EA, lo, hi, oi);
347
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
255
+ }
348
real_args++;
256
+ tcg_temp_free_i32(oi);
257
} else {
258
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
259
+ /* Restart with exclusive lock. */
260
+ gen_helper_exit_atomic(cpu_env);
261
+ ctx->base.is_jmp = DISAS_NORETURN;
349
}
262
}
350
}
263
-#else
351
- if (orig_sizemask & 1) {
264
- /* Restart with exclusive lock. */
352
+ if ((orig_typemask & 6) == dh_typecode_i64) {
265
- gen_helper_exit_atomic(cpu_env);
353
/* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
266
- ctx->base.is_jmp = DISAS_NORETURN;
354
Note that describing these as TCGv_i64 eliminates an unnecessary
267
-#endif
355
zero-extension that tcg_gen_concat_i32_i64 would create. */
268
tcg_temp_free(EA);
356
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
269
- tcg_temp_free_i32(oi);
357
}
270
} else {
358
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
271
TCGLabel *lab_fail = gen_new_label();
359
for (i = 0; i < nargs; ++i) {
272
TCGLabel *lab_over = gen_new_label();
360
- int is_64bit = sizemask & (1 << (i+1)*2);
361
- if (!is_64bit) {
362
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
363
+ bool is_32bit = (argtype & ~1) == dh_typecode_i32;
364
+
365
+ if (is_32bit) {
366
tcg_temp_free_internal(args[i]);
367
}
368
}
369
--
273
--
370
2.25.1
274
2.17.2
371
275
372
276
Deleted patch
1
We're going to change how to look up the call flags from a TCGOp,
2
so extract it as a helper.
3
1
4
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/tcg-internal.h | 5 +++++
9
tcg/optimize.c | 3 ++-
10
tcg/tcg.c | 14 ++++++--------
11
3 files changed, 13 insertions(+), 9 deletions(-)
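For reference, the indexing the new helper hides is the call op's argument layout: outputs first, then inputs, then the function pointer, then the flags word, so the flags live at args[CALLO + CALLI + 1]. Here is a hedged sketch using simplified stand-ins (FakeOp, fake_call_flags) rather than the real TCGOp, TCGOP_CALLO and TCGOP_CALLI; the operand values are dummies.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a call TCGOp, just to show the layout. */
typedef struct {
    unsigned callo;        /* number of output arguments */
    unsigned calli;        /* number of input arguments */
    uint64_t args[8];      /* [outputs..][inputs..][func][flags] */
} FakeOp;

static unsigned fake_call_flags(const FakeOp *op)
{
    /* Same index computation as tcg_call_flags(). */
    return op->args[op->callo + op->calli + 1];
}

int main(void)
{
    FakeOp op = {
        .callo = 1,
        .calli = 2,
        .args = {
            100,            /* output temp */
            101, 102,       /* input temps */
            0x400000,       /* function pointer (dummy) */
            0x4,            /* flags word (dummy) */
        },
    };
    printf("flags = 0x%x\n", fake_call_flags(&op));
    return 0;
}

With the lookup in one place, a later change can store the flags somewhere else without touching every reader.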
12
13
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tcg-internal.h
16
+++ b/tcg/tcg-internal.h
17
@@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s);
18
void tcg_region_initial_alloc(TCGContext *s);
19
void tcg_region_prologue_set(TCGContext *s);
20
21
+static inline unsigned tcg_call_flags(TCGOp *op)
22
+{
23
+ return op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
24
+}
25
+
26
#endif /* TCG_INTERNAL_H */
27
diff --git a/tcg/optimize.c b/tcg/optimize.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/optimize.c
30
+++ b/tcg/optimize.c
31
@@ -XXX,XX +XXX,XX @@
32
33
#include "qemu/osdep.h"
34
#include "tcg/tcg-op.h"
35
+#include "tcg-internal.h"
36
37
#define CASE_OP_32_64(x) \
38
glue(glue(case INDEX_op_, x), _i32): \
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
break;
41
42
case INDEX_op_call:
43
- if (!(op->args[nb_oargs + nb_iargs + 1]
44
+ if (!(tcg_call_flags(op)
45
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
46
for (i = 0; i < nb_globals; i++) {
47
if (test_bit(i, temps_used.l)) {
48
diff --git a/tcg/tcg.c b/tcg/tcg.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/tcg/tcg.c
51
+++ b/tcg/tcg.c
52
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
53
nb_cargs = def->nb_cargs;
54
55
/* function name, flags, out args */
56
- col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
57
+ col += qemu_log(" %s %s,$0x%x,$%d", def->name,
58
tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
59
- op->args[nb_oargs + nb_iargs + 1], nb_oargs);
60
+ tcg_call_flags(op), nb_oargs);
61
for (i = 0; i < nb_oargs; i++) {
62
col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
63
op->args[i]));
64
@@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s)
65
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
66
bool remove = dead;
67
TCGLabel *label;
68
- int call_flags;
69
70
switch (op->opc) {
71
case INDEX_op_set_label:
72
@@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s)
73
74
case INDEX_op_call:
75
/* Notice noreturn helper calls, raising exceptions. */
76
- call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
77
- if (call_flags & TCG_CALL_NO_RETURN) {
78
+ if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
79
dead = true;
80
}
81
break;
82
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
83
84
nb_oargs = TCGOP_CALLO(op);
85
nb_iargs = TCGOP_CALLI(op);
86
- call_flags = op->args[nb_oargs + nb_iargs + 1];
87
+ call_flags = tcg_call_flags(op);
88
89
/* pure functions can be removed if their result is unused */
90
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
91
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
92
if (opc == INDEX_op_call) {
93
nb_oargs = TCGOP_CALLO(op);
94
nb_iargs = TCGOP_CALLI(op);
95
- call_flags = op->args[nb_oargs + nb_iargs + 1];
96
+ call_flags = tcg_call_flags(op);
97
} else {
98
nb_iargs = def->nb_iargs;
99
nb_oargs = def->nb_oargs;
100
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
101
TCGRegSet allocated_regs;
102
103
func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
104
- flags = op->args[nb_oargs + nb_iargs + 1];
105
+ flags = tcg_call_flags(op);
106
107
nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
108
if (nb_regs > nb_iargs) {
109
--
110
2.25.1
111
112
Deleted patch
1
Let the compiler decide on inlining.
2
1
3
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/plugin-gen.c | 12 +++++-------
8
1 file changed, 5 insertions(+), 7 deletions(-)
9
10
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/plugin-gen.c
13
+++ b/accel/tcg/plugin-gen.c
14
@@ -XXX,XX +XXX,XX @@ static void gen_empty_mem_helper(void)
15
tcg_temp_free_ptr(ptr);
16
}
17
18
-static inline
19
-void gen_plugin_cb_start(enum plugin_gen_from from,
20
- enum plugin_gen_cb type, unsigned wr)
21
+static void gen_plugin_cb_start(enum plugin_gen_from from,
22
+ enum plugin_gen_cb type, unsigned wr)
23
{
24
TCGOp *op;
25
26
@@ -XXX,XX +XXX,XX @@ static void gen_wrapped(enum plugin_gen_from from,
27
tcg_gen_plugin_cb_end();
28
}
29
30
-static inline void plugin_gen_empty_callback(enum plugin_gen_from from)
31
+static void plugin_gen_empty_callback(enum plugin_gen_from from)
32
{
33
switch (from) {
34
case PLUGIN_GEN_AFTER_INSN:
35
@@ -XXX,XX +XXX,XX @@ static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb)
36
return !!(cb->rw & (w + 1));
37
}
38
39
-static inline
40
-void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject,
41
- op_ok_fn ok)
42
+static void inject_cb_type(const GArray *cbs, TCGOp *begin_op,
43
+ inject_fn inject, op_ok_fn ok)
44
{
45
TCGOp *end_op;
46
TCGOp *op;
47
--
48
2.25.1
49
50
1
This removes all of the problems with unaligned accesses
1
Reviewed-by: David Hildenbrand <david@redhat.com>
2
to the bytecode stream.
3
4
With an 8-bit opcode at the bottom, we have 24 bits remaining,
5
which are generally split into 6 4-bit slots. This fits well
6
with the maximum length opcodes, e.g. INDEX_op_add2_i32, which
7
have 6 register operands.
8
9
We have, in previous patches, rearranged things such that there
10
are no operations with a label which have more than one other
11
operand. Which leaves us with a 20-bit field in which to encode
12
a label, giving us a maximum TB size of 512k -- easily large enough.
13
14
Change the INDEX_op_tci_movi_{i32,i64} opcodes to tci_mov[il].
15
The former puts the immediate in the upper 20 bits of the insn,
16
like we do for the label displacement. The latter uses a label
17
to reference an entry in the constant pool. Thus, in the worst
18
case we still have a single memory reference for any constant,
19
but now the constants are out-of-line of the bytecode and can
20
be shared between different moves, saving space.
21
22
Change INDEX_op_call to use a label to reference a pair of
23
pointers in the constant pool. This removes the only slightly
24
dodgy link with the layout of struct TCGHelperInfo.
25
26
The re-encode cannot be done in pieces.
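As an illustration only (not part of the patch): under this layout a word carries the opcode in bits [0,8) and 4-bit register fields starting at bit 8; branch-style operations put a 20-bit signed label displacement at bit 12, taken relative to the word that follows, while tci_movi reuses the same 20-bit field as a small signed immediate. The field positions below mirror the tci_args_* helpers in the diff; the opcode value and operands are invented, and extract_bits/sextract_bits stand in for extract32/sextract32.

#include <stdint.h>
#include <stdio.h>

/* Pull a len-bit unsigned field out of a 32-bit word (len < 32 assumed). */
static uint32_t extract_bits(uint32_t v, int start, int len)
{
    return (v >> start) & ((1u << len) - 1);
}

/* Pull a len-bit signed field: shift it up to bit 31, then back down. */
static int32_t sextract_bits(uint32_t v, int start, int len)
{
    return (int32_t)(v << (32 - start - len)) >> (32 - len);
}

int main(void)
{
    /* Hypothetical brcond-style word: opcode 0x2a, register 3,
       branch displacement of -8 bytes from the following word. */
    int32_t disp = -8;
    uint32_t insn = 0x2a
                  | (3u << 8)
                  | (((uint32_t)disp & 0xfffff) << 12);

    unsigned opc = extract_bits(insn, 0, 8);
    unsigned r0  = extract_bits(insn, 8, 4);
    int32_t  off = sextract_bits(insn, 12, 20);

    printf("opc=0x%02x r0=%u disp=%d\n", opc, r0, off);
    return 0;
}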
27
28
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
29
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
30
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
31
---
3
---
32
include/tcg/tcg-opc.h | 4 +-
4
target/s390x/mem_helper.c | 92 +++++++++++++++++----------------------
33
tcg/tci/tcg-target.h | 3 +-
5
1 file changed, 41 insertions(+), 51 deletions(-)
34
tcg/tci.c | 539 +++++++++++++++------------------------
35
tcg/tci/tcg-target.c.inc | 379 ++++++++++++---------------
36
tcg/tci/README | 20 +-
37
5 files changed, 383 insertions(+), 562 deletions(-)
38
6
39
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
7
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
40
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
41
--- a/include/tcg/tcg-opc.h
9
--- a/target/s390x/mem_helper.c
42
+++ b/include/tcg/tcg-opc.h
10
+++ b/target/s390x/mem_helper.c
43
@@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
11
@@ -XXX,XX +XXX,XX @@
44
12
#include "exec/exec-all.h"
45
#ifdef TCG_TARGET_INTERPRETER
13
#include "exec/cpu_ldst.h"
46
/* These opcodes are only for use between the tci generator and interpreter. */
14
#include "qemu/int128.h"
47
-DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
15
+#include "qemu/atomic128.h"
48
-DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
16
49
+DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
17
#if !defined(CONFIG_USER_ONLY)
50
+DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
18
#include "hw/s390x/storage-keys.h"
19
@@ -XXX,XX +XXX,XX @@ static void do_cdsg(CPUS390XState *env, uint64_t addr,
20
bool fail;
21
22
if (parallel) {
23
-#ifndef CONFIG_ATOMIC128
24
+#if !HAVE_CMPXCHG128
25
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
26
#else
27
int mem_idx = cpu_mmu_index(env, false);
28
@@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
29
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
30
uint64_t a2, bool parallel)
31
{
32
-#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
33
uint32_t mem_idx = cpu_mmu_index(env, false);
34
-#endif
35
uintptr_t ra = GETPC();
36
uint32_t fc = extract32(env->regs[0], 0, 8);
37
uint32_t sc = extract32(env->regs[0], 8, 8);
38
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
39
probe_write(env, a2, 0, mem_idx, ra);
51
#endif
40
#endif
52
41
53
#undef TLADDR_ARGS
42
- /* Note that the compare-and-swap is atomic, and the store is atomic, but
54
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
43
- the complete operation is not. Therefore we do not need to assert serial
55
index XXXXXXX..XXXXXXX 100644
44
- context in order to implement this. That said, restart early if we can't
56
--- a/tcg/tci/tcg-target.h
45
- support either operation that is supposed to be atomic. */
57
+++ b/tcg/tci/tcg-target.h
46
+ /*
58
@@ -XXX,XX +XXX,XX @@
47
+ * Note that the compare-and-swap is atomic, and the store is atomic,
59
#define TCG_TARGET_H
48
+ * but the complete operation is not. Therefore we do not need to
60
49
+ * assert serial context in order to implement this. That said,
61
#define TCG_TARGET_INTERPRETER 1
50
+ * restart early if we can't support either operation that is supposed
62
-#define TCG_TARGET_INSN_UNIT_SIZE 1
51
+ * to be atomic.
63
+#define TCG_TARGET_INSN_UNIT_SIZE 4
52
+ */
64
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
53
if (parallel) {
65
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
54
- int mask = 0;
66
55
-#if !defined(CONFIG_ATOMIC64)
67
@@ -XXX,XX +XXX,XX @@ typedef enum {
56
- mask = -8;
68
#define TCG_TARGET_STACK_ALIGN 8
57
-#elif !defined(CONFIG_ATOMIC128)
69
58
- mask = -16;
70
#define HAVE_TCG_QEMU_TB_EXEC
59
+ uint32_t max = 2;
71
+#define TCG_TARGET_NEED_POOL_LABELS
60
+#ifdef CONFIG_ATOMIC64
72
61
+ max = 3;
73
/* We could notice __i386__ or __s390x__ and reduce the barriers depending
62
#endif
74
on the host. But if you want performance, you use the normal backend.
63
- if (((4 << fc) | (1 << sc)) & mask) {
75
diff --git a/tcg/tci.c b/tcg/tci.c
64
+ if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
76
index XXXXXXX..XXXXXXX 100644
65
+ (HAVE_ATOMIC128 ? 0 : sc > max)) {
77
--- a/tcg/tci.c
66
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
78
+++ b/tcg/tci.c
67
}
79
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_uint64(uint32_t high, uint32_t low)
68
}
80
return ((uint64_t)high << 32) + low;
69
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
81
}
70
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
82
71
Int128 ov;
83
-/* Read constant byte from bytecode. */
72
84
-static uint8_t tci_read_b(const uint8_t **tb_ptr)
73
- if (parallel) {
85
-{
74
-#ifdef CONFIG_ATOMIC128
86
- return *(tb_ptr[0]++);
75
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
87
-}
76
- ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
88
-
77
- cc = !int128_eq(ov, cv);
89
-/* Read register number from bytecode. */
78
-#else
90
-static TCGReg tci_read_r(const uint8_t **tb_ptr)
79
- /* Note that we asserted !parallel above. */
91
-{
80
- g_assert_not_reached();
92
- uint8_t regno = tci_read_b(tb_ptr);
93
- tci_assert(regno < TCG_TARGET_NB_REGS);
94
- return regno;
95
-}
96
-
97
-/* Read constant (native size) from bytecode. */
98
-static tcg_target_ulong tci_read_i(const uint8_t **tb_ptr)
99
-{
100
- tcg_target_ulong value = *(const tcg_target_ulong *)(*tb_ptr);
101
- *tb_ptr += sizeof(value);
102
- return value;
103
-}
104
-
105
-/* Read unsigned constant (32 bit) from bytecode. */
106
-static uint32_t tci_read_i32(const uint8_t **tb_ptr)
107
-{
108
- uint32_t value = *(const uint32_t *)(*tb_ptr);
109
- *tb_ptr += sizeof(value);
110
- return value;
111
-}
112
-
113
-/* Read signed constant (32 bit) from bytecode. */
114
-static int32_t tci_read_s32(const uint8_t **tb_ptr)
115
-{
116
- int32_t value = *(const int32_t *)(*tb_ptr);
117
- *tb_ptr += sizeof(value);
118
- return value;
119
-}
120
-
121
-static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
122
-{
123
- return tci_read_i(tb_ptr);
124
-}
125
-
126
/*
127
* Load sets of arguments all at once. The naming convention is:
128
* tci_args_<arguments>
129
@@ -XXX,XX +XXX,XX @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
130
* s = signed ldst offset
131
*/
132
133
-static void check_size(const uint8_t *start, const uint8_t **tb_ptr)
134
+static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0)
135
{
136
- const uint8_t *old_code_ptr = start - 2;
137
- uint8_t op_size = old_code_ptr[1];
138
- tci_assert(*tb_ptr == old_code_ptr + op_size);
139
+ int diff = sextract32(insn, 12, 20);
140
+ *l0 = diff ? (void *)tb_ptr + diff : NULL;
141
}
142
143
-static void tci_args_l(const uint8_t **tb_ptr, void **l0)
144
+static void tci_args_nl(uint32_t insn, const void *tb_ptr,
145
+ uint8_t *n0, void **l1)
146
{
147
- const uint8_t *start = *tb_ptr;
148
-
149
- *l0 = (void *)tci_read_label(tb_ptr);
150
-
151
- check_size(start, tb_ptr);
152
+ *n0 = extract32(insn, 8, 4);
153
+ *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr;
154
}
155
156
-static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0,
157
- void **l1, void **l2)
158
+static void tci_args_rl(uint32_t insn, const void *tb_ptr,
159
+ TCGReg *r0, void **l1)
160
{
161
- const uint8_t *start = *tb_ptr;
162
-
163
- *n0 = tci_read_b(tb_ptr);
164
- *l1 = (void *)tci_read_label(tb_ptr);
165
- *l2 = (void *)tci_read_label(tb_ptr);
166
-
167
- check_size(start, tb_ptr);
168
+ *r0 = extract32(insn, 8, 4);
169
+ *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr;
170
}
171
172
-static void tci_args_rl(const uint8_t **tb_ptr, TCGReg *r0, void **l1)
173
+static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1)
174
{
175
- const uint8_t *start = *tb_ptr;
176
-
177
- *r0 = tci_read_r(tb_ptr);
178
- *l1 = (void *)tci_read_label(tb_ptr);
179
-
180
- check_size(start, tb_ptr);
181
+ *r0 = extract32(insn, 8, 4);
182
+ *r1 = extract32(insn, 12, 4);
183
}
184
185
-static void tci_args_rr(const uint8_t **tb_ptr,
186
- TCGReg *r0, TCGReg *r1)
187
+static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1)
188
{
189
- const uint8_t *start = *tb_ptr;
190
-
191
- *r0 = tci_read_r(tb_ptr);
192
- *r1 = tci_read_r(tb_ptr);
193
-
194
- check_size(start, tb_ptr);
195
+ *r0 = extract32(insn, 8, 4);
196
+ *i1 = sextract32(insn, 12, 20);
197
}
198
199
-static void tci_args_ri(const uint8_t **tb_ptr,
200
- TCGReg *r0, tcg_target_ulong *i1)
201
+static void tci_args_rrm(uint32_t insn, TCGReg *r0,
202
+ TCGReg *r1, TCGMemOpIdx *m2)
203
{
204
- const uint8_t *start = *tb_ptr;
205
-
206
- *r0 = tci_read_r(tb_ptr);
207
- *i1 = tci_read_i32(tb_ptr);
208
-
209
- check_size(start, tb_ptr);
210
+ *r0 = extract32(insn, 8, 4);
211
+ *r1 = extract32(insn, 12, 4);
212
+ *m2 = extract32(insn, 20, 12);
213
}
214
215
-#if TCG_TARGET_REG_BITS == 64
216
-static void tci_args_rI(const uint8_t **tb_ptr,
217
- TCGReg *r0, tcg_target_ulong *i1)
218
+static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
219
{
220
- const uint8_t *start = *tb_ptr;
221
-
222
- *r0 = tci_read_r(tb_ptr);
223
- *i1 = tci_read_i(tb_ptr);
224
-
225
- check_size(start, tb_ptr);
226
-}
227
-#endif
81
-#endif
228
-
82
- } else {
229
-static void tci_args_rrm(const uint8_t **tb_ptr,
83
+ if (!parallel) {
230
- TCGReg *r0, TCGReg *r1, TCGMemOpIdx *m2)
84
uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
231
-{
85
uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
232
- const uint8_t *start = *tb_ptr;
86
233
-
87
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
234
- *r0 = tci_read_r(tb_ptr);
88
235
- *r1 = tci_read_r(tb_ptr);
89
cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
236
- *m2 = tci_read_i32(tb_ptr);
90
cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
237
-
91
+ } else if (HAVE_CMPXCHG128) {
238
- check_size(start, tb_ptr);
92
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
239
+ *r0 = extract32(insn, 8, 4);
93
+ ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
240
+ *r1 = extract32(insn, 12, 4);
94
+ cc = !int128_eq(ov, cv);
241
+ *r2 = extract32(insn, 16, 4);
95
+ } else {
242
}
96
+ /* Note that we asserted !parallel above. */
243
97
+ g_assert_not_reached();
244
-static void tci_args_rrr(const uint8_t **tb_ptr,
245
- TCGReg *r0, TCGReg *r1, TCGReg *r2)
246
+static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2)
247
{
248
- const uint8_t *start = *tb_ptr;
249
-
250
- *r0 = tci_read_r(tb_ptr);
251
- *r1 = tci_read_r(tb_ptr);
252
- *r2 = tci_read_r(tb_ptr);
253
-
254
- check_size(start, tb_ptr);
255
+ *r0 = extract32(insn, 8, 4);
256
+ *r1 = extract32(insn, 12, 4);
257
+ *i2 = sextract32(insn, 16, 16);
258
}
259
260
-static void tci_args_rrs(const uint8_t **tb_ptr,
261
- TCGReg *r0, TCGReg *r1, int32_t *i2)
262
-{
263
- const uint8_t *start = *tb_ptr;
264
-
265
- *r0 = tci_read_r(tb_ptr);
266
- *r1 = tci_read_r(tb_ptr);
267
- *i2 = tci_read_s32(tb_ptr);
268
-
269
- check_size(start, tb_ptr);
270
-}
271
-
272
-static void tci_args_rrrc(const uint8_t **tb_ptr,
273
+static void tci_args_rrrc(uint32_t insn,
274
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
275
{
276
- const uint8_t *start = *tb_ptr;
277
-
278
- *r0 = tci_read_r(tb_ptr);
279
- *r1 = tci_read_r(tb_ptr);
280
- *r2 = tci_read_r(tb_ptr);
281
- *c3 = tci_read_b(tb_ptr);
282
-
283
- check_size(start, tb_ptr);
284
+ *r0 = extract32(insn, 8, 4);
285
+ *r1 = extract32(insn, 12, 4);
286
+ *r2 = extract32(insn, 16, 4);
287
+ *c3 = extract32(insn, 20, 4);
288
}
289
290
-static void tci_args_rrrm(const uint8_t **tb_ptr,
291
+static void tci_args_rrrm(uint32_t insn,
292
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGMemOpIdx *m3)
293
{
294
- const uint8_t *start = *tb_ptr;
295
-
296
- *r0 = tci_read_r(tb_ptr);
297
- *r1 = tci_read_r(tb_ptr);
298
- *r2 = tci_read_r(tb_ptr);
299
- *m3 = tci_read_i32(tb_ptr);
300
-
301
- check_size(start, tb_ptr);
302
+ *r0 = extract32(insn, 8, 4);
303
+ *r1 = extract32(insn, 12, 4);
304
+ *r2 = extract32(insn, 16, 4);
305
+ *m3 = extract32(insn, 20, 12);
306
}
307
308
-static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
309
+static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
310
TCGReg *r2, uint8_t *i3, uint8_t *i4)
311
{
312
- const uint8_t *start = *tb_ptr;
313
-
314
- *r0 = tci_read_r(tb_ptr);
315
- *r1 = tci_read_r(tb_ptr);
316
- *r2 = tci_read_r(tb_ptr);
317
- *i3 = tci_read_b(tb_ptr);
318
- *i4 = tci_read_b(tb_ptr);
319
-
320
- check_size(start, tb_ptr);
321
+ *r0 = extract32(insn, 8, 4);
322
+ *r1 = extract32(insn, 12, 4);
323
+ *r2 = extract32(insn, 16, 4);
324
+ *i3 = extract32(insn, 20, 6);
325
+ *i4 = extract32(insn, 26, 6);
326
}
327
328
-static void tci_args_rrrrm(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
329
- TCGReg *r2, TCGReg *r3, TCGMemOpIdx *m4)
330
+static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
331
+ TCGReg *r2, TCGReg *r3, TCGReg *r4)
332
{
333
- const uint8_t *start = *tb_ptr;
334
-
335
- *r0 = tci_read_r(tb_ptr);
336
- *r1 = tci_read_r(tb_ptr);
337
- *r2 = tci_read_r(tb_ptr);
338
- *r3 = tci_read_r(tb_ptr);
339
- *m4 = tci_read_i32(tb_ptr);
340
-
341
- check_size(start, tb_ptr);
342
+ *r0 = extract32(insn, 8, 4);
343
+ *r1 = extract32(insn, 12, 4);
344
+ *r2 = extract32(insn, 16, 4);
345
+ *r3 = extract32(insn, 20, 4);
346
+ *r4 = extract32(insn, 24, 4);
347
}
348
349
#if TCG_TARGET_REG_BITS == 32
350
-static void tci_args_rrrr(const uint8_t **tb_ptr,
351
+static void tci_args_rrrr(uint32_t insn,
352
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
353
{
354
- const uint8_t *start = *tb_ptr;
355
-
356
- *r0 = tci_read_r(tb_ptr);
357
- *r1 = tci_read_r(tb_ptr);
358
- *r2 = tci_read_r(tb_ptr);
359
- *r3 = tci_read_r(tb_ptr);
360
-
361
- check_size(start, tb_ptr);
362
+ *r0 = extract32(insn, 8, 4);
363
+ *r1 = extract32(insn, 12, 4);
364
+ *r2 = extract32(insn, 16, 4);
365
+ *r3 = extract32(insn, 20, 4);
366
}
367
368
-static void tci_args_rrrrrc(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
369
+static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
370
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
371
{
372
- const uint8_t *start = *tb_ptr;
373
-
374
- *r0 = tci_read_r(tb_ptr);
375
- *r1 = tci_read_r(tb_ptr);
376
- *r2 = tci_read_r(tb_ptr);
377
- *r3 = tci_read_r(tb_ptr);
378
- *r4 = tci_read_r(tb_ptr);
379
- *c5 = tci_read_b(tb_ptr);
380
-
381
- check_size(start, tb_ptr);
382
+ *r0 = extract32(insn, 8, 4);
383
+ *r1 = extract32(insn, 12, 4);
384
+ *r2 = extract32(insn, 16, 4);
385
+ *r3 = extract32(insn, 20, 4);
386
+ *r4 = extract32(insn, 24, 4);
387
+ *c5 = extract32(insn, 28, 4);
388
}
389
390
-static void tci_args_rrrrrr(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
391
+static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
392
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
393
{
394
- const uint8_t *start = *tb_ptr;
395
-
396
- *r0 = tci_read_r(tb_ptr);
397
- *r1 = tci_read_r(tb_ptr);
398
- *r2 = tci_read_r(tb_ptr);
399
- *r3 = tci_read_r(tb_ptr);
400
- *r4 = tci_read_r(tb_ptr);
401
- *r5 = tci_read_r(tb_ptr);
402
-
403
- check_size(start, tb_ptr);
404
+ *r0 = extract32(insn, 8, 4);
405
+ *r1 = extract32(insn, 12, 4);
406
+ *r2 = extract32(insn, 16, 4);
407
+ *r3 = extract32(insn, 20, 4);
408
+ *r4 = extract32(insn, 24, 4);
409
+ *r5 = extract32(insn, 28, 4);
410
}
411
#endif
412
413
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
414
uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
415
const void *v_tb_ptr)
416
{
417
- const uint8_t *tb_ptr = v_tb_ptr;
418
+ const uint32_t *tb_ptr = v_tb_ptr;
419
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
420
uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
421
/ sizeof(uint64_t)];
422
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
423
tci_assert(tb_ptr);
424
425
for (;;) {
426
- TCGOpcode opc = tb_ptr[0];
427
- TCGReg r0, r1, r2, r3;
428
+ uint32_t insn;
429
+ TCGOpcode opc;
430
+ TCGReg r0, r1, r2, r3, r4;
431
tcg_target_ulong t1;
432
TCGCond condition;
433
target_ulong taddr;
434
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
435
uint32_t tmp32;
436
uint64_t tmp64;
437
#if TCG_TARGET_REG_BITS == 32
438
- TCGReg r4, r5;
439
+ TCGReg r5;
440
uint64_t T1, T2;
441
#endif
442
TCGMemOpIdx oi;
443
int32_t ofs;
444
- void *ptr, *cif;
445
+ void *ptr;
446
447
- /* Skip opcode and size entry. */
448
- tb_ptr += 2;
449
+ insn = *tb_ptr++;
450
+ opc = extract32(insn, 0, 8);
451
452
switch (opc) {
453
case INDEX_op_call:
454
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
455
}
456
}
98
}
457
99
458
- tci_args_nll(&tb_ptr, &len, &ptr, &cif);
100
env->regs[r3 + 0] = int128_gethi(ov);
459
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
101
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
460
102
cpu_stq_data_ra(env, a2, svh, ra);
461
/* Helper functions may need to access the "return address" */
462
tci_tb_ptr = (uintptr_t)tb_ptr;
463
464
- ffi_call(cif, ptr, stack, call_slots);
465
+ {
466
+ void **pptr = ptr;
467
+ ffi_call(pptr[1], pptr[0], stack, call_slots);
468
+ }
469
470
/* Any result winds up "left-aligned" in the stack[0] slot. */
471
switch (len) {
472
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
473
break;
103
break;
474
104
case 4:
475
case INDEX_op_br:
105
- if (parallel) {
476
- tci_args_l(&tb_ptr, &ptr);
106
-#ifdef CONFIG_ATOMIC128
477
+ tci_args_l(insn, tb_ptr, &ptr);
107
+ if (!parallel) {
478
tb_ptr = ptr;
108
+ cpu_stq_data_ra(env, a2 + 0, svh, ra);
479
continue;
109
+ cpu_stq_data_ra(env, a2 + 8, svl, ra);
480
case INDEX_op_setcond_i32:
110
+ } else if (HAVE_ATOMIC128) {
481
- tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition);
111
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
482
+ tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
112
Int128 sv = int128_make128(svl, svh);
483
regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
113
helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
484
break;
114
-#else
485
#if TCG_TARGET_REG_BITS == 32
115
+ } else {
486
case INDEX_op_setcond2_i32:
116
/* Note that we asserted !parallel above. */
487
- tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &condition);
117
g_assert_not_reached();
488
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
118
-#endif
489
T1 = tci_uint64(regs[r2], regs[r1]);
119
- } else {
490
T2 = tci_uint64(regs[r4], regs[r3]);
120
- cpu_stq_data_ra(env, a2 + 0, svh, ra);
491
regs[r0] = tci_compare64(T1, T2, condition);
121
- cpu_stq_data_ra(env, a2 + 8, svl, ra);
492
break;
493
#elif TCG_TARGET_REG_BITS == 64
494
case INDEX_op_setcond_i64:
495
- tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition);
496
+ tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
497
regs[r0] = tci_compare64(regs[r1], regs[r2], condition);
498
break;
499
#endif
500
CASE_32_64(mov)
501
- tci_args_rr(&tb_ptr, &r0, &r1);
502
+ tci_args_rr(insn, &r0, &r1);
503
regs[r0] = regs[r1];
504
break;
505
- case INDEX_op_tci_movi_i32:
506
- tci_args_ri(&tb_ptr, &r0, &t1);
507
+ case INDEX_op_tci_movi:
508
+ tci_args_ri(insn, &r0, &t1);
509
regs[r0] = t1;
510
break;
511
+ case INDEX_op_tci_movl:
512
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
513
+ regs[r0] = *(tcg_target_ulong *)ptr;
514
+ break;
515
516
/* Load/store operations (32 bit). */
517
518
CASE_32_64(ld8u)
519
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
520
+ tci_args_rrs(insn, &r0, &r1, &ofs);
521
ptr = (void *)(regs[r1] + ofs);
522
regs[r0] = *(uint8_t *)ptr;
523
break;
524
CASE_32_64(ld8s)
525
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
526
+ tci_args_rrs(insn, &r0, &r1, &ofs);
527
ptr = (void *)(regs[r1] + ofs);
528
regs[r0] = *(int8_t *)ptr;
529
break;
530
CASE_32_64(ld16u)
531
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
532
+ tci_args_rrs(insn, &r0, &r1, &ofs);
533
ptr = (void *)(regs[r1] + ofs);
534
regs[r0] = *(uint16_t *)ptr;
535
break;
536
CASE_32_64(ld16s)
537
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
538
+ tci_args_rrs(insn, &r0, &r1, &ofs);
539
ptr = (void *)(regs[r1] + ofs);
540
regs[r0] = *(int16_t *)ptr;
541
break;
542
case INDEX_op_ld_i32:
543
CASE_64(ld32u)
544
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
545
+ tci_args_rrs(insn, &r0, &r1, &ofs);
546
ptr = (void *)(regs[r1] + ofs);
547
regs[r0] = *(uint32_t *)ptr;
548
break;
549
CASE_32_64(st8)
550
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
551
+ tci_args_rrs(insn, &r0, &r1, &ofs);
552
ptr = (void *)(regs[r1] + ofs);
553
*(uint8_t *)ptr = regs[r0];
554
break;
555
CASE_32_64(st16)
556
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
557
+ tci_args_rrs(insn, &r0, &r1, &ofs);
558
ptr = (void *)(regs[r1] + ofs);
559
*(uint16_t *)ptr = regs[r0];
560
break;
561
case INDEX_op_st_i32:
562
CASE_64(st32)
563
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
564
+ tci_args_rrs(insn, &r0, &r1, &ofs);
565
ptr = (void *)(regs[r1] + ofs);
566
*(uint32_t *)ptr = regs[r0];
567
break;
568
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
569
/* Arithmetic operations (mixed 32/64 bit). */
570
571
CASE_32_64(add)
572
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
573
+ tci_args_rrr(insn, &r0, &r1, &r2);
574
regs[r0] = regs[r1] + regs[r2];
575
break;
576
CASE_32_64(sub)
577
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
578
+ tci_args_rrr(insn, &r0, &r1, &r2);
579
regs[r0] = regs[r1] - regs[r2];
580
break;
581
CASE_32_64(mul)
582
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
583
+ tci_args_rrr(insn, &r0, &r1, &r2);
584
regs[r0] = regs[r1] * regs[r2];
585
break;
586
CASE_32_64(and)
587
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
588
+ tci_args_rrr(insn, &r0, &r1, &r2);
589
regs[r0] = regs[r1] & regs[r2];
590
break;
591
CASE_32_64(or)
592
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
593
+ tci_args_rrr(insn, &r0, &r1, &r2);
594
regs[r0] = regs[r1] | regs[r2];
595
break;
596
CASE_32_64(xor)
597
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
598
+ tci_args_rrr(insn, &r0, &r1, &r2);
599
regs[r0] = regs[r1] ^ regs[r2];
600
break;
601
602
/* Arithmetic operations (32 bit). */
603
604
case INDEX_op_div_i32:
605
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
606
+ tci_args_rrr(insn, &r0, &r1, &r2);
607
regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2];
608
break;
609
case INDEX_op_divu_i32:
610
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
611
+ tci_args_rrr(insn, &r0, &r1, &r2);
612
regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2];
613
break;
614
case INDEX_op_rem_i32:
615
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
616
+ tci_args_rrr(insn, &r0, &r1, &r2);
617
regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2];
618
break;
619
case INDEX_op_remu_i32:
620
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
621
+ tci_args_rrr(insn, &r0, &r1, &r2);
622
regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2];
623
break;
624
625
/* Shift/rotate operations (32 bit). */
626
627
case INDEX_op_shl_i32:
628
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
629
+ tci_args_rrr(insn, &r0, &r1, &r2);
630
regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31);
631
break;
632
case INDEX_op_shr_i32:
633
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
634
+ tci_args_rrr(insn, &r0, &r1, &r2);
635
regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31);
636
break;
637
case INDEX_op_sar_i32:
638
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
639
+ tci_args_rrr(insn, &r0, &r1, &r2);
640
regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31);
641
break;
642
#if TCG_TARGET_HAS_rot_i32
643
case INDEX_op_rotl_i32:
644
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
645
+ tci_args_rrr(insn, &r0, &r1, &r2);
646
regs[r0] = rol32(regs[r1], regs[r2] & 31);
647
break;
648
case INDEX_op_rotr_i32:
649
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
650
+ tci_args_rrr(insn, &r0, &r1, &r2);
651
regs[r0] = ror32(regs[r1], regs[r2] & 31);
652
break;
653
#endif
654
#if TCG_TARGET_HAS_deposit_i32
655
case INDEX_op_deposit_i32:
656
- tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
657
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
658
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
659
break;
660
#endif
661
case INDEX_op_brcond_i32:
662
- tci_args_rl(&tb_ptr, &r0, &ptr);
663
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
664
if ((uint32_t)regs[r0]) {
665
tb_ptr = ptr;
666
}
122
}
667
break;
123
break;
668
#if TCG_TARGET_REG_BITS == 32
124
default:
669
case INDEX_op_add2_i32:
125
@@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
670
- tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5);
126
uintptr_t ra = GETPC();
671
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
127
uint64_t hi, lo;
672
T1 = tci_uint64(regs[r3], regs[r2]);
128
673
T2 = tci_uint64(regs[r5], regs[r4]);
129
- if (parallel) {
674
tci_write_reg64(regs, r1, r0, T1 + T2);
130
-#ifndef CONFIG_ATOMIC128
675
break;
131
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
676
case INDEX_op_sub2_i32:
132
-#else
677
- tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5);
133
+ if (!parallel) {
678
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
134
+ check_alignment(env, addr, 16, ra);
679
T1 = tci_uint64(regs[r3], regs[r2]);
135
+ hi = cpu_ldq_data_ra(env, addr + 0, ra);
680
T2 = tci_uint64(regs[r5], regs[r4]);
136
+ lo = cpu_ldq_data_ra(env, addr + 8, ra);
681
tci_write_reg64(regs, r1, r0, T1 - T2);
137
+ } else if (HAVE_ATOMIC128) {
682
break;
138
int mem_idx = cpu_mmu_index(env, false);
683
case INDEX_op_mulu2_i32:
139
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
684
- tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3);
140
Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
685
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
141
hi = int128_gethi(v);
686
tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]);
142
lo = int128_getlo(v);
687
break;
143
-#endif
688
#endif /* TCG_TARGET_REG_BITS == 32 */
144
} else {
689
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
145
- check_alignment(env, addr, 16, ra);
690
CASE_32_64(ext8s)
691
- tci_args_rr(&tb_ptr, &r0, &r1);
692
+ tci_args_rr(insn, &r0, &r1);
693
regs[r0] = (int8_t)regs[r1];
694
break;
695
#endif
696
#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
697
CASE_32_64(ext16s)
698
- tci_args_rr(&tb_ptr, &r0, &r1);
699
+ tci_args_rr(insn, &r0, &r1);
700
regs[r0] = (int16_t)regs[r1];
701
break;
702
#endif
703
#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64
704
CASE_32_64(ext8u)
705
- tci_args_rr(&tb_ptr, &r0, &r1);
706
+ tci_args_rr(insn, &r0, &r1);
707
regs[r0] = (uint8_t)regs[r1];
708
break;
709
#endif
710
#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64
711
CASE_32_64(ext16u)
712
- tci_args_rr(&tb_ptr, &r0, &r1);
713
+ tci_args_rr(insn, &r0, &r1);
714
regs[r0] = (uint16_t)regs[r1];
715
break;
716
#endif
717
#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
718
CASE_32_64(bswap16)
719
- tci_args_rr(&tb_ptr, &r0, &r1);
720
+ tci_args_rr(insn, &r0, &r1);
721
regs[r0] = bswap16(regs[r1]);
722
break;
723
#endif
724
#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64
725
CASE_32_64(bswap32)
726
- tci_args_rr(&tb_ptr, &r0, &r1);
727
+ tci_args_rr(insn, &r0, &r1);
728
regs[r0] = bswap32(regs[r1]);
729
break;
730
#endif
731
#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64
732
CASE_32_64(not)
733
- tci_args_rr(&tb_ptr, &r0, &r1);
734
+ tci_args_rr(insn, &r0, &r1);
735
regs[r0] = ~regs[r1];
736
break;
737
#endif
738
#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
739
CASE_32_64(neg)
740
- tci_args_rr(&tb_ptr, &r0, &r1);
741
+ tci_args_rr(insn, &r0, &r1);
742
regs[r0] = -regs[r1];
743
break;
744
#endif
745
#if TCG_TARGET_REG_BITS == 64
746
- case INDEX_op_tci_movi_i64:
747
- tci_args_rI(&tb_ptr, &r0, &t1);
748
- regs[r0] = t1;
749
- break;
750
-
146
-
751
/* Load/store operations (64 bit). */
147
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
752
148
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
753
case INDEX_op_ld32s_i64:
149
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
754
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
150
}
755
+ tci_args_rrs(insn, &r0, &r1, &ofs);
151
756
ptr = (void *)(regs[r1] + ofs);
152
env->retxl = lo;
757
regs[r0] = *(int32_t *)ptr;
153
@@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
758
break;
759
case INDEX_op_ld_i64:
760
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
761
+ tci_args_rrs(insn, &r0, &r1, &ofs);
762
ptr = (void *)(regs[r1] + ofs);
763
regs[r0] = *(uint64_t *)ptr;
764
break;
765
case INDEX_op_st_i64:
766
- tci_args_rrs(&tb_ptr, &r0, &r1, &ofs);
767
+ tci_args_rrs(insn, &r0, &r1, &ofs);
768
ptr = (void *)(regs[r1] + ofs);
769
*(uint64_t *)ptr = regs[r0];
770
break;
771
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
772
/* Arithmetic operations (64 bit). */
773
774
case INDEX_op_div_i64:
775
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
776
+ tci_args_rrr(insn, &r0, &r1, &r2);
777
regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2];
778
break;
779
case INDEX_op_divu_i64:
780
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
781
+ tci_args_rrr(insn, &r0, &r1, &r2);
782
regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2];
783
break;
784
case INDEX_op_rem_i64:
785
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
786
+ tci_args_rrr(insn, &r0, &r1, &r2);
787
regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2];
788
break;
789
case INDEX_op_remu_i64:
790
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
791
+ tci_args_rrr(insn, &r0, &r1, &r2);
792
regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2];
793
break;
794
795
/* Shift/rotate operations (64 bit). */
796
797
case INDEX_op_shl_i64:
798
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
799
+ tci_args_rrr(insn, &r0, &r1, &r2);
800
regs[r0] = regs[r1] << (regs[r2] & 63);
801
break;
802
case INDEX_op_shr_i64:
803
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
804
+ tci_args_rrr(insn, &r0, &r1, &r2);
805
regs[r0] = regs[r1] >> (regs[r2] & 63);
806
break;
807
case INDEX_op_sar_i64:
808
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
809
+ tci_args_rrr(insn, &r0, &r1, &r2);
810
regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63);
811
break;
812
#if TCG_TARGET_HAS_rot_i64
813
case INDEX_op_rotl_i64:
814
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
815
+ tci_args_rrr(insn, &r0, &r1, &r2);
816
regs[r0] = rol64(regs[r1], regs[r2] & 63);
817
break;
818
case INDEX_op_rotr_i64:
819
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
820
+ tci_args_rrr(insn, &r0, &r1, &r2);
821
regs[r0] = ror64(regs[r1], regs[r2] & 63);
822
break;
823
#endif
824
#if TCG_TARGET_HAS_deposit_i64
825
case INDEX_op_deposit_i64:
826
- tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
827
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
828
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
829
break;
830
#endif
831
case INDEX_op_brcond_i64:
832
- tci_args_rl(&tb_ptr, &r0, &ptr);
833
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
834
if (regs[r0]) {
835
tb_ptr = ptr;
836
}
837
break;
838
case INDEX_op_ext32s_i64:
839
case INDEX_op_ext_i32_i64:
840
- tci_args_rr(&tb_ptr, &r0, &r1);
841
+ tci_args_rr(insn, &r0, &r1);
842
regs[r0] = (int32_t)regs[r1];
843
break;
844
case INDEX_op_ext32u_i64:
845
case INDEX_op_extu_i32_i64:
846
- tci_args_rr(&tb_ptr, &r0, &r1);
847
+ tci_args_rr(insn, &r0, &r1);
848
regs[r0] = (uint32_t)regs[r1];
849
break;
850
#if TCG_TARGET_HAS_bswap64_i64
851
case INDEX_op_bswap64_i64:
852
- tci_args_rr(&tb_ptr, &r0, &r1);
853
+ tci_args_rr(insn, &r0, &r1);
854
regs[r0] = bswap64(regs[r1]);
855
break;
856
#endif
857
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
858
/* QEMU specific operations. */
859
860
case INDEX_op_exit_tb:
861
- tci_args_l(&tb_ptr, &ptr);
862
+ tci_args_l(insn, tb_ptr, &ptr);
863
return (uintptr_t)ptr;
864
865
case INDEX_op_goto_tb:
866
- tci_args_l(&tb_ptr, &ptr);
867
+ tci_args_l(insn, tb_ptr, &ptr);
868
tb_ptr = *(void **)ptr;
869
break;
870
871
case INDEX_op_qemu_ld_i32:
872
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
873
- tci_args_rrm(&tb_ptr, &r0, &r1, &oi);
874
+ tci_args_rrm(insn, &r0, &r1, &oi);
875
taddr = regs[r1];
876
} else {
877
- tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi);
878
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
879
taddr = tci_uint64(regs[r2], regs[r1]);
880
}
881
switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
882
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
883
884
case INDEX_op_qemu_ld_i64:
885
if (TCG_TARGET_REG_BITS == 64) {
886
- tci_args_rrm(&tb_ptr, &r0, &r1, &oi);
887
+ tci_args_rrm(insn, &r0, &r1, &oi);
888
taddr = regs[r1];
889
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
890
- tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi);
891
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
892
taddr = regs[r2];
893
} else {
894
- tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi);
895
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
896
taddr = tci_uint64(regs[r3], regs[r2]);
897
+ oi = regs[r4];
898
}
899
switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
900
case MO_UB:
901
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
902
903
case INDEX_op_qemu_st_i32:
904
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
905
- tci_args_rrm(&tb_ptr, &r0, &r1, &oi);
906
+ tci_args_rrm(insn, &r0, &r1, &oi);
907
taddr = regs[r1];
908
} else {
909
- tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi);
910
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
911
taddr = tci_uint64(regs[r2], regs[r1]);
912
}
913
tmp32 = regs[r0];
914
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
915
916
case INDEX_op_qemu_st_i64:
917
if (TCG_TARGET_REG_BITS == 64) {
918
- tci_args_rrm(&tb_ptr, &r0, &r1, &oi);
919
+ tci_args_rrm(insn, &r0, &r1, &oi);
920
taddr = regs[r1];
921
tmp64 = regs[r0];
922
} else {
923
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
924
- tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi);
925
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
926
taddr = regs[r2];
927
} else {
928
- tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi);
929
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
930
taddr = tci_uint64(regs[r3], regs[r2]);
931
+ oi = regs[r4];
932
}
933
tmp64 = tci_uint64(regs[r1], regs[r0]);
934
}
935
@@ -XXX,XX +XXX,XX @@ static const char *str_c(TCGCond c)
936
/* Disassemble TCI bytecode. */
937
int print_insn_tci(bfd_vma addr, disassemble_info *info)
938
{
154
{
939
- uint8_t buf[256];
155
uintptr_t ra = GETPC();
940
- int length, status;
156
941
+ const uint32_t *tb_ptr = (const void *)(uintptr_t)addr;
157
- if (parallel) {
942
const TCGOpDef *def;
158
-#ifndef CONFIG_ATOMIC128
943
const char *op_name;
159
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
944
+ uint32_t insn;
160
-#else
945
TCGOpcode op;
161
- int mem_idx = cpu_mmu_index(env, false);
946
- TCGReg r0, r1, r2, r3;
162
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
947
+ TCGReg r0, r1, r2, r3, r4;
948
#if TCG_TARGET_REG_BITS == 32
949
- TCGReg r4, r5;
950
+ TCGReg r5;
951
#endif
952
tcg_target_ulong i1;
953
int32_t s2;
954
TCGCond c;
955
TCGMemOpIdx oi;
956
uint8_t pos, len;
957
- void *ptr, *cif;
958
- const uint8_t *tb_ptr;
959
+ void *ptr;
960
961
- status = info->read_memory_func(addr, buf, 2, info);
962
- if (status != 0) {
963
- info->memory_error_func(status, addr, info);
964
- return -1;
965
- }
966
- op = buf[0];
967
- length = buf[1];
968
+ /* TCI is always the host, so we don't need to load indirect. */
969
+ insn = *tb_ptr++;
970
971
- if (length < 2) {
972
- info->fprintf_func(info->stream, "invalid length %d", length);
973
- return 1;
974
- }
975
-
163
-
976
- status = info->read_memory_func(addr + 2, buf + 2, length - 2, info);
164
- Int128 v = int128_make128(low, high);
977
- if (status != 0) {
165
- helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
978
- info->memory_error_func(status, addr + 2, info);
979
- return -1;
980
- }
981
+ info->fprintf_func(info->stream, "%08x ", insn);
982
983
+ op = extract32(insn, 0, 8);
984
def = &tcg_op_defs[op];
985
op_name = def->name;
986
- tb_ptr = buf + 2;
987
988
switch (op) {
989
case INDEX_op_br:
990
case INDEX_op_exit_tb:
991
case INDEX_op_goto_tb:
992
- tci_args_l(&tb_ptr, &ptr);
993
+ tci_args_l(insn, tb_ptr, &ptr);
994
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
995
break;
996
997
case INDEX_op_call:
998
- tci_args_nll(&tb_ptr, &len, &ptr, &cif);
999
- info->fprintf_func(info->stream, "%-12s %d, %p, %p",
1000
- op_name, len, ptr, cif);
1001
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
1002
+ info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr);
1003
break;
1004
1005
case INDEX_op_brcond_i32:
1006
case INDEX_op_brcond_i64:
1007
- tci_args_rl(&tb_ptr, &r0, &ptr);
1008
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
1009
info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p",
1010
op_name, str_r(r0), ptr);
1011
break;
1012
1013
case INDEX_op_setcond_i32:
1014
case INDEX_op_setcond_i64:
1015
- tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &c);
1016
+ tci_args_rrrc(insn, &r0, &r1, &r2, &c);
1017
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
1018
op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c));
1019
break;
1020
1021
- case INDEX_op_tci_movi_i32:
1022
- tci_args_ri(&tb_ptr, &r0, &i1);
1023
+ case INDEX_op_tci_movi:
1024
+ tci_args_ri(insn, &r0, &i1);
1025
info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx,
1026
op_name, str_r(r0), i1);
1027
break;
1028
1029
-#if TCG_TARGET_REG_BITS == 64
1030
- case INDEX_op_tci_movi_i64:
1031
- tci_args_rI(&tb_ptr, &r0, &i1);
1032
- info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx,
1033
- op_name, str_r(r0), i1);
1034
+ case INDEX_op_tci_movl:
1035
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
1036
+ info->fprintf_func(info->stream, "%-12s %s, %p",
1037
+ op_name, str_r(r0), ptr);
1038
break;
1039
-#endif
166
-#endif
1040
167
- } else {
1041
case INDEX_op_ld8u_i32:
168
+ if (!parallel) {
1042
case INDEX_op_ld8u_i64:
169
check_alignment(env, addr, 16, ra);
1043
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
170
-
1044
case INDEX_op_st32_i64:
171
cpu_stq_data_ra(env, addr + 0, high, ra);
1045
case INDEX_op_st_i32:
172
cpu_stq_data_ra(env, addr + 8, low, ra);
1046
case INDEX_op_st_i64:
173
+ } else if (HAVE_ATOMIC128) {
1047
- tci_args_rrs(&tb_ptr, &r0, &r1, &s2);
174
+ int mem_idx = cpu_mmu_index(env, false);
1048
+ tci_args_rrs(insn, &r0, &r1, &s2);
175
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1049
info->fprintf_func(info->stream, "%-12s %s, %s, %d",
176
+ Int128 v = int128_make128(low, high);
1050
op_name, str_r(r0), str_r(r1), s2);
177
+ helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
1051
break;
178
+ } else {
1052
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
179
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
1053
case INDEX_op_not_i64:
1054
case INDEX_op_neg_i32:
1055
case INDEX_op_neg_i64:
1056
- tci_args_rr(&tb_ptr, &r0, &r1);
1057
+ tci_args_rr(insn, &r0, &r1);
1058
info->fprintf_func(info->stream, "%-12s %s, %s",
1059
op_name, str_r(r0), str_r(r1));
1060
break;
1061
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
1062
case INDEX_op_rotl_i64:
1063
case INDEX_op_rotr_i32:
1064
case INDEX_op_rotr_i64:
1065
- tci_args_rrr(&tb_ptr, &r0, &r1, &r2);
1066
+ tci_args_rrr(insn, &r0, &r1, &r2);
1067
info->fprintf_func(info->stream, "%-12s %s, %s, %s",
1068
op_name, str_r(r0), str_r(r1), str_r(r2));
1069
break;
1070
1071
case INDEX_op_deposit_i32:
1072
case INDEX_op_deposit_i64:
1073
- tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
1074
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
1075
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d",
1076
op_name, str_r(r0), str_r(r1), str_r(r2), pos, len);
1077
break;
1078
1079
#if TCG_TARGET_REG_BITS == 32
1080
case INDEX_op_setcond2_i32:
1081
- tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &c);
1082
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c);
1083
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
1084
op_name, str_r(r0), str_r(r1), str_r(r2),
1085
str_r(r3), str_r(r4), str_c(c));
1086
break;
1087
1088
case INDEX_op_mulu2_i32:
1089
- tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3);
1090
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
1091
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
1092
op_name, str_r(r0), str_r(r1),
1093
str_r(r2), str_r(r3));
1094
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
1095
1096
case INDEX_op_add2_i32:
1097
case INDEX_op_sub2_i32:
1098
- tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5);
1099
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
1100
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
1101
op_name, str_r(r0), str_r(r1), str_r(r2),
1102
str_r(r3), str_r(r4), str_r(r5));
1103
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
1104
len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
1105
switch (len) {
1106
case 2:
1107
- tci_args_rrm(&tb_ptr, &r0, &r1, &oi);
1108
+ tci_args_rrm(insn, &r0, &r1, &oi);
1109
info->fprintf_func(info->stream, "%-12s %s, %s, %x",
1110
op_name, str_r(r0), str_r(r1), oi);
1111
break;
1112
case 3:
1113
- tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi);
1114
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
1115
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x",
1116
op_name, str_r(r0), str_r(r1), str_r(r2), oi);
1117
break;
1118
case 4:
1119
- tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi);
1120
- info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %x",
1121
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
1122
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s",
1123
op_name, str_r(r0), str_r(r1),
1124
- str_r(r2), str_r(r3), oi);
1125
+ str_r(r2), str_r(r3), str_r(r4));
1126
break;
1127
default:
1128
g_assert_not_reached();
1129
}
1130
break;
1131
1132
+ case 0:
1133
+ /* tcg_out_nop_fill uses zeros */
1134
+ if (insn == 0) {
1135
+ info->fprintf_func(info->stream, "align");
1136
+ break;
1137
+ }
1138
+ /* fall through */
1139
+
1140
default:
1141
info->fprintf_func(info->stream, "illegal opcode %d", op);
1142
break;
1143
}
180
}
1144
1145
- return length;
1146
+ return sizeof(insn);
1147
}
181
}
1148
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
182
1149
index XXXXXXX..XXXXXXX 100644
1150
--- a/tcg/tci/tcg-target.c.inc
1151
+++ b/tcg/tci/tcg-target.c.inc
1152
@@ -XXX,XX +XXX,XX @@
1153
* THE SOFTWARE.
1154
*/
1155
1156
-/* TODO list:
1157
- * - See TODO comments in code.
1158
- */
1159
-
1160
-/* Marker for missing code. */
1161
-#define TODO() \
1162
- do { \
1163
- fprintf(stderr, "TODO %s:%u: %s()\n", \
1164
- __FILE__, __LINE__, __func__); \
1165
- tcg_abort(); \
1166
- } while (0)
1167
-
1168
-/* Bitfield n...m (in 32 bit value). */
1169
-#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
1170
+#include "../tcg-pool.c.inc"
1171
1172
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1173
{
1174
@@ -XXX,XX +XXX,XX @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
1175
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
1176
intptr_t value, intptr_t addend)
1177
{
1178
- /* tcg_out_reloc always uses the same type, addend. */
1179
- tcg_debug_assert(type == sizeof(tcg_target_long));
1180
+ intptr_t diff = value - (intptr_t)(code_ptr + 1);
1181
+
1182
tcg_debug_assert(addend == 0);
1183
- tcg_debug_assert(value != 0);
1184
- if (TCG_TARGET_REG_BITS == 32) {
1185
- tcg_patch32(code_ptr, value);
1186
- } else {
1187
- tcg_patch64(code_ptr, value);
1188
- }
1189
- return true;
1190
-}
1191
-
1192
-/* Write value (native size). */
1193
-static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
1194
-{
1195
- if (TCG_TARGET_REG_BITS == 32) {
1196
- tcg_out32(s, v);
1197
- } else {
1198
- tcg_out64(s, v);
1199
- }
1200
-}
1201
-
1202
-/* Write opcode. */
1203
-static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
1204
-{
1205
- tcg_out8(s, op);
1206
- tcg_out8(s, 0);
1207
-}
1208
-
1209
-/* Write register. */
1210
-static void tcg_out_r(TCGContext *s, TCGArg t0)
1211
-{
1212
- tcg_debug_assert(t0 < TCG_TARGET_NB_REGS);
1213
- tcg_out8(s, t0);
1214
-}
1215
-
1216
-/* Write label. */
1217
-static void tci_out_label(TCGContext *s, TCGLabel *label)
1218
-{
1219
- if (label->has_value) {
1220
- tcg_out_i(s, label->u.value);
1221
- tcg_debug_assert(label->u.value);
1222
- } else {
1223
- tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
1224
- s->code_ptr += sizeof(tcg_target_ulong);
1225
+ tcg_debug_assert(type == 20);
1226
+
1227
+ if (diff == sextract32(diff, 0, type)) {
1228
+ tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff));
1229
+ return true;
1230
}
1231
+ return false;
1232
}
1233
1234
static void stack_bounds_check(TCGReg base, target_long offset)
1235
@@ -XXX,XX +XXX,XX @@ static void stack_bounds_check(TCGReg base, target_long offset)
1236
1237
static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0)
1238
{
1239
- uint8_t *old_code_ptr = s->code_ptr;
1240
+ tcg_insn_unit insn = 0;
1241
1242
- tcg_out_op_t(s, op);
1243
- tci_out_label(s, l0);
1244
-
1245
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1246
+ tcg_out_reloc(s, s->code_ptr, 20, l0, 0);
1247
+ insn = deposit32(insn, 0, 8, op);
1248
+ tcg_out32(s, insn);
1249
}
1250
1251
static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
1252
{
1253
- uint8_t *old_code_ptr = s->code_ptr;
1254
+ tcg_insn_unit insn = 0;
1255
+ intptr_t diff;
1256
1257
- tcg_out_op_t(s, op);
1258
- tcg_out_i(s, (uintptr_t)p0);
1259
-
1260
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1261
+ /* Special case for exit_tb: map null -> 0. */
1262
+ if (p0 == NULL) {
1263
+ diff = 0;
1264
+ } else {
1265
+ diff = p0 - (void *)(s->code_ptr + 1);
1266
+ tcg_debug_assert(diff != 0);
1267
+ if (diff != sextract32(diff, 0, 20)) {
1268
+ tcg_raise_tb_overflow(s);
1269
+ }
1270
+ }
1271
+ insn = deposit32(insn, 0, 8, op);
1272
+ insn = deposit32(insn, 12, 20, diff);
1273
+ tcg_out32(s, insn);
1274
}
1275
1276
static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
1277
{
1278
- uint8_t *old_code_ptr = s->code_ptr;
1279
-
1280
- tcg_out_op_t(s, op);
1281
-
1282
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1283
+ tcg_out32(s, (uint8_t)op);
1284
}
1285
1286
static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1)
1287
{
1288
- uint8_t *old_code_ptr = s->code_ptr;
1289
+ tcg_insn_unit insn = 0;
1290
1291
- tcg_out_op_t(s, op);
1292
- tcg_out_r(s, r0);
1293
- tcg_out32(s, i1);
1294
-
1295
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1296
+ tcg_debug_assert(i1 == sextract32(i1, 0, 20));
1297
+ insn = deposit32(insn, 0, 8, op);
1298
+ insn = deposit32(insn, 8, 4, r0);
1299
+ insn = deposit32(insn, 12, 20, i1);
1300
+ tcg_out32(s, insn);
1301
}
1302
1303
-#if TCG_TARGET_REG_BITS == 64
1304
-static void tcg_out_op_rI(TCGContext *s, TCGOpcode op,
1305
- TCGReg r0, uint64_t i1)
1306
-{
1307
- uint8_t *old_code_ptr = s->code_ptr;
1308
-
1309
- tcg_out_op_t(s, op);
1310
- tcg_out_r(s, r0);
1311
- tcg_out64(s, i1);
1312
-
1313
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1314
-}
1315
-#endif
1316
-
1317
static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1)
1318
{
1319
- uint8_t *old_code_ptr = s->code_ptr;
1320
+ tcg_insn_unit insn = 0;
1321
1322
- tcg_out_op_t(s, op);
1323
- tcg_out_r(s, r0);
1324
- tci_out_label(s, l1);
1325
-
1326
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1327
+ tcg_out_reloc(s, s->code_ptr, 20, l1, 0);
1328
+ insn = deposit32(insn, 0, 8, op);
1329
+ insn = deposit32(insn, 8, 4, r0);
1330
+ tcg_out32(s, insn);
1331
}
1332
1333
static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
1334
{
1335
- uint8_t *old_code_ptr = s->code_ptr;
1336
+ tcg_insn_unit insn = 0;
1337
1338
- tcg_out_op_t(s, op);
1339
- tcg_out_r(s, r0);
1340
- tcg_out_r(s, r1);
1341
-
1342
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1343
+ insn = deposit32(insn, 0, 8, op);
1344
+ insn = deposit32(insn, 8, 4, r0);
1345
+ insn = deposit32(insn, 12, 4, r1);
1346
+ tcg_out32(s, insn);
1347
}
1348
1349
static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
1350
TCGReg r0, TCGReg r1, TCGArg m2)
1351
{
1352
- uint8_t *old_code_ptr = s->code_ptr;
1353
+ tcg_insn_unit insn = 0;
1354
1355
- tcg_out_op_t(s, op);
1356
- tcg_out_r(s, r0);
1357
- tcg_out_r(s, r1);
1358
- tcg_out32(s, m2);
1359
-
1360
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1361
+ tcg_debug_assert(m2 == extract32(m2, 0, 12));
1362
+ insn = deposit32(insn, 0, 8, op);
1363
+ insn = deposit32(insn, 8, 4, r0);
1364
+ insn = deposit32(insn, 12, 4, r1);
1365
+ insn = deposit32(insn, 20, 12, m2);
1366
+ tcg_out32(s, insn);
1367
}
1368
1369
static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op,
1370
TCGReg r0, TCGReg r1, TCGReg r2)
1371
{
1372
- uint8_t *old_code_ptr = s->code_ptr;
1373
+ tcg_insn_unit insn = 0;
1374
1375
- tcg_out_op_t(s, op);
1376
- tcg_out_r(s, r0);
1377
- tcg_out_r(s, r1);
1378
- tcg_out_r(s, r2);
1379
-
1380
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1381
+ insn = deposit32(insn, 0, 8, op);
1382
+ insn = deposit32(insn, 8, 4, r0);
1383
+ insn = deposit32(insn, 12, 4, r1);
1384
+ insn = deposit32(insn, 16, 4, r2);
1385
+ tcg_out32(s, insn);
1386
}
1387
1388
static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
1389
TCGReg r0, TCGReg r1, intptr_t i2)
1390
{
1391
- uint8_t *old_code_ptr = s->code_ptr;
1392
+ tcg_insn_unit insn = 0;
1393
1394
- tcg_out_op_t(s, op);
1395
- tcg_out_r(s, r0);
1396
- tcg_out_r(s, r1);
1397
- tcg_debug_assert(i2 == (int32_t)i2);
1398
- tcg_out32(s, i2);
1399
-
1400
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1401
+ tcg_debug_assert(i2 == sextract32(i2, 0, 16));
1402
+ insn = deposit32(insn, 0, 8, op);
1403
+ insn = deposit32(insn, 8, 4, r0);
1404
+ insn = deposit32(insn, 12, 4, r1);
1405
+ insn = deposit32(insn, 16, 16, i2);
1406
+ tcg_out32(s, insn);
1407
}
1408
1409
static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
1410
TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
1411
{
1412
- uint8_t *old_code_ptr = s->code_ptr;
1413
+ tcg_insn_unit insn = 0;
1414
1415
- tcg_out_op_t(s, op);
1416
- tcg_out_r(s, r0);
1417
- tcg_out_r(s, r1);
1418
- tcg_out_r(s, r2);
1419
- tcg_out8(s, c3);
1420
-
1421
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1422
+ insn = deposit32(insn, 0, 8, op);
1423
+ insn = deposit32(insn, 8, 4, r0);
1424
+ insn = deposit32(insn, 12, 4, r1);
1425
+ insn = deposit32(insn, 16, 4, r2);
1426
+ insn = deposit32(insn, 20, 4, c3);
1427
+ tcg_out32(s, insn);
1428
}
1429
1430
static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
1431
TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
1432
{
1433
- uint8_t *old_code_ptr = s->code_ptr;
1434
+ tcg_insn_unit insn = 0;
1435
1436
- tcg_out_op_t(s, op);
1437
- tcg_out_r(s, r0);
1438
- tcg_out_r(s, r1);
1439
- tcg_out_r(s, r2);
1440
- tcg_out32(s, m3);
1441
-
1442
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1443
+ tcg_debug_assert(m3 == extract32(m3, 0, 12));
1444
+ insn = deposit32(insn, 0, 8, op);
1445
+ insn = deposit32(insn, 8, 4, r0);
1446
+ insn = deposit32(insn, 12, 4, r1);
1447
+ insn = deposit32(insn, 16, 4, r2);
1448
+ insn = deposit32(insn, 20, 12, m3);
1449
+ tcg_out32(s, insn);
1450
}
1451
1452
static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
1453
TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
1454
{
1455
- uint8_t *old_code_ptr = s->code_ptr;
1456
+ tcg_insn_unit insn = 0;
1457
1458
- tcg_out_op_t(s, op);
1459
- tcg_out_r(s, r0);
1460
- tcg_out_r(s, r1);
1461
- tcg_out_r(s, r2);
1462
- tcg_out8(s, b3);
1463
- tcg_out8(s, b4);
1464
-
1465
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1466
+ tcg_debug_assert(b3 == extract32(b3, 0, 6));
1467
+ tcg_debug_assert(b4 == extract32(b4, 0, 6));
1468
+ insn = deposit32(insn, 0, 8, op);
1469
+ insn = deposit32(insn, 8, 4, r0);
1470
+ insn = deposit32(insn, 12, 4, r1);
1471
+ insn = deposit32(insn, 16, 4, r2);
1472
+ insn = deposit32(insn, 20, 6, b3);
1473
+ insn = deposit32(insn, 26, 6, b4);
1474
+ tcg_out32(s, insn);
1475
}
1476
1477
-static void tcg_out_op_rrrrm(TCGContext *s, TCGOpcode op, TCGReg r0,
1478
- TCGReg r1, TCGReg r2, TCGReg r3, TCGArg m4)
1479
+static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
1480
+ TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4)
1481
{
1482
- uint8_t *old_code_ptr = s->code_ptr;
1483
+ tcg_insn_unit insn = 0;
1484
1485
- tcg_out_op_t(s, op);
1486
- tcg_out_r(s, r0);
1487
- tcg_out_r(s, r1);
1488
- tcg_out_r(s, r2);
1489
- tcg_out_r(s, r3);
1490
- tcg_out32(s, m4);
1491
-
1492
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1493
+ insn = deposit32(insn, 0, 8, op);
1494
+ insn = deposit32(insn, 8, 4, r0);
1495
+ insn = deposit32(insn, 12, 4, r1);
1496
+ insn = deposit32(insn, 16, 4, r2);
1497
+ insn = deposit32(insn, 20, 4, r3);
1498
+ insn = deposit32(insn, 24, 4, r4);
1499
+ tcg_out32(s, insn);
1500
}
1501
1502
#if TCG_TARGET_REG_BITS == 32
1503
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
1504
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
1505
{
1506
- uint8_t *old_code_ptr = s->code_ptr;
1507
+ tcg_insn_unit insn = 0;
1508
1509
- tcg_out_op_t(s, op);
1510
- tcg_out_r(s, r0);
1511
- tcg_out_r(s, r1);
1512
- tcg_out_r(s, r2);
1513
- tcg_out_r(s, r3);
1514
-
1515
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1516
+ insn = deposit32(insn, 0, 8, op);
1517
+ insn = deposit32(insn, 8, 4, r0);
1518
+ insn = deposit32(insn, 12, 4, r1);
1519
+ insn = deposit32(insn, 16, 4, r2);
1520
+ insn = deposit32(insn, 20, 4, r3);
1521
+ tcg_out32(s, insn);
1522
}
1523
1524
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
1525
TCGReg r0, TCGReg r1, TCGReg r2,
1526
TCGReg r3, TCGReg r4, TCGCond c5)
1527
{
1528
- uint8_t *old_code_ptr = s->code_ptr;
1529
+ tcg_insn_unit insn = 0;
1530
1531
- tcg_out_op_t(s, op);
1532
- tcg_out_r(s, r0);
1533
- tcg_out_r(s, r1);
1534
- tcg_out_r(s, r2);
1535
- tcg_out_r(s, r3);
1536
- tcg_out_r(s, r4);
1537
- tcg_out8(s, c5);
1538
-
1539
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1540
+ insn = deposit32(insn, 0, 8, op);
1541
+ insn = deposit32(insn, 8, 4, r0);
1542
+ insn = deposit32(insn, 12, 4, r1);
1543
+ insn = deposit32(insn, 16, 4, r2);
1544
+ insn = deposit32(insn, 20, 4, r3);
1545
+ insn = deposit32(insn, 24, 4, r4);
1546
+ insn = deposit32(insn, 28, 4, c5);
1547
+ tcg_out32(s, insn);
1548
}
1549
1550
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
1551
TCGReg r0, TCGReg r1, TCGReg r2,
1552
TCGReg r3, TCGReg r4, TCGReg r5)
1553
{
1554
- uint8_t *old_code_ptr = s->code_ptr;
1555
+ tcg_insn_unit insn = 0;
1556
1557
- tcg_out_op_t(s, op);
1558
- tcg_out_r(s, r0);
1559
- tcg_out_r(s, r1);
1560
- tcg_out_r(s, r2);
1561
- tcg_out_r(s, r3);
1562
- tcg_out_r(s, r4);
1563
- tcg_out_r(s, r5);
1564
-
1565
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1566
+ insn = deposit32(insn, 0, 8, op);
1567
+ insn = deposit32(insn, 8, 4, r0);
1568
+ insn = deposit32(insn, 12, 4, r1);
1569
+ insn = deposit32(insn, 16, 4, r2);
1570
+ insn = deposit32(insn, 20, 4, r3);
1571
+ insn = deposit32(insn, 24, 4, r4);
1572
+ insn = deposit32(insn, 28, 4, r5);
1573
+ tcg_out32(s, insn);
1574
}
1575
#endif
1576
1577
+static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
1578
+ TCGReg base, intptr_t offset)
1579
+{
1580
+ stack_bounds_check(base, offset);
1581
+ if (offset != sextract32(offset, 0, 16)) {
1582
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
1583
+ tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32
1584
+ ? INDEX_op_add_i32 : INDEX_op_add_i64),
1585
+ TCG_REG_TMP, TCG_REG_TMP, base);
1586
+ base = TCG_REG_TMP;
1587
+ offset = 0;
1588
+ }
1589
+ tcg_out_op_rrs(s, op, val, base, offset);
1590
+}
1591
+
1592
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
1593
intptr_t offset)
1594
{
1595
- stack_bounds_check(base, offset);
1596
switch (type) {
1597
case TCG_TYPE_I32:
1598
- tcg_out_op_rrs(s, INDEX_op_ld_i32, val, base, offset);
1599
+ tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset);
1600
break;
1601
#if TCG_TARGET_REG_BITS == 64
1602
case TCG_TYPE_I64:
1603
- tcg_out_op_rrs(s, INDEX_op_ld_i64, val, base, offset);
1604
+ tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset);
1605
break;
1606
#endif
1607
default:
1608
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
1609
{
1610
switch (type) {
1611
case TCG_TYPE_I32:
1612
- tcg_out_op_ri(s, INDEX_op_tci_movi_i32, ret, arg);
1613
- break;
1614
#if TCG_TARGET_REG_BITS == 64
1615
+ arg = (int32_t)arg;
1616
+ /* fall through */
1617
case TCG_TYPE_I64:
1618
- tcg_out_op_rI(s, INDEX_op_tci_movi_i64, ret, arg);
1619
- break;
1620
#endif
1621
+ break;
1622
default:
1623
g_assert_not_reached();
1624
}
1625
+
1626
+ if (arg == sextract32(arg, 0, 20)) {
1627
+ tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg);
1628
+ } else {
1629
+ tcg_insn_unit insn = 0;
1630
+
1631
+ new_pool_label(s, arg, 20, s->code_ptr, 0);
1632
+ insn = deposit32(insn, 0, 8, INDEX_op_tci_movl);
1633
+ insn = deposit32(insn, 8, 4, ret);
1634
+ tcg_out32(s, insn);
1635
+ }
1636
}
1637
1638
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
1639
ffi_cif *cif)
1640
{
1641
- uint8_t *old_code_ptr = s->code_ptr;
1642
+ tcg_insn_unit insn = 0;
1643
uint8_t which;
1644
1645
if (cif->rtype == &ffi_type_void) {
1646
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
1647
tcg_debug_assert(cif->rtype->size == 8);
1648
which = 2;
1649
}
1650
- tcg_out_op_t(s, INDEX_op_call);
1651
- tcg_out8(s, which);
1652
- tcg_out_i(s, (uintptr_t)func);
1653
- tcg_out_i(s, (uintptr_t)cif);
1654
-
1655
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
1656
+ new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
1657
+ insn = deposit32(insn, 0, 8, INDEX_op_call);
1658
+ insn = deposit32(insn, 8, 4, which);
1659
+ tcg_out32(s, insn);
1660
}
1661
1662
#if TCG_TARGET_REG_BITS == 64
1663
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1664
case INDEX_op_st_i32:
1665
CASE_64(st32)
1666
CASE_64(st)
1667
- stack_bounds_check(args[1], args[2]);
1668
- tcg_out_op_rrs(s, opc, args[0], args[1], args[2]);
1669
+ tcg_out_ldst(s, opc, args[0], args[1], args[2]);
1670
break;
1671
1672
CASE_32_64(add)
1673
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1674
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1675
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
1676
} else {
1677
- tcg_out_op_rrrrm(s, opc, args[0], args[1],
1678
- args[2], args[3], args[4]);
1679
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
1680
+ tcg_out_op_rrrrr(s, opc, args[0], args[1],
1681
+ args[2], args[3], TCG_REG_TMP);
1682
}
1683
break;
1684
1685
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
1686
return ct & TCG_CT_CONST;
1687
}
1688
1689
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
1690
+{
1691
+ memset(p, 0, sizeof(*p) * count);
1692
+}
1693
+
1694
static void tcg_target_init(TCGContext *s)
1695
{
1696
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
1697
diff --git a/tcg/tci/README b/tcg/tci/README
1698
index XXXXXXX..XXXXXXX 100644
1699
--- a/tcg/tci/README
1700
+++ b/tcg/tci/README
1701
@@ -XXX,XX +XXX,XX @@ This is what TCI (Tiny Code Interpreter) does.
1702
Like each TCG host frontend, TCI implements the code generator in
1703
tcg-target.c.inc, tcg-target.h. Both files are in directory tcg/tci.
1704
1705
-The additional file tcg/tci.c adds the interpreter.
1706
+The additional file tcg/tci.c adds the interpreter and disassembler.
1707
1708
-The bytecode consists of opcodes (same numeric values as those used by
1709
-TCG), command length and arguments of variable size and number.
1710
+The bytecode consists of opcodes (with only a few exceptions, with
1711
+the same numeric values and semantics as used by TCG), and up
1712
+to six arguments packed into a 32-bit integer. See comments in tci.c
1713
+for details on the encoding.
1714
1715
3) Usage
1716
1717
@@ -XXX,XX +XXX,XX @@ suggest using this option. Setting it automatically would need
1718
additional code in configure which must be fixed when new native TCG
1719
implementations are added.
1720
1721
-System emulation should work on any 32 or 64 bit host.
1722
-User mode emulation might work. Maybe a new linker script (*.ld)
1723
-is needed. Byte order might be wrong (on big endian hosts)
1724
-and need fixes in configure.
1725
-
1726
For hosts with native TCG, the interpreter TCI can be enabled by
1727
1728
configure --enable-tcg-interpreter
1729
@@ -XXX,XX +XXX,XX @@ u1 = linux-user-test works
1730
in the interpreter. These opcodes raise a runtime exception, so it is
1731
possible to see where code must be added.
1732
1733
-* The pseudo code is not optimized and still ugly. For hosts with special
1734
- alignment requirements, it needs some fixes (maybe aligned bytecode
1735
- would also improve speed for hosts which support byte alignment).
1736
-
1737
-* A better disassembler for the pseudo code would be nice (a very primitive
1738
- disassembler is included in tcg-target.c.inc).
1739
-
1740
* It might be useful to have a runtime option which selects the native TCG
1741
or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
1742
is a configure option, so you need two compilations of QEMU.
1743
--
183
--
1744
2.25.1
184
2.17.2
1745
185
1746
186
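For reference, the 32-bit word layout that replaces the old variable-length bytecode can be illustrated outside the tree. This is a minimal standalone sketch, not QEMU code: the deposit/sextract helpers stand in for QEMU's deposit32/sextract32, and the opcode number and operand values are invented; only the bit positions mirror the rrs format emitted by tcg_out_op_rrs() in the patch above.

/* Standalone sketch of one TCI insn word: op in bits [0,8), r0 in
 * [8,12), r1 in [12,16), 16-bit signed offset in [16,32). */
#include <stdint.h>
#include <stdio.h>

static uint32_t deposit(uint32_t w, int pos, int len, uint32_t val)
{
    uint32_t mask = ((len == 32 ? 0u : (1u << len)) - 1u) << pos;
    return (w & ~mask) | ((val << pos) & mask);
}

static int32_t sextract(uint32_t w, int pos, int len)
{
    /* Shift the field to the top, then sign-extend it back down. */
    return (int32_t)(w << (32 - pos - len)) >> (32 - len);
}

int main(void)
{
    /* Encode a hypothetical "ld r2, r5, -24" in the rrs format. */
    uint32_t insn = 0;
    insn = deposit(insn, 0, 8, 42);              /* example opcode number */
    insn = deposit(insn, 8, 4, 2);               /* r0 */
    insn = deposit(insn, 12, 4, 5);              /* r1 */
    insn = deposit(insn, 16, 16, (uint16_t)-24); /* signed 16-bit offset */

    /* Decode it again, as tci_args_rrs() does in the interpreter. */
    printf("op=%u r0=%u r1=%u ofs=%d\n",
           (unsigned)(insn & 0xff),
           (unsigned)((insn >> 8) & 0xf),
           (unsigned)((insn >> 12) & 0xf),
           (int)sextract(insn, 16, 16));
    return 0;
}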
1
Reviewed-by: David Hildenbrand <david@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/s390x/mem_helper.c | 128 ++++++++++++++++++--------------------
5
1 file changed, 61 insertions(+), 67 deletions(-)
1
The encoding planned for tci does not have enough room for
2
brcond2, with 4 registers and a condition as input as well
3
as the label. Resolve the condition into TCG_REG_TMP, and
4
relax brcond to one register plus a label, considering the
5
condition to always be reg != 0.
6
7
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/tci.c | 68 ++++++++++------------------------------
12
tcg/tci/tcg-target.c.inc | 52 +++++++++++-------------------
13
2 files changed, 35 insertions(+), 85 deletions(-)
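Concretely, the brcond2 lowering described in the message above turns one fused compare-and-branch into two steps: materialise the comparison result in a scratch register, then branch only on "register != 0". The following is a standalone sketch of those semantics; the register names, the LTU condition and the values are made-up examples, not QEMU definitions.

/* brcond2 al, ah, bl, bh, cond, label  becomes
 *   setcond2 TMP, al, ah, bl, bh, cond
 *   brcond   TMP, label            (taken iff TMP != 0) */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

enum { REG_A_LO, REG_A_HI, REG_B_LO, REG_B_HI, REG_TMP, NUM_REGS };

static uint32_t regs[NUM_REGS];

/* setcond2: 64-bit unsigned "less than" on a 32-bit host, result 0 or 1. */
static void setcond2_ltu(int tmp, int al, int ah, int bl, int bh)
{
    uint64_t a = ((uint64_t)regs[ah] << 32) | regs[al];
    uint64_t b = ((uint64_t)regs[bh] << 32) | regs[bl];
    regs[tmp] = a < b;
}

/* The relaxed brcond: the condition has already been folded away,
 * so the branch only tests the scratch register against zero. */
static bool brcond_nz(int tmp)
{
    return regs[tmp] != 0;
}

int main(void)
{
    regs[REG_A_LO] = 1; regs[REG_A_HI] = 0;
    regs[REG_B_LO] = 0; regs[REG_B_HI] = 2;

    setcond2_ltu(REG_TMP, REG_A_LO, REG_A_HI, REG_B_LO, REG_B_HI);
    printf("branch %s\n", brcond_nz(REG_TMP) ? "taken" : "not taken");
    return 0;
}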
14
6
15
diff --git a/tcg/tci.c b/tcg/tci.c
7
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
16
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/tci.c
9
--- a/target/s390x/mem_helper.c
18
+++ b/tcg/tci.c
10
+++ b/target/s390x/mem_helper.c
19
@@ -XXX,XX +XXX,XX @@ static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0,
11
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
20
check_size(start, tb_ptr);
12
return cc;
21
}
13
}
22
14
23
+static void tci_args_rl(const uint8_t **tb_ptr, TCGReg *r0, void **l1)
15
-static void do_cdsg(CPUS390XState *env, uint64_t addr,
24
+{
16
- uint32_t r1, uint32_t r3, bool parallel)
25
+ const uint8_t *start = *tb_ptr;
17
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
18
+ uint32_t r1, uint32_t r3)
19
{
20
uintptr_t ra = GETPC();
21
Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
22
Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
23
Int128 oldv;
24
+ uint64_t oldh, oldl;
25
bool fail;
26
27
- if (parallel) {
28
-#if !HAVE_CMPXCHG128
29
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
30
-#else
31
- int mem_idx = cpu_mmu_index(env, false);
32
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
33
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
34
- fail = !int128_eq(oldv, cmpv);
35
-#endif
36
- } else {
37
- uint64_t oldh, oldl;
38
+ check_alignment(env, addr, 16, ra);
39
40
- check_alignment(env, addr, 16, ra);
41
+ oldh = cpu_ldq_data_ra(env, addr + 0, ra);
42
+ oldl = cpu_ldq_data_ra(env, addr + 8, ra);
43
44
- oldh = cpu_ldq_data_ra(env, addr + 0, ra);
45
- oldl = cpu_ldq_data_ra(env, addr + 8, ra);
46
-
47
- oldv = int128_make128(oldl, oldh);
48
- fail = !int128_eq(oldv, cmpv);
49
- if (fail) {
50
- newv = oldv;
51
- }
52
-
53
- cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
54
- cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
55
+ oldv = int128_make128(oldl, oldh);
56
+ fail = !int128_eq(oldv, cmpv);
57
+ if (fail) {
58
+ newv = oldv;
59
}
60
61
+ cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
62
+ cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
26
+
63
+
27
+ *r0 = tci_read_r(tb_ptr);
64
env->cc_op = fail;
28
+ *l1 = (void *)tci_read_label(tb_ptr);
65
env->regs[r1] = int128_gethi(oldv);
66
env->regs[r1 + 1] = int128_getlo(oldv);
67
}
68
69
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
70
- uint32_t r1, uint32_t r3)
71
-{
72
- do_cdsg(env, addr, r1, r3, false);
73
-}
74
-
75
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
76
uint32_t r1, uint32_t r3)
77
{
78
- do_cdsg(env, addr, r1, r3, true);
79
+ uintptr_t ra = GETPC();
80
+ Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
81
+ Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
82
+ int mem_idx;
83
+ TCGMemOpIdx oi;
84
+ Int128 oldv;
85
+ bool fail;
29
+
86
+
30
+ check_size(start, tb_ptr);
87
+ if (!HAVE_CMPXCHG128) {
88
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
89
+ }
90
+
91
+ mem_idx = cpu_mmu_index(env, false);
92
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
93
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
94
+ fail = !int128_eq(oldv, cmpv);
95
+
96
+ env->cc_op = fail;
97
+ env->regs[r1] = int128_gethi(oldv);
98
+ env->regs[r1 + 1] = int128_getlo(oldv);
99
}
100
101
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
102
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
103
#endif
104
105
/* load pair from quadword */
106
-static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
107
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
108
{
109
uintptr_t ra = GETPC();
110
uint64_t hi, lo;
111
112
- if (!parallel) {
113
- check_alignment(env, addr, 16, ra);
114
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
115
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
116
- } else if (HAVE_ATOMIC128) {
117
+ check_alignment(env, addr, 16, ra);
118
+ hi = cpu_ldq_data_ra(env, addr + 0, ra);
119
+ lo = cpu_ldq_data_ra(env, addr + 8, ra);
120
+
121
+ env->retxl = lo;
122
+ return hi;
31
+}
123
+}
32
+
124
+
33
static void tci_args_rr(const uint8_t **tb_ptr,
125
+uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
34
TCGReg *r0, TCGReg *r1)
126
+{
35
{
127
+ uintptr_t ra = GETPC();
36
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrs(const uint8_t **tb_ptr,
128
+ uint64_t hi, lo;
37
check_size(start, tb_ptr);
129
+
130
+ if (HAVE_ATOMIC128) {
131
int mem_idx = cpu_mmu_index(env, false);
132
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
133
Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
134
@@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
135
return hi;
38
}
136
}
39
137
40
-static void tci_args_rrcl(const uint8_t **tb_ptr,
138
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
41
- TCGReg *r0, TCGReg *r1, TCGCond *c2, void **l3)
42
-{
139
-{
43
- const uint8_t *start = *tb_ptr;
140
- return do_lpq(env, addr, false);
44
-
45
- *r0 = tci_read_r(tb_ptr);
46
- *r1 = tci_read_r(tb_ptr);
47
- *c2 = tci_read_b(tb_ptr);
48
- *l3 = (void *)tci_read_label(tb_ptr);
49
-
50
- check_size(start, tb_ptr);
51
-}
141
-}
52
-
142
-
53
static void tci_args_rrrc(const uint8_t **tb_ptr,
143
-uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
54
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
55
{
56
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(const uint8_t **tb_ptr,
57
check_size(start, tb_ptr);
58
}
59
60
-static void tci_args_rrrrcl(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
61
- TCGReg *r2, TCGReg *r3, TCGCond *c4, void **l5)
62
-{
144
-{
63
- const uint8_t *start = *tb_ptr;
145
- return do_lpq(env, addr, true);
64
-
65
- *r0 = tci_read_r(tb_ptr);
66
- *r1 = tci_read_r(tb_ptr);
67
- *r2 = tci_read_r(tb_ptr);
68
- *r3 = tci_read_r(tb_ptr);
69
- *c4 = tci_read_b(tb_ptr);
70
- *l5 = (void *)tci_read_label(tb_ptr);
71
-
72
- check_size(start, tb_ptr);
73
-}
146
-}
74
-
147
-
75
static void tci_args_rrrrrc(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
148
/* store pair to quadword */
76
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
149
-static void do_stpq(CPUS390XState *env, uint64_t addr,
150
- uint64_t low, uint64_t high, bool parallel)
151
+void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
152
+ uint64_t low, uint64_t high)
77
{
153
{
78
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
154
uintptr_t ra = GETPC();
79
break;
155
80
#endif
156
- if (!parallel) {
81
case INDEX_op_brcond_i32:
157
- check_alignment(env, addr, 16, ra);
82
- tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr);
158
- cpu_stq_data_ra(env, addr + 0, high, ra);
83
- if (tci_compare32(regs[r0], regs[r1], condition)) {
159
- cpu_stq_data_ra(env, addr + 8, low, ra);
84
+ tci_args_rl(&tb_ptr, &r0, &ptr);
160
- } else if (HAVE_ATOMIC128) {
85
+ if ((uint32_t)regs[r0]) {
161
+ check_alignment(env, addr, 16, ra);
86
tb_ptr = ptr;
162
+ cpu_stq_data_ra(env, addr + 0, high, ra);
87
}
163
+ cpu_stq_data_ra(env, addr + 8, low, ra);
88
break;
89
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
90
T2 = tci_uint64(regs[r5], regs[r4]);
91
tci_write_reg64(regs, r1, r0, T1 - T2);
92
break;
93
- case INDEX_op_brcond2_i32:
94
- tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &condition, &ptr);
95
- T1 = tci_uint64(regs[r1], regs[r0]);
96
- T2 = tci_uint64(regs[r3], regs[r2]);
97
- if (tci_compare64(T1, T2, condition)) {
98
- tb_ptr = ptr;
99
- continue;
100
- }
101
- break;
102
case INDEX_op_mulu2_i32:
103
tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3);
104
tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]);
105
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
106
break;
107
#endif
108
case INDEX_op_brcond_i64:
109
- tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr);
110
- if (tci_compare64(regs[r0], regs[r1], condition)) {
111
+ tci_args_rl(&tb_ptr, &r0, &ptr);
112
+ if (regs[r0]) {
113
tb_ptr = ptr;
114
}
115
break;
116
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
117
118
case INDEX_op_brcond_i32:
119
case INDEX_op_brcond_i64:
120
- tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr);
121
- info->fprintf_func(info->stream, "%-12s %s, %s, %s, %p",
122
- op_name, str_r(r0), str_r(r1), str_c(c), ptr);
123
+ tci_args_rl(&tb_ptr, &r0, &ptr);
124
+ info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p",
125
+ op_name, str_r(r0), ptr);
126
break;
127
128
case INDEX_op_setcond_i32:
129
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
130
str_r(r3), str_r(r4), str_c(c));
131
break;
132
133
- case INDEX_op_brcond2_i32:
134
- tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &c, &ptr);
135
- info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %p",
136
- op_name, str_r(r0), str_r(r1),
137
- str_r(r2), str_r(r3), str_c(c), ptr);
138
- break;
139
-
140
case INDEX_op_mulu2_i32:
141
tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3);
142
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
143
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
144
index XXXXXXX..XXXXXXX 100644
145
--- a/tcg/tci/tcg-target.c.inc
146
+++ b/tcg/tci/tcg-target.c.inc
147
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rI(TCGContext *s, TCGOpcode op,
148
}
149
#endif
150
151
+static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1)
152
+{
153
+ uint8_t *old_code_ptr = s->code_ptr;
154
+
155
+ tcg_out_op_t(s, op);
156
+ tcg_out_r(s, r0);
157
+ tci_out_label(s, l1);
158
+
159
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
160
+}
164
+}
161
+
165
+
162
static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
166
+void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
163
{
167
+ uint64_t low, uint64_t high)
164
uint8_t *old_code_ptr = s->code_ptr;
168
+{
165
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
169
+ uintptr_t ra = GETPC();
166
old_code_ptr[1] = s->code_ptr - old_code_ptr;
170
+
171
+ if (HAVE_ATOMIC128) {
172
int mem_idx = cpu_mmu_index(env, false);
173
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
174
Int128 v = int128_make128(low, high);
175
@@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
176
}
167
}
177
}
168
178
169
-static void tcg_out_op_rrcl(TCGContext *s, TCGOpcode op,
179
-void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
170
- TCGReg r0, TCGReg r1, TCGCond c2, TCGLabel *l3)
180
- uint64_t low, uint64_t high)
171
-{
181
-{
172
- uint8_t *old_code_ptr = s->code_ptr;
182
- do_stpq(env, addr, low, high, false);
173
-
174
- tcg_out_op_t(s, op);
175
- tcg_out_r(s, r0);
176
- tcg_out_r(s, r1);
177
- tcg_out8(s, c2);
178
- tci_out_label(s, l3);
179
-
180
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
181
-}
183
-}
182
-
184
-
183
static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
185
-void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
184
TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
186
- uint64_t low, uint64_t high)
185
{
186
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
187
old_code_ptr[1] = s->code_ptr - old_code_ptr;
188
}
189
190
-static void tcg_out_op_rrrrcl(TCGContext *s, TCGOpcode op,
191
- TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3,
192
- TCGCond c4, TCGLabel *l5)
193
-{
187
-{
194
- uint8_t *old_code_ptr = s->code_ptr;
188
- do_stpq(env, addr, low, high, true);
195
-
196
- tcg_out_op_t(s, op);
197
- tcg_out_r(s, r0);
198
- tcg_out_r(s, r1);
199
- tcg_out_r(s, r2);
200
- tcg_out_r(s, r3);
201
- tcg_out8(s, c4);
202
- tci_out_label(s, l5);
203
-
204
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
205
-}
189
-}
206
-
190
-
207
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
191
/* Execute instruction. This instruction executes an insn modified with
208
TCGReg r0, TCGReg r1, TCGReg r2,
192
the contents of r1. It does not change the executed instruction in memory;
209
TCGReg r3, TCGReg r4, TCGCond c5)
193
it does not change the program counter.
210
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
211
break;
212
213
CASE_32_64(brcond)
214
- tcg_out_op_rrcl(s, opc, args[0], args[1], args[2], arg_label(args[3]));
215
+ tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32
216
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64),
217
+ TCG_REG_TMP, args[0], args[1], args[2]);
218
+ tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3]));
219
break;
220
221
CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */
222
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
223
args[3], args[4], args[5]);
224
break;
225
case INDEX_op_brcond2_i32:
226
- tcg_out_op_rrrrcl(s, opc, args[0], args[1], args[2],
227
- args[3], args[4], arg_label(args[5]));
228
+ tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP,
229
+ args[0], args[1], args[2], args[3], args[4]);
230
+ tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5]));
231
break;
232
case INDEX_op_mulu2_i32:
233
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
234
--
194
--
235
2.25.1
195
2.17.2
236
196
237
197
1
As noted by qemu-plugins.h, enum qemu_plugin_cb_flags is
1
When op raises an exception, it may not have initialized the output
2
currently unused -- plugins can neither read nor write
2
temps that would be written back by wout or cout.
3
guest registers.
4
3
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
accel/tcg/plugin-helpers.h | 1 -
7
target/s390x/translate.c | 20 +++++++++++++++-----
9
include/qemu/plugin.h | 1 -
8
1 file changed, 15 insertions(+), 5 deletions(-)
10
accel/tcg/plugin-gen.c | 8 ++++----
11
plugins/core.c | 30 ++++++------------------------
12
4 files changed, 10 insertions(+), 30 deletions(-)
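The s390x change described above ("when op raises an exception, it may not have initialized the output temps") boils down to guarding the write-back and cc helpers on the op's return value. A standalone sketch of that translate_one() ordering follows; the enum, struct and handlers are stand-ins for illustration, not the real s390x DisasContext machinery.

#include <stdio.h>

typedef enum { DISAS_NEXT, DISAS_NORETURN } DisasJumpType;

typedef struct {
    DisasJumpType (*help_op)(void);   /* the operation itself */
    void (*help_wout)(void);          /* write back outputs */
    void (*help_cout)(void);          /* compute condition code */
} DemoInsn;

static DisasJumpType demo_op_raises(void)
{
    puts("op: raised an exception");
    return DISAS_NORETURN;
}
static void demo_wout(void) { puts("wout: write back result"); }
static void demo_cout(void) { puts("cout: compute condition code"); }

static DisasJumpType translate_one_demo(const DemoInsn *insn)
{
    DisasJumpType ret = DISAS_NEXT;

    if (insn->help_op) {
        ret = insn->help_op();
    }
    /* Outputs may be uninitialized after an exception, so skip them. */
    if (ret != DISAS_NORETURN) {
        if (insn->help_wout) {
            insn->help_wout();
        }
        if (insn->help_cout) {
            insn->help_cout();
        }
    }
    return ret;
}

int main(void)
{
    DemoInsn insn = { demo_op_raises, demo_wout, demo_cout };
    translate_one_demo(&insn);   /* prints only the "op" line */
    return 0;
}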
13
9
14
diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h
10
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/plugin-helpers.h
12
--- a/target/s390x/translate.c
17
+++ b/accel/tcg/plugin-helpers.h
13
+++ b/target/s390x/translate.c
18
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ struct DisasInsn {
19
#ifdef CONFIG_PLUGIN
15
20
-/* Note: no TCG flags because those are overwritten later */
16
const char *name;
21
DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr)
17
22
DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr)
18
+ /* Pre-process arguments before HELP_OP. */
23
#endif
19
void (*help_in1)(DisasContext *, DisasFields *, DisasOps *);
24
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
20
void (*help_in2)(DisasContext *, DisasFields *, DisasOps *);
25
index XXXXXXX..XXXXXXX 100644
21
void (*help_prep)(DisasContext *, DisasFields *, DisasOps *);
26
--- a/include/qemu/plugin.h
22
+
27
+++ b/include/qemu/plugin.h
23
+ /*
28
@@ -XXX,XX +XXX,XX @@ enum plugin_dyn_cb_subtype {
24
+ * Post-process output after HELP_OP.
29
struct qemu_plugin_dyn_cb {
25
+ * Note that these are not called if HELP_OP returns DISAS_NORETURN.
30
union qemu_plugin_cb_sig f;
26
+ */
31
void *userp;
27
void (*help_wout)(DisasContext *, DisasFields *, DisasOps *);
32
- unsigned tcg_flags;
28
void (*help_cout)(DisasContext *, DisasOps *);
33
enum plugin_dyn_cb_subtype type;
29
+
34
/* @rw applies to mem callbacks only (both regular and inline) */
30
+ /* Implement the operation itself. */
35
enum qemu_plugin_mem_rw rw;
31
DisasJumpType (*help_op)(DisasContext *, DisasOps *);
36
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
32
37
index XXXXXXX..XXXXXXX 100644
33
uint64_t data;
38
--- a/accel/tcg/plugin-gen.c
34
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
39
+++ b/accel/tcg/plugin-gen.c
35
if (insn->help_op) {
40
@@ -XXX,XX +XXX,XX @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
36
ret = insn->help_op(s, &o);
41
}
42
43
static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func,
44
- void *func, unsigned tcg_flags, int *cb_idx)
45
+ void *func, int *cb_idx)
46
{
47
/* copy all ops until the call */
48
do {
49
@@ -XXX,XX +XXX,XX @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func,
50
tcg_debug_assert(i < MAX_OPC_PARAM_ARGS);
51
}
37
}
52
op->args[*cb_idx] = (uintptr_t)func;
38
- if (insn->help_wout) {
53
- op->args[*cb_idx + 1] = tcg_flags;
39
- insn->help_wout(s, &f, &o);
54
+ op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1];
40
- }
55
41
- if (insn->help_cout) {
56
return op;
42
- insn->help_cout(s, &o);
57
}
43
+ if (ret != DISAS_NORETURN) {
58
@@ -XXX,XX +XXX,XX @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb,
44
+ if (insn->help_wout) {
59
45
+ insn->help_wout(s, &f, &o);
60
/* call */
46
+ }
61
op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb),
47
+ if (insn->help_cout) {
62
- cb->f.vcpu_udata, cb->tcg_flags, cb_idx);
48
+ insn->help_cout(s, &o);
63
+ cb->f.vcpu_udata, cb_idx);
49
+ }
64
65
return op;
66
}
67
@@ -XXX,XX +XXX,XX @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb,
68
if (type == PLUGIN_GEN_CB_MEM) {
69
/* call */
70
op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb),
71
- cb->f.vcpu_udata, cb->tcg_flags, cb_idx);
72
+ cb->f.vcpu_udata, cb_idx);
73
}
50
}
74
51
75
return op;
52
/* Free any temporaries created by the helpers. */
76
diff --git a/plugins/core.c b/plugins/core.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/plugins/core.c
79
+++ b/plugins/core.c
80
@@ -XXX,XX +XXX,XX @@ void plugin_register_inline_op(GArray **arr,
81
dyn_cb->inline_insn.imm = imm;
82
}
83
84
-static inline uint32_t cb_to_tcg_flags(enum qemu_plugin_cb_flags flags)
85
-{
86
- uint32_t ret;
87
-
88
- switch (flags) {
89
- case QEMU_PLUGIN_CB_RW_REGS:
90
- ret = 0;
91
- break;
92
- case QEMU_PLUGIN_CB_R_REGS:
93
- ret = TCG_CALL_NO_WG;
94
- break;
95
- case QEMU_PLUGIN_CB_NO_REGS:
96
- default:
97
- ret = TCG_CALL_NO_RWG;
98
- }
99
- return ret;
100
-}
101
-
102
-inline void
103
-plugin_register_dyn_cb__udata(GArray **arr,
104
- qemu_plugin_vcpu_udata_cb_t cb,
105
- enum qemu_plugin_cb_flags flags, void *udata)
106
+void plugin_register_dyn_cb__udata(GArray **arr,
107
+ qemu_plugin_vcpu_udata_cb_t cb,
108
+ enum qemu_plugin_cb_flags flags,
109
+ void *udata)
110
{
111
struct qemu_plugin_dyn_cb *dyn_cb = plugin_get_dyn_cb(arr);
112
113
dyn_cb->userp = udata;
114
- dyn_cb->tcg_flags = cb_to_tcg_flags(flags);
115
+ /* Note flags are discarded as unused. */
116
dyn_cb->f.vcpu_udata = cb;
117
dyn_cb->type = PLUGIN_CB_REGULAR;
118
}
119
@@ -XXX,XX +XXX,XX @@ void plugin_register_vcpu_mem_cb(GArray **arr,
120
121
dyn_cb = plugin_get_dyn_cb(arr);
122
dyn_cb->userp = udata;
123
- dyn_cb->tcg_flags = cb_to_tcg_flags(flags);
124
+ /* Note flags are discarded as unused. */
125
dyn_cb->type = PLUGIN_CB_REGULAR;
126
dyn_cb->rw = rw;
127
dyn_cb->f.generic = cb;
128
--
53
--
129
2.25.1
54
2.17.2
130
55
131
56
Deleted patch
1
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/tcg-internal.h | 5 +++++
6
tcg/tcg.c | 5 ++---
7
2 files changed, 7 insertions(+), 3 deletions(-)
8
1
9
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tcg-internal.h
12
+++ b/tcg/tcg-internal.h
13
@@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s);
14
void tcg_region_initial_alloc(TCGContext *s);
15
void tcg_region_prologue_set(TCGContext *s);
16
17
+static inline void *tcg_call_func(TCGOp *op)
18
+{
19
+ return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op)];
20
+}
21
+
22
static inline const TCGHelperInfo *tcg_call_info(TCGOp *op)
23
{
24
return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
25
diff --git a/tcg/tcg.c b/tcg/tcg.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/tcg.c
28
+++ b/tcg/tcg.c
29
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
30
}
31
} else if (c == INDEX_op_call) {
32
const TCGHelperInfo *info = tcg_call_info(op);
33
- void *func;
34
+ void *func = tcg_call_func(op);
35
36
/* variable number of arguments */
37
nb_oargs = TCGOP_CALLO(op);
38
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
39
* Note that plugins have a template function for the info,
40
* but the actual function pointer comes from the plugin.
41
*/
42
- func = (void *)(uintptr_t)op->args[nb_oargs + nb_iargs];
43
if (func == info->func) {
44
col += qemu_log("%s", info->name);
45
} else {
46
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
47
int allocate_args;
48
TCGRegSet allocated_regs;
49
50
- func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
51
+ func_addr = tcg_call_func(op);
52
flags = tcg_call_flags(op);
53
54
nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
55
--
56
2.25.1
57
58
Deleted patch
1
Add libffi as a build requirement for TCI.
2
Add libffi to the dockerfiles to satisfy that requirement.
3
1
4
Construct an ffi_cif structure for each unique typemask.
5
Record the result in a separate hash table for later lookup;
6
this allows helper_table to stay const.
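A minimal standalone sketch of the per-signature ffi_cif preparation this describes, assuming only libffi: prepare the cif once, then reuse it for every call to a helper with that signature. The helper, its signature and the build command are invented for illustration; only ffi_prep_cif()/ffi_call() mirror what tci.c relies on.

/* Build with something like: cc demo.c $(pkg-config --cflags --libs libffi) */
#include <ffi.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_helper(void *env, uint32_t x)
{
    (void)env;
    return (uint64_t)x * 2;
}

int main(void)
{
    ffi_cif cif;
    ffi_type *args[2] = { &ffi_type_pointer, &ffi_type_uint32 };

    /* One-time preparation, analogous to one ffi_table entry. */
    if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                     &ffi_type_uint64, args) != FFI_OK) {
        return 1;
    }

    void *env = NULL;
    uint32_t x = 21;
    void *values[2] = { &env, &x };
    uint64_t ret;

    /* The interpreter does the same with the function pointer and cif
     * it recovers for INDEX_op_call. */
    ffi_call(&cif, FFI_FN(demo_helper), &ret, values);
    printf("helper returned %llu\n", (unsigned long long)ret);
    return 0;
}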
7
8
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/tcg.c | 58 +++++++++++++++++++
13
tcg/meson.build | 8 ++-
14
tests/docker/dockerfiles/alpine.docker | 1 +
15
tests/docker/dockerfiles/centos8.docker | 1 +
16
tests/docker/dockerfiles/debian10.docker | 1 +
17
.../dockerfiles/fedora-i386-cross.docker | 1 +
18
.../dockerfiles/fedora-win32-cross.docker | 1 +
19
.../dockerfiles/fedora-win64-cross.docker | 1 +
20
tests/docker/dockerfiles/fedora.docker | 1 +
21
tests/docker/dockerfiles/ubuntu.docker | 1 +
22
tests/docker/dockerfiles/ubuntu1804.docker | 1 +
23
tests/docker/dockerfiles/ubuntu2004.docker | 1 +
24
12 files changed, 75 insertions(+), 1 deletion(-)
25
26
diff --git a/tcg/tcg.c b/tcg/tcg.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/tcg.c
29
+++ b/tcg/tcg.c
30
@@ -XXX,XX +XXX,XX @@
31
#include "exec/log.h"
32
#include "tcg-internal.h"
33
34
+#ifdef CONFIG_TCG_INTERPRETER
35
+#include <ffi.h>
36
+#endif
37
+
38
/* Forward declarations for functions declared in tcg-target.c.inc and
39
used here. */
40
static void tcg_target_init(TCGContext *s);
41
@@ -XXX,XX +XXX,XX @@ static const TCGHelperInfo all_helpers[] = {
42
};
43
static GHashTable *helper_table;
44
45
+#ifdef CONFIG_TCG_INTERPRETER
46
+static GHashTable *ffi_table;
47
+
48
+static ffi_type * const typecode_to_ffi[8] = {
49
+ [dh_typecode_void] = &ffi_type_void,
50
+ [dh_typecode_i32] = &ffi_type_uint32,
51
+ [dh_typecode_s32] = &ffi_type_sint32,
52
+ [dh_typecode_i64] = &ffi_type_uint64,
53
+ [dh_typecode_s64] = &ffi_type_sint64,
54
+ [dh_typecode_ptr] = &ffi_type_pointer,
55
+};
56
+#endif
57
+
58
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
59
static void process_op_defs(TCGContext *s);
60
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
61
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
62
(gpointer)&all_helpers[i]);
63
}
64
65
+#ifdef CONFIG_TCG_INTERPRETER
66
+ /* g_direct_hash/equal for direct comparisons on uint32_t. */
67
+ ffi_table = g_hash_table_new(NULL, NULL);
68
+ for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
69
+ struct {
70
+ ffi_cif cif;
71
+ ffi_type *args[];
72
+ } *ca;
73
+ uint32_t typemask = all_helpers[i].typemask;
74
+ gpointer hash = (gpointer)(uintptr_t)typemask;
75
+ ffi_status status;
76
+ int nargs;
77
+
78
+ if (g_hash_table_lookup(ffi_table, hash)) {
79
+ continue;
80
+ }
81
+
82
+ /* Ignoring the return type, find the last non-zero field. */
83
+ nargs = 32 - clz32(typemask >> 3);
84
+ nargs = DIV_ROUND_UP(nargs, 3);
85
+
86
+ ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
87
+ ca->cif.rtype = typecode_to_ffi[typemask & 7];
88
+ ca->cif.nargs = nargs;
89
+
90
+ if (nargs != 0) {
91
+ ca->cif.arg_types = ca->args;
92
+ for (i = 0; i < nargs; ++i) {
93
+ int typecode = extract32(typemask, (i + 1) * 3, 3);
94
+ ca->args[i] = typecode_to_ffi[typecode];
95
+ }
96
+ }
97
+
98
+ status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
99
+ ca->cif.rtype, ca->cif.arg_types);
100
+ assert(status == FFI_OK);
101
+
102
+ g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
103
+ }
104
+#endif
105
+
106
tcg_target_init(s);
107
process_op_defs(s);
108
109
diff --git a/tcg/meson.build b/tcg/meson.build
110
index XXXXXXX..XXXXXXX 100644
111
--- a/tcg/meson.build
112
+++ b/tcg/meson.build
113
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
114
'tcg-op-gvec.c',
115
'tcg-op-vec.c',
116
))
117
-tcg_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c'))
118
+
119
+if get_option('tcg_interpreter')
120
+ libffi = dependency('libffi', version: '>=3.0', required: true,
121
+ method: 'pkg-config', kwargs: static_kwargs)
122
+ specific_ss.add(libffi)
123
+ specific_ss.add(files('tci.c'))
124
+endif
125
126
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
127
diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker
128
index XXXXXXX..XXXXXXX 100644
129
--- a/tests/docker/dockerfiles/alpine.docker
130
+++ b/tests/docker/dockerfiles/alpine.docker
131
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
132
    libaio-dev \
133
    libbpf-dev \
134
    libcap-ng-dev \
135
+    libffi-dev \
136
    libjpeg-turbo-dev \
137
    libnfs-dev \
138
    libpng-dev \
139
diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker
140
index XXXXXXX..XXXXXXX 100644
141
--- a/tests/docker/dockerfiles/centos8.docker
142
+++ b/tests/docker/dockerfiles/centos8.docker
143
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
144
libbpf-devel \
145
libepoxy-devel \
146
libfdt-devel \
147
+ libffi-devel \
148
libgcrypt-devel \
149
lzo-devel \
150
make \
151
diff --git a/tests/docker/dockerfiles/debian10.docker b/tests/docker/dockerfiles/debian10.docker
152
index XXXXXXX..XXXXXXX 100644
153
--- a/tests/docker/dockerfiles/debian10.docker
154
+++ b/tests/docker/dockerfiles/debian10.docker
155
@@ -XXX,XX +XXX,XX @@ RUN apt update && \
156
gdb-multiarch \
157
gettext \
158
git \
159
+ libffi-dev \
160
libncurses5-dev \
161
ninja-build \
162
pkg-config \
163
diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tests/docker/dockerfiles/fedora-i386-cross.docker
166
+++ b/tests/docker/dockerfiles/fedora-i386-cross.docker
167
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
168
findutils \
169
gcc \
170
git \
171
+ libffi-devel.i686 \
172
libtasn1-devel.i686 \
173
libzstd-devel.i686 \
174
make \
175
diff --git a/tests/docker/dockerfiles/fedora-win32-cross.docker b/tests/docker/dockerfiles/fedora-win32-cross.docker
176
index XXXXXXX..XXXXXXX 100644
177
--- a/tests/docker/dockerfiles/fedora-win32-cross.docker
178
+++ b/tests/docker/dockerfiles/fedora-win32-cross.docker
179
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
180
mingw32-gmp \
181
mingw32-gnutls \
182
mingw32-gtk3 \
183
+ mingw32-libffi \
184
mingw32-libjpeg-turbo \
185
mingw32-libpng \
186
mingw32-libtasn1 \
187
diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker
188
index XXXXXXX..XXXXXXX 100644
189
--- a/tests/docker/dockerfiles/fedora-win64-cross.docker
190
+++ b/tests/docker/dockerfiles/fedora-win64-cross.docker
191
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
192
mingw64-glib2 \
193
mingw64-gmp \
194
mingw64-gtk3 \
195
+ mingw64-libffi \
196
mingw64-libjpeg-turbo \
197
mingw64-libpng \
198
mingw64-libtasn1 \
199
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
200
index XXXXXXX..XXXXXXX 100644
201
--- a/tests/docker/dockerfiles/fedora.docker
202
+++ b/tests/docker/dockerfiles/fedora.docker
203
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
204
libepoxy-devel \
205
libfdt-devel \
206
libbpf-devel \
207
+ libffi-devel \
208
libiscsi-devel \
209
libjpeg-devel \
210
libpmem-devel \
211
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
212
index XXXXXXX..XXXXXXX 100644
213
--- a/tests/docker/dockerfiles/ubuntu.docker
214
+++ b/tests/docker/dockerfiles/ubuntu.docker
215
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
216
libdrm-dev \
217
libepoxy-dev \
218
libfdt-dev \
219
+ libffi-dev \
220
libgbm-dev \
221
libgnutls28-dev \
222
libgtk-3-dev \
223
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
224
index XXXXXXX..XXXXXXX 100644
225
--- a/tests/docker/dockerfiles/ubuntu1804.docker
226
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
227
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
228
libdrm-dev \
229
libepoxy-dev \
230
libfdt-dev \
231
+ libffi-dev \
232
libgbm-dev \
233
libgtk-3-dev \
234
libibverbs-dev \
235
diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker
236
index XXXXXXX..XXXXXXX 100644
237
--- a/tests/docker/dockerfiles/ubuntu2004.docker
238
+++ b/tests/docker/dockerfiles/ubuntu2004.docker
239
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
240
libdrm-dev \
241
libepoxy-dev \
242
libfdt-dev \
243
+ libffi-dev \
244
libgbm-dev \
245
libgtk-3-dev \
246
libibverbs-dev \
247
--
248
2.25.1
249
250
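The helper typemask decoded above packs the return-type code into bits [2:0] and each argument type into the next 3 bits; the nargs computation simply finds the highest non-zero 3-bit field. A minimal standalone sketch of that decode (not QEMU code; the typecode values in the example are invented, only the packing mirrors the patch):

    #include <stdint.h>
    #include <stdio.h>

    static int count_args(uint32_t typemask)
    {
        uint32_t args = typemask >> 3;          /* drop the return type */
        if (args == 0) {
            return 0;
        }
        int top = 32 - __builtin_clz(args);     /* highest used bit, 1-based */
        return (top + 2) / 3;                   /* round up to whole 3-bit fields */
    }

    int main(void)
    {
        /* invented encoding: return code 3, first arg code 5, second arg code 1 */
        uint32_t typemask = 3 | (5u << 3) | (1u << 6);
        printf("return code %u, %d arguments\n", typemask & 7u, count_args(typemask));
        return 0;
    }
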
Deleted patch
The current setting is much too pessimistic. Indicating only
the one or two registers that are actually assigned after a
call should avoid unnecessary movement between the register
array and the stack array.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.c.inc | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci/tcg-target.c.inc
16
+++ b/tcg/tci/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
18
tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1;
19
/* Registers available for 64 bit operations. */
20
tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1;
21
- /* TODO: Which registers should be set here? */
22
- tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1;
23
+ /*
24
+ * The interpreter "registers" are in the local stack frame and
25
+ * cannot be clobbered by the called helper functions. However,
26
+ * the interpreter assumes a 64-bit return value and assigns to
27
+ * the return value registers.
28
+ */
29
+ tcg_target_call_clobber_regs =
30
+ MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
31
32
s->reserved_regs = 0;
33
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
34
--
35
2.25.1
36
37
Deleted patch
As the only call-clobbered regs for TCI, these should
receive the least priority.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.c.inc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tci/tcg-target.c.inc
14
+++ b/tcg/tci/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
16
}
17
18
static const int tcg_target_reg_alloc_order[] = {
19
- TCG_REG_R0,
20
- TCG_REG_R1,
21
TCG_REG_R2,
22
TCG_REG_R3,
23
TCG_REG_R4,
24
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
25
TCG_REG_R13,
26
TCG_REG_R14,
27
TCG_REG_R15,
28
+ TCG_REG_R1,
29
+ TCG_REG_R0,
30
};
31
32
#if MAX_OPC_PARAM_IARGS != 6
33
--
34
2.25.1
35
36
Deleted patch
We're about to adjust the offset range on host memory ops,
and the format of branches. Both will require a temporary.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 1 +
tcg/tci/tcg-target.c.inc | 1 +
2 files changed, 2 insertions(+)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tci/tcg-target.h
15
+++ b/tcg/tci/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
TCG_REG_R14,
18
TCG_REG_R15,
19
20
+ TCG_REG_TMP = TCG_REG_R13,
21
TCG_AREG0 = TCG_REG_R14,
22
TCG_REG_CALL_STACK = TCG_REG_R15,
23
} TCGReg;
24
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/tci/tcg-target.c.inc
27
+++ b/tcg/tci/tcg-target.c.inc
28
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
29
MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
30
31
s->reserved_regs = 0;
32
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
33
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
34
35
/* The call arguments come first, followed by the temp storage. */
36
--
37
2.25.1
38
39
Deleted patch
These were already present in tcg-target.c.inc,
but not in the interpreter.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 20 ++++++++++----------
tcg/tci.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tci/tcg-target.h
15
+++ b/tcg/tci/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@
17
#define TCG_TARGET_HAS_ext16s_i32 1
18
#define TCG_TARGET_HAS_ext8u_i32 1
19
#define TCG_TARGET_HAS_ext16u_i32 1
20
-#define TCG_TARGET_HAS_andc_i32 0
21
+#define TCG_TARGET_HAS_andc_i32 1
22
#define TCG_TARGET_HAS_deposit_i32 1
23
#define TCG_TARGET_HAS_extract_i32 0
24
#define TCG_TARGET_HAS_sextract_i32 0
25
#define TCG_TARGET_HAS_extract2_i32 0
26
-#define TCG_TARGET_HAS_eqv_i32 0
27
-#define TCG_TARGET_HAS_nand_i32 0
28
-#define TCG_TARGET_HAS_nor_i32 0
29
+#define TCG_TARGET_HAS_eqv_i32 1
30
+#define TCG_TARGET_HAS_nand_i32 1
31
+#define TCG_TARGET_HAS_nor_i32 1
32
#define TCG_TARGET_HAS_clz_i32 0
33
#define TCG_TARGET_HAS_ctz_i32 0
34
#define TCG_TARGET_HAS_ctpop_i32 0
35
#define TCG_TARGET_HAS_neg_i32 1
36
#define TCG_TARGET_HAS_not_i32 1
37
-#define TCG_TARGET_HAS_orc_i32 0
38
+#define TCG_TARGET_HAS_orc_i32 1
39
#define TCG_TARGET_HAS_rot_i32 1
40
#define TCG_TARGET_HAS_movcond_i32 1
41
#define TCG_TARGET_HAS_muls2_i32 0
42
@@ -XXX,XX +XXX,XX @@
43
#define TCG_TARGET_HAS_ext8u_i64 1
44
#define TCG_TARGET_HAS_ext16u_i64 1
45
#define TCG_TARGET_HAS_ext32u_i64 1
46
-#define TCG_TARGET_HAS_andc_i64 0
47
-#define TCG_TARGET_HAS_eqv_i64 0
48
-#define TCG_TARGET_HAS_nand_i64 0
49
-#define TCG_TARGET_HAS_nor_i64 0
50
+#define TCG_TARGET_HAS_andc_i64 1
51
+#define TCG_TARGET_HAS_eqv_i64 1
52
+#define TCG_TARGET_HAS_nand_i64 1
53
+#define TCG_TARGET_HAS_nor_i64 1
54
#define TCG_TARGET_HAS_clz_i64 0
55
#define TCG_TARGET_HAS_ctz_i64 0
56
#define TCG_TARGET_HAS_ctpop_i64 0
57
#define TCG_TARGET_HAS_neg_i64 1
58
#define TCG_TARGET_HAS_not_i64 1
59
-#define TCG_TARGET_HAS_orc_i64 0
60
+#define TCG_TARGET_HAS_orc_i64 1
61
#define TCG_TARGET_HAS_rot_i64 1
62
#define TCG_TARGET_HAS_movcond_i64 1
63
#define TCG_TARGET_HAS_muls2_i64 0
64
diff --git a/tcg/tci.c b/tcg/tci.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/tci.c
67
+++ b/tcg/tci.c
68
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
69
tci_args_rrr(insn, &r0, &r1, &r2);
70
regs[r0] = regs[r1] ^ regs[r2];
71
break;
72
+#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64
73
+ CASE_32_64(andc)
74
+ tci_args_rrr(insn, &r0, &r1, &r2);
75
+ regs[r0] = regs[r1] & ~regs[r2];
76
+ break;
77
+#endif
78
+#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64
79
+ CASE_32_64(orc)
80
+ tci_args_rrr(insn, &r0, &r1, &r2);
81
+ regs[r0] = regs[r1] | ~regs[r2];
82
+ break;
83
+#endif
84
+#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64
85
+ CASE_32_64(eqv)
86
+ tci_args_rrr(insn, &r0, &r1, &r2);
87
+ regs[r0] = ~(regs[r1] ^ regs[r2]);
88
+ break;
89
+#endif
90
+#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64
91
+ CASE_32_64(nand)
92
+ tci_args_rrr(insn, &r0, &r1, &r2);
93
+ regs[r0] = ~(regs[r1] & regs[r2]);
94
+ break;
95
+#endif
96
+#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64
97
+ CASE_32_64(nor)
98
+ tci_args_rrr(insn, &r0, &r1, &r2);
99
+ regs[r0] = ~(regs[r1] | regs[r2]);
100
+ break;
101
+#endif
102
103
/* Arithmetic operations (32 bit). */
104
105
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
106
case INDEX_op_or_i64:
107
case INDEX_op_xor_i32:
108
case INDEX_op_xor_i64:
109
+ case INDEX_op_andc_i32:
110
+ case INDEX_op_andc_i64:
111
+ case INDEX_op_orc_i32:
112
+ case INDEX_op_orc_i64:
113
+ case INDEX_op_eqv_i32:
114
+ case INDEX_op_eqv_i64:
115
+ case INDEX_op_nand_i32:
116
+ case INDEX_op_nand_i64:
117
+ case INDEX_op_nor_i32:
118
+ case INDEX_op_nor_i64:
119
case INDEX_op_div_i32:
120
case INDEX_op_div_i64:
121
case INDEX_op_rem_i32:
122
--
123
2.25.1
124
125
Deleted patch
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 12 +++++------
tcg/tci.c | 44 ++++++++++++++++++++++++++++++++++++++++
tcg/tci/tcg-target.c.inc | 9 ++++++++
3 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tci/tcg-target.h
13
+++ b/tcg/tci/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@
15
#define TCG_TARGET_HAS_eqv_i32 1
16
#define TCG_TARGET_HAS_nand_i32 1
17
#define TCG_TARGET_HAS_nor_i32 1
18
-#define TCG_TARGET_HAS_clz_i32 0
19
-#define TCG_TARGET_HAS_ctz_i32 0
20
-#define TCG_TARGET_HAS_ctpop_i32 0
21
+#define TCG_TARGET_HAS_clz_i32 1
22
+#define TCG_TARGET_HAS_ctz_i32 1
23
+#define TCG_TARGET_HAS_ctpop_i32 1
24
#define TCG_TARGET_HAS_neg_i32 1
25
#define TCG_TARGET_HAS_not_i32 1
26
#define TCG_TARGET_HAS_orc_i32 1
27
@@ -XXX,XX +XXX,XX @@
28
#define TCG_TARGET_HAS_eqv_i64 1
29
#define TCG_TARGET_HAS_nand_i64 1
30
#define TCG_TARGET_HAS_nor_i64 1
31
-#define TCG_TARGET_HAS_clz_i64 0
32
-#define TCG_TARGET_HAS_ctz_i64 0
33
-#define TCG_TARGET_HAS_ctpop_i64 0
34
+#define TCG_TARGET_HAS_clz_i64 1
35
+#define TCG_TARGET_HAS_ctz_i64 1
36
+#define TCG_TARGET_HAS_ctpop_i64 1
37
#define TCG_TARGET_HAS_neg_i64 1
38
#define TCG_TARGET_HAS_not_i64 1
39
#define TCG_TARGET_HAS_orc_i64 1
40
diff --git a/tcg/tci.c b/tcg/tci.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/tci.c
43
+++ b/tcg/tci.c
44
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
45
tci_args_rrr(insn, &r0, &r1, &r2);
46
regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2];
47
break;
48
+#if TCG_TARGET_HAS_clz_i32
49
+ case INDEX_op_clz_i32:
50
+ tci_args_rrr(insn, &r0, &r1, &r2);
51
+ tmp32 = regs[r1];
52
+ regs[r0] = tmp32 ? clz32(tmp32) : regs[r2];
53
+ break;
54
+#endif
55
+#if TCG_TARGET_HAS_ctz_i32
56
+ case INDEX_op_ctz_i32:
57
+ tci_args_rrr(insn, &r0, &r1, &r2);
58
+ tmp32 = regs[r1];
59
+ regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2];
60
+ break;
61
+#endif
62
+#if TCG_TARGET_HAS_ctpop_i32
63
+ case INDEX_op_ctpop_i32:
64
+ tci_args_rr(insn, &r0, &r1);
65
+ regs[r0] = ctpop32(regs[r1]);
66
+ break;
67
+#endif
68
69
/* Shift/rotate operations (32 bit). */
70
71
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
72
tci_args_rrr(insn, &r0, &r1, &r2);
73
regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2];
74
break;
75
+#if TCG_TARGET_HAS_clz_i64
76
+ case INDEX_op_clz_i64:
77
+ tci_args_rrr(insn, &r0, &r1, &r2);
78
+ regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2];
79
+ break;
80
+#endif
81
+#if TCG_TARGET_HAS_ctz_i64
82
+ case INDEX_op_ctz_i64:
83
+ tci_args_rrr(insn, &r0, &r1, &r2);
84
+ regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2];
85
+ break;
86
+#endif
87
+#if TCG_TARGET_HAS_ctpop_i64
88
+ case INDEX_op_ctpop_i64:
89
+ tci_args_rr(insn, &r0, &r1);
90
+ regs[r0] = ctpop64(regs[r1]);
91
+ break;
92
+#endif
93
94
/* Shift/rotate operations (64 bit). */
95
96
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
97
case INDEX_op_not_i64:
98
case INDEX_op_neg_i32:
99
case INDEX_op_neg_i64:
100
+ case INDEX_op_ctpop_i32:
101
+ case INDEX_op_ctpop_i64:
102
tci_args_rr(insn, &r0, &r1);
103
info->fprintf_func(info->stream, "%-12s %s, %s",
104
op_name, str_r(r0), str_r(r1));
105
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
106
case INDEX_op_rotl_i64:
107
case INDEX_op_rotr_i32:
108
case INDEX_op_rotr_i64:
109
+ case INDEX_op_clz_i32:
110
+ case INDEX_op_clz_i64:
111
+ case INDEX_op_ctz_i32:
112
+ case INDEX_op_ctz_i64:
113
tci_args_rrr(insn, &r0, &r1, &r2);
114
info->fprintf_func(info->stream, "%-12s %s, %s, %s",
115
op_name, str_r(r0), str_r(r1), str_r(r2));
116
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/tci/tcg-target.c.inc
119
+++ b/tcg/tci/tcg-target.c.inc
120
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
121
case INDEX_op_extract_i64:
122
case INDEX_op_sextract_i32:
123
case INDEX_op_sextract_i64:
124
+ case INDEX_op_ctpop_i32:
125
+ case INDEX_op_ctpop_i64:
126
return C_O1_I1(r, r);
127
128
case INDEX_op_st8_i32:
129
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
130
case INDEX_op_setcond_i64:
131
case INDEX_op_deposit_i32:
132
case INDEX_op_deposit_i64:
133
+ case INDEX_op_clz_i32:
134
+ case INDEX_op_clz_i64:
135
+ case INDEX_op_ctz_i32:
136
+ case INDEX_op_ctz_i64:
137
return C_O1_I2(r, r, r);
138
139
case INDEX_op_brcond_i32:
140
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
141
CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */
142
CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */
143
CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */
144
+ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */
145
+ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */
146
tcg_out_op_rrr(s, opc, args[0], args[1], args[2]);
147
break;
148
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
150
CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */
151
CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */
152
CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */
153
+ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */
154
tcg_out_op_rr(s, opc, args[0], args[1]);
155
break;
156
157
--
158
2.25.1
159
160
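The clz/ctz opcodes implemented above carry an explicit third operand that supplies the result for a zero input, since clz(0)/ctz(0) would otherwise be undefined on most hosts. A minimal sketch of that semantic (not QEMU code; the helper names are invented):

    #include <stdint.h>

    static uint32_t sketch_clz_i32(uint32_t arg, uint32_t zero_fallback)
    {
        /* Matches the interpreter case: regs[r0] = r1 ? clz32(r1) : regs[r2]. */
        return arg ? (uint32_t)__builtin_clz(arg) : zero_fallback;
    }

    static uint32_t sketch_ctz_i32(uint32_t arg, uint32_t zero_fallback)
    {
        return arg ? (uint32_t)__builtin_ctz(arg) : zero_fallback;
    }
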
Deleted patch
We already had mulu2_i32 for a 32-bit host; expand this to 64-bit
hosts as well. The muls2_i32 and the 64-bit opcodes are new.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 8 ++++----
tcg/tci.c | 35 +++++++++++++++++++++++++++++------
tcg/tci/tcg-target.c.inc | 16 ++++++++++------
3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci/tcg-target.h
16
+++ b/tcg/tci/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@
18
#define TCG_TARGET_HAS_orc_i32 1
19
#define TCG_TARGET_HAS_rot_i32 1
20
#define TCG_TARGET_HAS_movcond_i32 1
21
-#define TCG_TARGET_HAS_muls2_i32 0
22
+#define TCG_TARGET_HAS_muls2_i32 1
23
#define TCG_TARGET_HAS_muluh_i32 0
24
#define TCG_TARGET_HAS_mulsh_i32 0
25
#define TCG_TARGET_HAS_goto_ptr 1
26
@@ -XXX,XX +XXX,XX @@
27
#define TCG_TARGET_HAS_orc_i64 1
28
#define TCG_TARGET_HAS_rot_i64 1
29
#define TCG_TARGET_HAS_movcond_i64 1
30
-#define TCG_TARGET_HAS_muls2_i64 0
31
+#define TCG_TARGET_HAS_muls2_i64 1
32
#define TCG_TARGET_HAS_add2_i32 0
33
#define TCG_TARGET_HAS_sub2_i32 0
34
-#define TCG_TARGET_HAS_mulu2_i32 0
35
+#define TCG_TARGET_HAS_mulu2_i32 1
36
#define TCG_TARGET_HAS_add2_i64 0
37
#define TCG_TARGET_HAS_sub2_i64 0
38
-#define TCG_TARGET_HAS_mulu2_i64 0
39
+#define TCG_TARGET_HAS_mulu2_i64 1
40
#define TCG_TARGET_HAS_muluh_i64 0
41
#define TCG_TARGET_HAS_mulsh_i64 0
42
#else
43
diff --git a/tcg/tci.c b/tcg/tci.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/tci.c
46
+++ b/tcg/tci.c
47
@@ -XXX,XX +XXX,XX @@ __thread uintptr_t tci_tb_ptr;
48
static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
49
uint32_t low_index, uint64_t value)
50
{
51
- regs[low_index] = value;
52
+ regs[low_index] = (uint32_t)value;
53
regs[high_index] = value >> 32;
54
}
55
56
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
57
*r4 = extract32(insn, 24, 4);
58
}
59
60
-#if TCG_TARGET_REG_BITS == 32
61
static void tci_args_rrrr(uint32_t insn,
62
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
63
{
64
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(uint32_t insn,
65
*r2 = extract32(insn, 16, 4);
66
*r3 = extract32(insn, 20, 4);
67
}
68
-#endif
69
70
static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
71
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
72
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
73
T2 = tci_uint64(regs[r5], regs[r4]);
74
tci_write_reg64(regs, r1, r0, T1 - T2);
75
break;
76
+#endif /* TCG_TARGET_REG_BITS == 32 */
77
+#if TCG_TARGET_HAS_mulu2_i32
78
case INDEX_op_mulu2_i32:
79
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
80
- tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]);
81
+ tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3];
82
+ tci_write_reg64(regs, r1, r0, tmp64);
83
break;
84
-#endif /* TCG_TARGET_REG_BITS == 32 */
85
+#endif
86
+#if TCG_TARGET_HAS_muls2_i32
87
+ case INDEX_op_muls2_i32:
88
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
89
+ tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
90
+ tci_write_reg64(regs, r1, r0, tmp64);
91
+ break;
92
+#endif
93
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
94
CASE_32_64(ext8s)
95
tci_args_rr(insn, &r0, &r1);
96
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
97
regs[r0] = ctpop64(regs[r1]);
98
break;
99
#endif
100
+#if TCG_TARGET_HAS_mulu2_i64
101
+ case INDEX_op_mulu2_i64:
102
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
103
+ mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
104
+ break;
105
+#endif
106
+#if TCG_TARGET_HAS_muls2_i64
107
+ case INDEX_op_muls2_i64:
108
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
109
+ muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
110
+ break;
111
+#endif
112
113
/* Shift/rotate operations (64 bit). */
114
115
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
116
str_r(r3), str_r(r4), str_c(c));
117
break;
118
119
-#if TCG_TARGET_REG_BITS == 32
120
case INDEX_op_mulu2_i32:
121
+ case INDEX_op_mulu2_i64:
122
+ case INDEX_op_muls2_i32:
123
+ case INDEX_op_muls2_i64:
124
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
125
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
126
op_name, str_r(r0), str_r(r1),
127
str_r(r2), str_r(r3));
128
break;
129
130
+#if TCG_TARGET_REG_BITS == 32
131
case INDEX_op_add2_i32:
132
case INDEX_op_sub2_i32:
133
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
134
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
135
index XXXXXXX..XXXXXXX 100644
136
--- a/tcg/tci/tcg-target.c.inc
137
+++ b/tcg/tci/tcg-target.c.inc
138
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
139
return C_O2_I4(r, r, r, r, r, r);
140
case INDEX_op_brcond2_i32:
141
return C_O0_I4(r, r, r, r);
142
- case INDEX_op_mulu2_i32:
143
- return C_O2_I2(r, r, r, r);
144
#endif
145
146
+ case INDEX_op_mulu2_i32:
147
+ case INDEX_op_mulu2_i64:
148
+ case INDEX_op_muls2_i32:
149
+ case INDEX_op_muls2_i64:
150
+ return C_O2_I2(r, r, r, r);
151
+
152
case INDEX_op_movcond_i32:
153
case INDEX_op_movcond_i64:
154
case INDEX_op_setcond2_i32:
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
156
tcg_out32(s, insn);
157
}
158
159
-#if TCG_TARGET_REG_BITS == 32
160
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
161
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
162
{
163
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
164
insn = deposit32(insn, 20, 4, r3);
165
tcg_out32(s, insn);
166
}
167
-#endif
168
169
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
170
TCGReg r0, TCGReg r1, TCGReg r2,
171
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
172
args[0], args[1], args[2], args[3], args[4]);
173
tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5]));
174
break;
175
- case INDEX_op_mulu2_i32:
176
+#endif
177
+
178
+ CASE_32_64(mulu2)
179
+ CASE_32_64(muls2)
180
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
181
break;
182
-#endif
183
184
case INDEX_op_qemu_ld_i32:
185
case INDEX_op_qemu_st_i32:
186
--
187
2.25.1
188
189
1
We already had the 32-bit versions for a 32-bit host; expand this
1
Reviewed-by: David Hildenbrand <david@redhat.com>
2
to 64-bit hosts as well. The 64-bit opcodes are new.
3
4
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/tci/tcg-target.h | 8 ++++----
4
target/s390x/mem_helper.c | 40 +++++++++++++++++++--------------------
9
tcg/tci.c | 40 ++++++++++++++++++++++++++--------------
5
target/s390x/translate.c | 25 +++++++++++++++++-------
10
tcg/tci/tcg-target.c.inc | 15 ++++++++-------
6
2 files changed, 38 insertions(+), 27 deletions(-)
11
3 files changed, 38 insertions(+), 25 deletions(-)
12
7
13
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
8
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
14
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci/tcg-target.h
10
--- a/target/s390x/mem_helper.c
16
+++ b/tcg/tci/tcg-target.h
11
+++ b/target/s390x/mem_helper.c
12
@@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
13
Int128 oldv;
14
bool fail;
15
16
- if (!HAVE_CMPXCHG128) {
17
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
18
- }
19
+ assert(HAVE_CMPXCHG128);
20
21
mem_idx = cpu_mmu_index(env, false);
22
oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
23
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
24
{
25
uintptr_t ra = GETPC();
26
uint64_t hi, lo;
27
+ int mem_idx;
28
+ TCGMemOpIdx oi;
29
+ Int128 v;
30
31
- if (HAVE_ATOMIC128) {
32
- int mem_idx = cpu_mmu_index(env, false);
33
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
34
- Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
35
- hi = int128_gethi(v);
36
- lo = int128_getlo(v);
37
- } else {
38
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
39
- }
40
+ assert(HAVE_ATOMIC128);
41
+
42
+ mem_idx = cpu_mmu_index(env, false);
43
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
44
+ v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
45
+ hi = int128_gethi(v);
46
+ lo = int128_getlo(v);
47
48
env->retxl = lo;
49
return hi;
50
@@ -XXX,XX +XXX,XX @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
51
uint64_t low, uint64_t high)
52
{
53
uintptr_t ra = GETPC();
54
+ int mem_idx;
55
+ TCGMemOpIdx oi;
56
+ Int128 v;
57
58
- if (HAVE_ATOMIC128) {
59
- int mem_idx = cpu_mmu_index(env, false);
60
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
61
- Int128 v = int128_make128(low, high);
62
- helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
63
- } else {
64
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
65
- }
66
+ assert(HAVE_ATOMIC128);
67
+
68
+ mem_idx = cpu_mmu_index(env, false);
69
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
70
+ v = int128_make128(low, high);
71
+ helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
72
}
73
74
/* Execute instruction. This instruction executes an insn modified with
75
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/s390x/translate.c
78
+++ b/target/s390x/translate.c
17
@@ -XXX,XX +XXX,XX @@
79
@@ -XXX,XX +XXX,XX @@
18
#define TCG_TARGET_HAS_rot_i64 1
80
#include "trace-tcg.h"
19
#define TCG_TARGET_HAS_movcond_i64 1
81
#include "exec/translator.h"
20
#define TCG_TARGET_HAS_muls2_i64 1
82
#include "exec/log.h"
21
-#define TCG_TARGET_HAS_add2_i32 0
83
+#include "qemu/atomic128.h"
22
-#define TCG_TARGET_HAS_sub2_i32 0
84
23
+#define TCG_TARGET_HAS_add2_i32 1
85
24
+#define TCG_TARGET_HAS_sub2_i32 1
86
/* Information that (most) every instruction needs to manipulate. */
25
#define TCG_TARGET_HAS_mulu2_i32 1
87
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
26
-#define TCG_TARGET_HAS_add2_i64 0
88
int r3 = get_field(s->fields, r3);
27
-#define TCG_TARGET_HAS_sub2_i64 0
89
int d2 = get_field(s->fields, d2);
28
+#define TCG_TARGET_HAS_add2_i64 1
90
int b2 = get_field(s->fields, b2);
29
+#define TCG_TARGET_HAS_sub2_i64 1
91
+ DisasJumpType ret = DISAS_NEXT;
30
#define TCG_TARGET_HAS_mulu2_i64 1
92
TCGv_i64 addr;
31
#define TCG_TARGET_HAS_muluh_i64 0
93
TCGv_i32 t_r1, t_r3;
32
#define TCG_TARGET_HAS_mulsh_i64 0
94
33
diff --git a/tcg/tci.c b/tcg/tci.c
95
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
34
index XXXXXXX..XXXXXXX 100644
96
addr = get_address(s, 0, b2, d2);
35
--- a/tcg/tci.c
97
t_r1 = tcg_const_i32(r1);
36
+++ b/tcg/tci.c
98
t_r3 = tcg_const_i32(r3);
37
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
99
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
38
*c5 = extract32(insn, 28, 4);
100
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
101
+ gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
102
+ } else if (HAVE_CMPXCHG128) {
103
gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
104
} else {
105
- gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
106
+ gen_helper_exit_atomic(cpu_env);
107
+ ret = DISAS_NORETURN;
108
}
109
tcg_temp_free_i64(addr);
110
tcg_temp_free_i32(t_r1);
111
tcg_temp_free_i32(t_r3);
112
113
set_cc_static(s);
114
- return DISAS_NEXT;
115
+ return ret;
39
}
116
}
40
117
41
-#if TCG_TARGET_REG_BITS == 32
118
static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
42
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
119
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
43
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
120
121
static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
44
{
122
{
45
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
123
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
46
*r4 = extract32(insn, 24, 4);
124
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
47
*r5 = extract32(insn, 28, 4);
125
+ gen_helper_lpq(o->out, cpu_env, o->in2);
126
+ } else if (HAVE_ATOMIC128) {
127
gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
128
} else {
129
- gen_helper_lpq(o->out, cpu_env, o->in2);
130
+ gen_helper_exit_atomic(cpu_env);
131
+ return DISAS_NORETURN;
132
}
133
return_low128(o->out2);
134
return DISAS_NEXT;
135
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
136
137
static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
138
{
139
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
140
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
141
+ gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
142
+ } else if (HAVE_ATOMIC128) {
143
gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
144
} else {
145
- gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
146
+ gen_helper_exit_atomic(cpu_env);
147
+ return DISAS_NORETURN;
148
}
149
return DISAS_NEXT;
48
}
150
}
49
-#endif
50
51
static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
52
{
53
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
54
for (;;) {
55
uint32_t insn;
56
TCGOpcode opc;
57
- TCGReg r0, r1, r2, r3, r4;
58
+ TCGReg r0, r1, r2, r3, r4, r5;
59
tcg_target_ulong t1;
60
TCGCond condition;
61
target_ulong taddr;
62
uint8_t pos, len;
63
uint32_t tmp32;
64
uint64_t tmp64;
65
-#if TCG_TARGET_REG_BITS == 32
66
- TCGReg r5;
67
uint64_t T1, T2;
68
-#endif
69
TCGMemOpIdx oi;
70
int32_t ofs;
71
void *ptr;
72
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
73
tb_ptr = ptr;
74
}
75
break;
76
-#if TCG_TARGET_REG_BITS == 32
77
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
78
case INDEX_op_add2_i32:
79
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
80
T1 = tci_uint64(regs[r3], regs[r2]);
81
T2 = tci_uint64(regs[r5], regs[r4]);
82
tci_write_reg64(regs, r1, r0, T1 + T2);
83
break;
84
+#endif
85
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
86
case INDEX_op_sub2_i32:
87
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
88
T1 = tci_uint64(regs[r3], regs[r2]);
89
T2 = tci_uint64(regs[r5], regs[r4]);
90
tci_write_reg64(regs, r1, r0, T1 - T2);
91
break;
92
-#endif /* TCG_TARGET_REG_BITS == 32 */
93
+#endif
94
#if TCG_TARGET_HAS_mulu2_i32
95
case INDEX_op_mulu2_i32:
96
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
97
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
98
muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
99
break;
100
#endif
101
+#if TCG_TARGET_HAS_add2_i64
102
+ case INDEX_op_add2_i64:
103
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
104
+ T1 = regs[r2] + regs[r4];
105
+ T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
106
+ regs[r0] = T1;
107
+ regs[r1] = T2;
108
+ break;
109
+#endif
110
+#if TCG_TARGET_HAS_sub2_i64
111
+ case INDEX_op_sub2_i64:
112
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
113
+ T1 = regs[r2] - regs[r4];
114
+ T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
115
+ regs[r0] = T1;
116
+ regs[r1] = T2;
117
+ break;
118
+#endif
119
120
/* Shift/rotate operations (64 bit). */
121
122
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
123
const char *op_name;
124
uint32_t insn;
125
TCGOpcode op;
126
- TCGReg r0, r1, r2, r3, r4;
127
-#if TCG_TARGET_REG_BITS == 32
128
- TCGReg r5;
129
-#endif
130
+ TCGReg r0, r1, r2, r3, r4, r5;
131
tcg_target_ulong i1;
132
int32_t s2;
133
TCGCond c;
134
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
135
str_r(r2), str_r(r3));
136
break;
137
138
-#if TCG_TARGET_REG_BITS == 32
139
case INDEX_op_add2_i32:
140
+ case INDEX_op_add2_i64:
141
case INDEX_op_sub2_i32:
142
+ case INDEX_op_sub2_i64:
143
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
144
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
145
op_name, str_r(r0), str_r(r1), str_r(r2),
146
str_r(r3), str_r(r4), str_r(r5));
147
break;
148
-#endif
149
150
case INDEX_op_qemu_ld_i64:
151
case INDEX_op_qemu_st_i64:
152
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
153
index XXXXXXX..XXXXXXX 100644
154
--- a/tcg/tci/tcg-target.c.inc
155
+++ b/tcg/tci/tcg-target.c.inc
156
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
157
case INDEX_op_brcond_i64:
158
return C_O0_I2(r, r);
159
160
-#if TCG_TARGET_REG_BITS == 32
161
- /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
162
case INDEX_op_add2_i32:
163
+ case INDEX_op_add2_i64:
164
case INDEX_op_sub2_i32:
165
+ case INDEX_op_sub2_i64:
166
return C_O2_I4(r, r, r, r, r, r);
167
+
168
+#if TCG_TARGET_REG_BITS == 32
169
case INDEX_op_brcond2_i32:
170
return C_O0_I4(r, r, r, r);
171
#endif
172
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
173
tcg_out32(s, insn);
174
}
175
176
-#if TCG_TARGET_REG_BITS == 32
177
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
178
TCGReg r0, TCGReg r1, TCGReg r2,
179
TCGReg r3, TCGReg r4, TCGReg r5)
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
181
insn = deposit32(insn, 28, 4, r5);
182
tcg_out32(s, insn);
183
}
184
-#endif
185
186
static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
187
TCGReg base, intptr_t offset)
188
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
189
tcg_out_op_rr(s, opc, args[0], args[1]);
190
break;
191
192
-#if TCG_TARGET_REG_BITS == 32
193
- case INDEX_op_add2_i32:
194
- case INDEX_op_sub2_i32:
195
+ CASE_32_64(add2)
196
+ CASE_32_64(sub2)
197
tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
198
args[3], args[4], args[5]);
199
break;
200
+
201
+#if TCG_TARGET_REG_BITS == 32
202
case INDEX_op_brcond2_i32:
203
tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP,
204
args[0], args[1], args[2], args[3], args[4]);
205
--
151
--
206
2.25.1
152
2.17.2
207
153
208
154
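The add2_i64/sub2_i64 cases above compute the double-word carry and borrow with an unsigned compare on the low halves rather than by widening to a larger type. A standalone sketch of the same technique (not QEMU code; function names are invented):

    #include <stdint.h>

    static void sketch_add2(uint64_t *lo, uint64_t *hi,
                            uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        uint64_t l = al + bl;
        *lo = l;
        *hi = ah + bh + (l < al);   /* carry out of the low word */
    }

    static void sketch_sub2(uint64_t *lo, uint64_t *hi,
                            uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        *lo = al - bl;
        *hi = ah - bh - (al < bl);  /* borrow from the low word */
    }
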
1
This reverts commit dc09f047eddec8f4a1991c4f5f4a428d7aa3f2c0.
1
From: "Emilio G. Cota" <cota@braap.org>
2
2
3
For tcg, tracepoints are expanded inline in tcg opcodes.
3
Updates can come from other threads, so readers that do not
4
Using a helper which generates a second tracepoint is incorrect.
4
take tlb_lock must use atomic_read to avoid undefined
5
5
behaviour (UB).
6
For system mode, the extraction and re-packing of MemOp and mmu_idx
6
7
lost the alignment information from MemOp. So we were no longer
7
This completes the conversion to tlb_lock. This conversion results
8
raising alignment exceptions for !TARGET_ALIGNED_ONLY guests.
8
on average in no performance loss, as the following experiments
9
This can be seen in tests/tcg/xtensa/test_load_store.S.
9
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
10
10
11
For user mode, we must update to the new signature of g2h() so that
11
1. aarch64 bootup+shutdown test:
12
the revert compiles. We can leave set_helper_retaddr for later.
12
13
13
- Before:
14
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
14
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
15
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
15
16
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
17
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
18
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
19
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
20
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
21
22
7.504481349 seconds time elapsed ( +- 0.14% )
23
24
- After:
25
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
26
27
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
28
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
29
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
30
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
31
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
32
33
7.474970463 seconds time elapsed ( +- 0.07% )
34
35
2. SPEC06int:
36
SPEC06int (test set)
37
[Y axis: Speedup over master]
38
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
39
| |
40
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
41
| +++ | +++ tlb-lock-v3 (spinl|ck) |
42
| +++ | | +++ +++ | | |
43
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
44
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
45
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
46
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
47
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
48
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
49
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
50
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
51
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
52
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
53
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
54
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
55
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
56
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
57
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
58
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
59
60
png: https://imgur.com/a/BHzpPTW
61
62
Notes:
63
- tlb-lock-v2 corresponds to an implementation with a mutex.
64
- tlb-lock-v3 corresponds to the current implementation, i.e.
65
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
66
67
Signed-off-by: Emilio G. Cota <cota@braap.org>
68
Message-Id: <20181016153840.25877-1-cota@braap.org>
16
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
69
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
17
---
70
---
18
tcg/tci.c | 73 ++++++++++++++++++++++++++++++++++---------------------
71
accel/tcg/softmmu_template.h | 12 ++++++------
19
1 file changed, 45 insertions(+), 28 deletions(-)
72
include/exec/cpu_ldst.h | 11 ++++++++++-
20
73
include/exec/cpu_ldst_template.h | 2 +-
21
diff --git a/tcg/tci.c b/tcg/tci.c
74
accel/tcg/cputlb.c | 19 +++++++++++++------
22
index XXXXXXX..XXXXXXX 100644
75
4 files changed, 30 insertions(+), 14 deletions(-)
23
--- a/tcg/tci.c
76
24
+++ b/tcg/tci.c
77
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
25
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
78
index XXXXXXX..XXXXXXX 100644
26
return result;
79
--- a/accel/tcg/softmmu_template.h
80
+++ b/accel/tcg/softmmu_template.h
81
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
82
uintptr_t mmu_idx = get_mmuidx(oi);
83
uintptr_t index = tlb_index(env, mmu_idx, addr);
84
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
85
- target_ulong tlb_addr = entry->addr_write;
86
+ target_ulong tlb_addr = tlb_addr_write(entry);
87
unsigned a_bits = get_alignment_bits(get_memop(oi));
88
uintptr_t haddr;
89
90
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
91
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
92
mmu_idx, retaddr);
93
}
94
- tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
95
+ tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
96
}
97
98
/* Handle an IO access. */
99
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
100
cannot evict the first. */
101
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
102
entry2 = tlb_entry(env, mmu_idx, page2);
103
- if (!tlb_hit_page(entry2->addr_write, page2)
104
+ if (!tlb_hit_page(tlb_addr_write(entry2), page2)
105
&& !VICTIM_TLB_HIT(addr_write, page2)) {
106
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
107
mmu_idx, retaddr);
108
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
109
uintptr_t mmu_idx = get_mmuidx(oi);
110
uintptr_t index = tlb_index(env, mmu_idx, addr);
111
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
112
- target_ulong tlb_addr = entry->addr_write;
113
+ target_ulong tlb_addr = tlb_addr_write(entry);
114
unsigned a_bits = get_alignment_bits(get_memop(oi));
115
uintptr_t haddr;
116
117
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
118
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
119
mmu_idx, retaddr);
120
}
121
- tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
122
+ tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
123
}
124
125
/* Handle an IO access. */
126
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
127
cannot evict the first. */
128
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
129
entry2 = tlb_entry(env, mmu_idx, page2);
130
- if (!tlb_hit_page(entry2->addr_write, page2)
131
+ if (!tlb_hit_page(tlb_addr_write(entry2), page2)
132
&& !VICTIM_TLB_HIT(addr_write, page2)) {
133
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
134
mmu_idx, retaddr);
135
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
136
index XXXXXXX..XXXXXXX 100644
137
--- a/include/exec/cpu_ldst.h
138
+++ b/include/exec/cpu_ldst.h
139
@@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr;
140
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
141
#include "tcg.h"
142
143
+static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
144
+{
145
+#if TCG_OVERSIZED_GUEST
146
+ return entry->addr_write;
147
+#else
148
+ return atomic_read(&entry->addr_write);
149
+#endif
150
+}
151
+
152
/* Find the TLB index corresponding to the mmu_idx + address pair. */
153
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
154
target_ulong addr)
155
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
156
tlb_addr = tlbentry->addr_read;
157
break;
158
case 1:
159
- tlb_addr = tlbentry->addr_write;
160
+ tlb_addr = tlb_addr_write(tlbentry);
161
break;
162
case 2:
163
tlb_addr = tlbentry->addr_code;
164
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
165
index XXXXXXX..XXXXXXX 100644
166
--- a/include/exec/cpu_ldst_template.h
167
+++ b/include/exec/cpu_ldst_template.h
168
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
169
addr = ptr;
170
mmu_idx = CPU_MMU_INDEX;
171
entry = tlb_entry(env, mmu_idx, addr);
172
- if (unlikely(entry->addr_write !=
173
+ if (unlikely(tlb_addr_write(entry) !=
174
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
175
oi = make_memop_idx(SHIFT, mmu_idx);
176
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
177
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
178
index XXXXXXX..XXXXXXX 100644
179
--- a/accel/tcg/cputlb.c
180
+++ b/accel/tcg/cputlb.c
181
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
182
target_ulong page)
183
{
184
return tlb_hit_page(tlb_entry->addr_read, page) ||
185
- tlb_hit_page(tlb_entry->addr_write, page) ||
186
+ tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
187
tlb_hit_page(tlb_entry->addr_code, page);
27
}
188
}
28
189
29
-#define qemu_ld_ub \
190
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
30
- cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
191
tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
31
-#define qemu_ld_leuw \
192
32
- cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
193
entry = tlb_entry(env, mmu_idx, addr);
33
-#define qemu_ld_leul \
194
- tlb_addr = entry->addr_write;
34
- cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
195
+ tlb_addr = tlb_addr_write(entry);
35
-#define qemu_ld_leq \
196
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
36
- cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
197
/* RAM access */
37
-#define qemu_ld_beuw \
198
uintptr_t haddr = addr + entry->addend;
38
- cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
199
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
39
-#define qemu_ld_beul \
200
assert_cpu_is_self(ENV_GET_CPU(env));
40
- cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
201
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
41
-#define qemu_ld_beq \
202
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
42
- cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
203
- target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
43
-#define qemu_st_b(X) \
204
+ target_ulong cmp;
44
- cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
205
+
45
-#define qemu_st_lew(X) \
206
+ /* elt_ofs might correspond to .addr_write, so use atomic_read */
46
- cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
207
+#if TCG_OVERSIZED_GUEST
47
-#define qemu_st_lel(X) \
208
+ cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
48
- cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
49
-#define qemu_st_leq(X) \
50
- cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
51
-#define qemu_st_bew(X) \
52
- cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
53
-#define qemu_st_bel(X) \
54
- cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
55
-#define qemu_st_beq(X) \
56
- cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
57
+#ifdef CONFIG_SOFTMMU
58
+# define qemu_ld_ub \
59
+ helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
60
+# define qemu_ld_leuw \
61
+ helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
62
+# define qemu_ld_leul \
63
+ helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
64
+# define qemu_ld_leq \
65
+ helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
66
+# define qemu_ld_beuw \
67
+ helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
68
+# define qemu_ld_beul \
69
+ helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
70
+# define qemu_ld_beq \
71
+ helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
72
+# define qemu_st_b(X) \
73
+ helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
74
+# define qemu_st_lew(X) \
75
+ helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
76
+# define qemu_st_lel(X) \
77
+ helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
78
+# define qemu_st_leq(X) \
79
+ helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
80
+# define qemu_st_bew(X) \
81
+ helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
82
+# define qemu_st_bel(X) \
83
+ helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
84
+# define qemu_st_beq(X) \
85
+ helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
86
+#else
209
+#else
87
+# define qemu_ld_ub ldub_p(g2h(env_cpu(env), taddr))
210
+ cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
88
+# define qemu_ld_leuw lduw_le_p(g2h(env_cpu(env), taddr))
89
+# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(env_cpu(env), taddr))
90
+# define qemu_ld_leq ldq_le_p(g2h(env_cpu(env), taddr))
91
+# define qemu_ld_beuw lduw_be_p(g2h(env_cpu(env), taddr))
92
+# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(env_cpu(env), taddr))
93
+# define qemu_ld_beq ldq_be_p(g2h(env_cpu(env), taddr))
94
+# define qemu_st_b(X) stb_p(g2h(env_cpu(env), taddr), X)
95
+# define qemu_st_lew(X) stw_le_p(g2h(env_cpu(env), taddr), X)
96
+# define qemu_st_lel(X) stl_le_p(g2h(env_cpu(env), taddr), X)
97
+# define qemu_st_leq(X) stq_le_p(g2h(env_cpu(env), taddr), X)
98
+# define qemu_st_bew(X) stw_be_p(g2h(env_cpu(env), taddr), X)
99
+# define qemu_st_bel(X) stl_be_p(g2h(env_cpu(env), taddr), X)
100
+# define qemu_st_beq(X) stq_be_p(g2h(env_cpu(env), taddr), X)
101
+#endif
211
+#endif
102
212
103
static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
213
if (cmp == page) {
104
TCGMemOpIdx oi, const void *tb_ptr)
214
/* Found entry in victim tlb, swap tlb and iotlb. */
215
@@ -XXX,XX +XXX,XX @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
216
uintptr_t index = tlb_index(env, mmu_idx, addr);
217
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
218
219
- if (!tlb_hit(entry->addr_write, addr)) {
220
+ if (!tlb_hit(tlb_addr_write(entry), addr)) {
221
/* TLB entry is for a different page */
222
if (!VICTIM_TLB_HIT(addr_write, addr)) {
223
tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
224
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
225
size_t mmu_idx = get_mmuidx(oi);
226
uintptr_t index = tlb_index(env, mmu_idx, addr);
227
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
228
- target_ulong tlb_addr = tlbe->addr_write;
229
+ target_ulong tlb_addr = tlb_addr_write(tlbe);
230
TCGMemOp mop = get_memop(oi);
231
int a_bits = get_alignment_bits(mop);
232
int s_bits = mop & MO_SIZE;
233
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
234
tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
235
mmu_idx, retaddr);
236
}
237
- tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
238
+ tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
239
}
240
241
/* Notice an IO access or a needs-MMU-lookup access */
105
--
242
--
106
2.25.1
243
2.17.2
107
244
108
245
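The tlb_addr_write() helper added above exists so that readers which may race with writers holding tlb_lock load addr_write through an atomic access instead of a plain (racy) load. A minimal C11 sketch of the idea (not QEMU code; the entry layout and names are simplified for illustration):

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
        _Atomic uint64_t addr_write;
    } SketchTLBEntry;

    static uint64_t sketch_tlb_addr_write(SketchTLBEntry *entry)
    {
        /* Relaxed ordering: only a tear-free value is needed here. */
        return atomic_load_explicit(&entry->addr_write, memory_order_relaxed);
    }
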
Deleted patch
The longest test at the moment seems to be on a (slower)
aarch64 host, for which test-mmap takes 64 seconds.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
configure | 3 +++
tests/tcg/Makefile.target | 6 ++++--
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
15
index XXXXXXX..XXXXXXX 100755
16
--- a/configure
17
+++ b/configure
18
@@ -XXX,XX +XXX,XX @@ fi
19
if test "$optreset" = "yes" ; then
20
echo "HAVE_OPTRESET=y" >> $config_host_mak
21
fi
22
+if test "$tcg" = "enabled" -a "$tcg_interpreter" = "true" ; then
23
+ echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
24
+fi
25
if test "$fdatasync" = "yes" ; then
26
echo "CONFIG_FDATASYNC=y" >> $config_host_mak
27
fi
28
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tests/tcg/Makefile.target
31
+++ b/tests/tcg/Makefile.target
32
@@ -XXX,XX +XXX,XX @@ LDFLAGS=
33
QEMU_OPTS=
34
35
36
-# If TCG debugging is enabled things are a lot slower
37
-ifeq ($(CONFIG_DEBUG_TCG),y)
38
+# If TCG debugging, or TCI is enabled things are a lot slower
39
+ifneq ($(CONFIG_TCG_INTERPRETER),)
40
+TIMEOUT=90
41
+else ifneq ($(CONFIG_DEBUG_TCG),)
42
TIMEOUT=60
43
else
44
TIMEOUT=15
45
--
46
2.25.1
47
48
Deleted patch
We should not be aligning the offset in temp_allocate_frame,
because the odd offset produces an aligned address in the end.
Instead, pass the logical offset into tcg_set_frame and add
the stack bias last.

Cc: qemu-stable@nongnu.org
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 9 +++------
tcg/sparc/tcg-target.c.inc | 16 ++++++++++------
2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg.c
17
+++ b/tcg/tcg.c
18
@@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s)
19
20
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
21
{
22
-#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
23
- /* Sparc64 stack is accessed with offset of 2047 */
24
- s->current_frame_offset = (s->current_frame_offset +
25
- (tcg_target_long)sizeof(tcg_target_long) - 1) &
26
- ~(sizeof(tcg_target_long) - 1);
27
-#endif
28
if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
29
s->frame_end) {
30
tcg_abort();
31
}
32
ts->mem_offset = s->current_frame_offset;
33
+#if defined(__sparc__)
34
+ ts->mem_offset += TCG_TARGET_STACK_BIAS;
35
+#endif
36
ts->mem_base = s->frame_temp;
37
ts->mem_allocated = 1;
38
s->current_frame_offset += sizeof(tcg_target_long);
39
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
40
index XXXXXXX..XXXXXXX 100644
41
--- a/tcg/sparc/tcg-target.c.inc
42
+++ b/tcg/sparc/tcg-target.c.inc
43
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
44
{
45
int tmp_buf_size, frame_size;
46
47
- /* The TCG temp buffer is at the top of the frame, immediately
48
- below the frame pointer. */
49
+ /*
50
+ * The TCG temp buffer is at the top of the frame, immediately
51
+ * below the frame pointer. Use the logical (aligned) offset here;
52
+ * the stack bias is applied in temp_allocate_frame().
53
+ */
54
tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
55
- tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
56
- tmp_buf_size);
57
+ tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size);
58
59
- /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
60
- otherwise the minimal frame usable by callees. */
61
+ /*
62
+ * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
63
+ * otherwise the minimal frame usable by callees.
64
+ */
65
frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
66
frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
67
frame_size += TCG_TARGET_STACK_ALIGN - 1;
68
--
69
2.25.1
70
71