v2: Drop a few patches, which showed regressions in CI
for jobs that are not run for forks. :-/

r~

The following changes since commit f9d58e0ca53b3f470b84725a7b5e47fcf446a2ea:

  Merge tag 'pull-9p-20230516' of https://github.com/cschoenebeck/qemu into staging (2023-05-16 10:21:44 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230516-2

for you to fetch changes up to 44fe8f47fce3bdc8dcf49e3f001519a375ecc88a:

  tcg: Split out exec/user/guest-base.h (2023-05-16 16:31:05 -0700)

----------------------------------------------------------------
tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

----------------------------------------------------------------
Richard Henderson (74):
      tcg/i386: Set P_REXW in tcg_out_addi_ptr
      include/exec/memop: Add MO_ATOM_*
      accel/tcg: Honor atomicity of loads
      accel/tcg: Honor atomicity of stores
      tcg: Unify helper_{be,le}_{ld,st}*
      accel/tcg: Implement helper_{ld,st}*_mmu for user-only
      tcg/tci: Use helper_{ld,st}*_mmu for user-only
      tcg: Add 128-bit guest memory primitives
      meson: Detect atomic128 support with optimization
      tcg/i386: Add have_atomic16
      tcg/aarch64: Detect have_lse, have_lse2 for linux
      tcg/aarch64: Detect have_lse, have_lse2 for darwin
      tcg/i386: Use full load/store helpers in user-only mode
      tcg/aarch64: Use full load/store helpers in user-only mode
      tcg/ppc: Use full load/store helpers in user-only mode
      tcg/loongarch64: Use full load/store helpers in user-only mode
      tcg/riscv: Use full load/store helpers in user-only mode
      tcg/arm: Adjust constraints on qemu_ld/st
      tcg/arm: Use full load/store helpers in user-only mode
      tcg/mips: Use full load/store helpers in user-only mode
      tcg/s390x: Use full load/store helpers in user-only mode
      tcg/sparc64: Allocate %g2 as a third temporary
      tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13
      target/sparc64: Remove tcg_out_movi_s13 case from tcg_out_movi_imm32
      tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32
      tcg/sparc64: Split out tcg_out_movi_s32
      tcg/sparc64: Use standard slow path for softmmu
      accel/tcg: Remove helper_unaligned_{ld,st}
      tcg/loongarch64: Check the host supports unaligned accesses
      tcg/loongarch64: Support softmmu unaligned accesses
      tcg/riscv: Support softmmu unaligned accesses
      tcg: Introduce tcg_target_has_memory_bswap
      tcg: Add INDEX_op_qemu_{ld,st}_i128
      tcg: Introduce tcg_out_movext3
      tcg: Merge tcg_out_helper_load_regs into caller
      tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}
      tcg: Introduce atom_and_align_for_opc
      tcg/i386: Use atom_and_align_for_opc
      tcg/aarch64: Use atom_and_align_for_opc
      tcg/arm: Use atom_and_align_for_opc
      tcg/loongarch64: Use atom_and_align_for_opc
      tcg/mips: Use atom_and_align_for_opc
      tcg/ppc: Use atom_and_align_for_opc
      tcg/riscv: Use atom_and_align_for_opc
      tcg/s390x: Use atom_and_align_for_opc
      tcg/sparc64: Use atom_and_align_for_opc
      tcg: Split out memory ops to tcg-op-ldst.c
      tcg: Widen gen_insn_data to uint64_t
      accel/tcg: Widen tcg-ldst.h addresses to uint64_t
      tcg: Widen helper_{ld,st}_i128 addresses to uint64_t
      tcg: Widen helper_atomic_* addresses to uint64_t
      tcg: Widen tcg_gen_code pc_start argument to uint64_t
      accel/tcg: Merge gen_mem_wrapped with plugin_gen_empty_mem_callback
      accel/tcg: Merge do_gen_mem_cb into caller
      tcg: Reduce copies for plugin_gen_mem_callbacks
      accel/tcg: Widen plugin_gen_empty_mem_callback to i64
      tcg: Add addr_type to TCGContext
      tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
      tcg: Remove TCGv from tcg_gen_atomic_*
      tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
      tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
      tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
      tcg/i386: Conditionalize tcg_out_extu_i32_i64
      tcg/i386: Adjust type of tlb_mask
      tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/arm: Remove TARGET_LONG_BITS
      tcg/aarch64: Remove USE_GUEST_BASE
      tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Add page_bits and page_mask to TCGContext
      tcg: Add tlb_dyn_max_bits to TCGContext
      tcg: Split out exec/user/guest-base.h

 docs/devel/loads-stores.rst | 36 +-
 docs/devel/tcg-ops.rst | 11 +-
 meson.build | 52 +-
 accel/tcg/tcg-runtime.h | 49 +-
 include/exec/cpu-all.h | 5 +-
 include/exec/memop.h | 37 ++
 include/exec/plugin-gen.h | 4 +-
 include/exec/user/guest-base.h | 12 +
 include/qemu/cpuid.h | 18 +
 include/tcg/tcg-ldst.h | 72 +--
 include/tcg/tcg-op.h | 273 ++++++---
 include/tcg/tcg-opc.h | 41 +-
 include/tcg/tcg.h | 39 +-
 tcg/aarch64/tcg-target.h | 6 +-
 tcg/arm/tcg-target-con-set.h | 16 +-
 tcg/arm/tcg-target-con-str.h | 5 +-
 tcg/arm/tcg-target.h | 3 +-
 tcg/i386/tcg-target.h | 12 +-
 tcg/loongarch64/tcg-target.h | 3 +-
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target.h | 3 +-
 tcg/riscv/tcg-target.h | 4 +-
 tcg/s390x/tcg-target.h | 4 +-
 tcg/sparc64/tcg-target-con-set.h | 2 -
 tcg/sparc64/tcg-target-con-str.h | 1 -
 tcg/sparc64/tcg-target.h | 4 +-
 tcg/tcg-internal.h | 2 +
 tcg/tci/tcg-target.h | 4 +-
 accel/tcg/cputlb.c | 839 ++++++++++++++++---------
 accel/tcg/plugin-gen.c | 68 +-
 accel/tcg/translate-all.c | 35 +-
 accel/tcg/user-exec.c | 488 ++++++++++-----
 tcg/optimize.c | 19 +-
 tcg/tcg-op-ldst.c | 1234 +++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.c | 864 --------------------------
 tcg/tcg.c | 631 +++++++++++++++----
 tcg/tci.c | 243 +++-----
 accel/tcg/atomic_common.c.inc | 14 +-
 accel/tcg/ldst_atomicity.c.inc | 1262 ++++++++++++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc | 207 +++----
 tcg/arm/tcg-target.c.inc | 246 +++-----
 tcg/i386/tcg-target.c.inc | 240 ++++----
 tcg/loongarch64/tcg-target.c.inc | 123 ++--
 tcg/mips/tcg-target.c.inc | 216 +++----
 tcg/ppc/tcg-target.c.inc | 189 +++---
 tcg/riscv/tcg-target.c.inc | 161 ++---
 tcg/s390x/tcg-target.c.inc | 104 +---
 tcg/sparc64/tcg-target.c.inc | 731 ++++++++--------
 tcg/tci/tcg-target.c.inc | 58 +-
 tcg/meson.build | 1 +
 50 files changed, 5345 insertions(+), 3350 deletions(-)
 create mode 100644 include/exec/user/guest-base.h
 create mode 100644 tcg/tcg-op-ldst.c
 create mode 100644 accel/tcg/ldst_atomicity.c.inc

Deleted cover letter

The following changes since commit ff56877e911782dedc9a424233fd3f62369c258c:

  Merge remote-tracking branch 'remotes/kraxel/tags/vga-20181015-pull-request' into staging (2018-10-15 15:03:45 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20181016

for you to fetch changes up to e3e9d1ea20c75718ce7c528c588a0a497f12f750:

  cputlb: read CPUTLBEntry.addr_write atomically (2018-10-16 10:04:27 -0700)

----------------------------------------------------------------
Queued tcg patches

----------------------------------------------------------------
Emilio G. Cota (10):
      tcg: access cpu->icount_decr.u16.high with atomics
      tcg: fix use of uninitialized variable under CONFIG_PROFILER
      tcg: plug holes in struct TCGProfile
      tcg: distribute tcg_time into TCG contexts
      target/alpha: remove tlb_flush from alpha_cpu_initfn
      target/unicore32: remove tlb_flush from uc32_init_fn
      exec: introduce tlb_init
      cputlb: fix assert_cpu_is_self macro
      cputlb: serialize tlb updates with env->tlb_lock
      cputlb: read CPUTLBEntry.addr_write atomically

Richard Henderson (11):
      tcg: Implement CPU_LOG_TB_NOCHAIN during expansion
      tcg: Add tlb_index and tlb_entry helpers
      tcg: Split CONFIG_ATOMIC128
      target/i386: Convert to HAVE_CMPXCHG128
      target/arm: Convert to HAVE_CMPXCHG128
      target/arm: Check HAVE_CMPXCHG128 at translate time
      target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
      target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
      target/s390x: Split do_cdsg, do_lpq, do_stpq
      target/s390x: Skip wout, cout helpers if op helper does not return
      target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate

 accel/tcg/atomic_template.h | 20 +++-
 accel/tcg/softmmu_template.h | 64 +++++-----
 include/exec/cpu-defs.h | 3 +
 include/exec/cpu_ldst.h | 30 ++++-
 include/exec/cpu_ldst_template.h | 25 ++--
 include/exec/exec-all.h | 8 ++
 include/qemu/atomic128.h | 155 ++++++++++++++++++++++++
 include/qemu/timer.h | 1 -
 target/ppc/helper.h | 2 +-
 tcg/tcg.h | 20 ++--
 accel/tcg/cpu-exec.c | 2 +-
 accel/tcg/cputlb.c | 235 +++++++++++++++++++-----------------
 accel/tcg/tcg-all.c | 2 +-
 accel/tcg/translate-all.c | 2 +-
 accel/tcg/user-exec.c | 5 +-
 cpus.c | 3 +-
 exec.c | 1 +
 monitor.c | 13 +-
 qom/cpu.c | 2 +-
 target/alpha/cpu.c | 1 -
 target/arm/helper-a64.c | 251 +++++++++++++++++++--------------------
 target/arm/translate-a64.c | 38 +++---
 target/i386/mem_helper.c | 9 +-
 target/ppc/mem_helper.c | 33 ++++-
 target/ppc/translate.c | 115 +++++++++---------
 target/s390x/mem_helper.c | 202 +++++++++++++++----------------
 target/s390x/translate.c | 45 +++++--
 target/unicore32/cpu.c | 2 -
 tcg/tcg-op.c | 9 +-
 tcg/tcg.c | 25 +++-
 configure | 19 +++
 31 files changed, 830 insertions(+), 512 deletions(-)
 create mode 100644 include/qemu/atomic128.h
diff view generated by jsdifflib
Deleted patch
Rather than test NOCHAIN before linking, do not emit the
goto_tb opcode at all. We already do this for goto_ptr.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c | 2 +-
 tcg/tcg-op.c | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_find(CPUState *cpu,
}
#endif
/* See if we can patch the calling TB. */
- if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ if (last_tb) {
tb_add_jump(last_tb, tb_exit, tb);
}
return tb;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
seen this numbered exit before, via tcg_gen_goto_tb. */
tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
#endif
+ /* When not chaining, exit without indicating a link. */
+ if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ val = 0;
+ }
} else {
/* This is an exit via the exitreq label. */
tcg_debug_assert(idx == TB_EXIT_REQUESTED);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx)
tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
tcg_ctx->goto_tb_issue_mask |= 1 << idx;
#endif
- tcg_gen_op1i(INDEX_op_goto_tb, idx);
+ /* When not chaining, we simply fall through to the "fallback" exit. */
+ if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ tcg_gen_op1i(INDEX_op_goto_tb, idx);
+ }
}

void tcg_gen_lookup_and_goto_ptr(void)
--
2.17.2
diff view generated by jsdifflib
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

Consistently access u16.high with atomics to avoid
undefined behaviour in MTTCG.

Note that icount_decr.u16.low is only used in icount mode,
so regular accesses to it are OK.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-all.c | 2 +-
 accel/tcg/translate-all.c | 2 +-
 qom/cpu.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
} else {
- cpu->icount_decr.u16.high = -1;
+ atomic_set(&cpu->icount_decr.u16.high, -1);
if (use_icount &&
!cpu->can_do_io
&& (mask & ~old_mask) != 0) {
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ void cpu_interrupt(CPUState *cpu, int mask)
{
g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask;
- cpu->icount_decr.u16.high = -1;
+ atomic_set(&cpu->icount_decr.u16.high, -1);
}

/*
diff --git a/qom/cpu.c b/qom/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(CPUState *cpu)
cpu->mem_io_pc = 0;
cpu->mem_io_vaddr = 0;
cpu->icount_extra = 0;
- cpu->icount_decr.u32 = 0;
+ atomic_set(&cpu->icount_decr.u32, 0);
cpu->can_do_io = 1;
cpu->exception_index = -1;
cpu->crash_occurred = false;
--
2.17.2
diff view generated by jsdifflib
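The undefined behaviour the patch above fixes comes from mixing plain and atomic accesses to the same field from different threads. A minimal stand-alone sketch of the access pattern it adopts — GCC __atomic builtins standing in for QEMU's atomic_set()/atomic_read(), and a hypothetical IcountDecr type in place of the real icount_decr union (host-endian field order elided):

```c
#include <stdint.h>

/* Hypothetical stand-in for CPUState::icount_decr. */
typedef union {
    uint32_t u32;
    struct {
        uint16_t low;   /* icount budget, touched only in icount mode */
        uint16_t high;  /* exit-request flag, written cross-thread    */
    } u16;
} IcountDecr;

/* Another thread requests an exit: publish the high half atomically. */
static void request_exit(IcountDecr *d)
{
    __atomic_store_n(&d->u16.high, (uint16_t)-1, __ATOMIC_RELAXED);
}

/* The vCPU thread polls the same field; pairing a plain read here with
 * the store above would be a data race under MTTCG. */
static uint16_t exit_requested(IcountDecr *d)
{
    return __atomic_load_n(&d->u16.high, __ATOMIC_RELAXED);
}
```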
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

We forgot to initialize n in commit 15fa08f845 ("tcg: Dynamically
allocate TCGOps", 2017-12-29).

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-3-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)

#ifdef CONFIG_PROFILER
{
- int n;
+ int n = 0;

QTAILQ_FOREACH(op, &s->ops, link) {
n++;
--
2.17.2
diff view generated by jsdifflib
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

This plugs two 4-byte holes in 64-bit.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-4-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGProfile {
int64_t tb_count;
int64_t op_count; /* total insn count */
int op_count_max; /* max insn per TB */
- int64_t temp_count;
int temp_count_max;
+ int64_t temp_count;
int64_t del_op_count;
int64_t code_in_len;
int64_t code_out_len;
--
2.17.2
diff view generated by jsdifflib
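For reference, the "holes" are alignment padding: a lone 32-bit int sandwiched between 64-bit fields forces 4 bytes of padding on an LP64 host, so grouping the two ints lets them share one 8-byte slot. A hypothetical sketch of the effect (not the real TCGProfile layout):

```c
#include <stdint.h>

/* Before: two 4-byte holes on an LP64 host, sizeof == 32. */
struct before {
    int64_t a;   /* offset 0                                    */
    int     b;   /* offset 8, 4-byte hole at offset 12          */
    int64_t c;   /* offset 16                                   */
    int     d;   /* offset 24, 4-byte tail padding at offset 28 */
};

/* After: the two ints share one 8-byte slot, sizeof == 24. */
struct after {
    int64_t a;   /* offset 0  */
    int     b;   /* offset 8  */
    int     d;   /* offset 12 */
    int64_t c;   /* offset 16 */
};
```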
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

When we implemented per-vCPU TCG contexts, we forgot to also
distribute the tcg_time counter, which has remained as a global
accessed without any serialization, leading to potentially missed
counts.

Fix it by distributing the field over the TCG contexts, embedding
it into TCGProfile with a field called "cpu_exec_time", which is more
descriptive than "tcg_time". Add a function to query this value
directly, and for completeness, fill in the field in
tcg_profile_snapshot, even though its callers do not use it.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181010144853.13005-5-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/timer.h | 1 -
 tcg/tcg.h | 2 ++
 cpus.c | 3 ++-
 monitor.c | 13 ++++++++++---
 tcg/tcg.c | 23 +++++++++++++++++++++++
 5 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -XXX,XX +XXX,XX @@ static inline int64_t profile_getclock(void)
return get_clock();
}

-extern int64_t tcg_time;
extern int64_t dev_time;
#endif

diff --git a/tcg/tcg.h b/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOp {
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));

typedef struct TCGProfile {
+ int64_t cpu_exec_time;
int64_t tb_count1;
int64_t tb_count;
int64_t op_count; /* total insn count */
@@ -XXX,XX +XXX,XX @@ int tcg_check_temp_count(void);
#define tcg_check_temp_count() 0
#endif

+int64_t tcg_cpu_exec_time(void);
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);

diff --git a/cpus.c b/cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/cpus.c
+++ b/cpus.c
@@ -XXX,XX +XXX,XX @@ static int tcg_cpu_exec(CPUState *cpu)
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
- tcg_time += profile_getclock() - ti;
+ atomic_set(&tcg_ctx->prof.cpu_exec_time,
+ tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
return ret;
}
diff --git a/monitor.c b/monitor.c
index XXXXXXX..XXXXXXX 100644
--- a/monitor.c
+++ b/monitor.c
@@ -XXX,XX +XXX,XX @@
#include "sysemu/cpus.h"
#include "sysemu/iothread.h"
#include "qemu/cutils.h"
+#include "tcg/tcg.h"

#if defined(TARGET_S390X)
#include "hw/s390x/storage-keys.h"
@@ -XXX,XX +XXX,XX @@ static void hmp_info_numa(Monitor *mon, const QDict *qdict)

#ifdef CONFIG_PROFILER

-int64_t tcg_time;
int64_t dev_time;

static void hmp_info_profile(Monitor *mon, const QDict *qdict)
{
+ static int64_t last_cpu_exec_time;
+ int64_t cpu_exec_time;
+ int64_t delta;
+
+ cpu_exec_time = tcg_cpu_exec_time();
+ delta = cpu_exec_time - last_cpu_exec_time;
+
monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n",
dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n",
- tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND);
- tcg_time = 0;
+ delta, delta / (double)NANOSECONDS_PER_SECOND);
+ last_cpu_exec_time = cpu_exec_time;
dev_time = 0;
}
#else
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@
/* Define to jump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

+#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"
@@ -XXX,XX +XXX,XX @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
const TCGProfile *orig = &s->prof;

if (counters) {
+ PROF_ADD(prof, orig, cpu_exec_time);
PROF_ADD(prof, orig, tb_count1);
PROF_ADD(prof, orig, tb_count);
PROF_ADD(prof, orig, op_count);
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
prof.table_op_count[i]);
}
}
+
+int64_t tcg_cpu_exec_time(void)
+{
+ unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
+ unsigned int i;
+ int64_t ret = 0;
+
+ for (i = 0; i < n_ctxs; i++) {
+ const TCGContext *s = atomic_read(&tcg_ctxs[i]);
+ const TCGProfile *prof = &s->prof;
+
+ ret += atomic_read(&prof->cpu_exec_time);
+ }
+ return ret;
+}
#else
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
+
+int64_t tcg_cpu_exec_time(void)
+{
+ error_report("%s: TCG profiler not compiled", __func__);
+ exit(EXIT_FAILURE);
+}
#endif

--
2.17.2
diff view generated by jsdifflib
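The scheme above follows the pattern already used for the other TCGProfile counters: each context's counter is written only by its owning thread (published with atomic_set() so readers never see a torn 64-bit value), and a reader sums a snapshot across all contexts. A reduced sketch of that idea with hypothetical names, using GCC __atomic builtins in place of QEMU's wrappers:

```c
#include <stdint.h>

#define MAX_CTXS 64

typedef struct {
    int64_t cpu_exec_time;   /* written only by the owning vCPU thread */
} Prof;

static Prof *profs[MAX_CTXS];
static unsigned n_profs;

/* Owner thread: accumulate locally, publish atomically. */
static void prof_account(Prof *p, int64_t delta)
{
    __atomic_store_n(&p->cpu_exec_time, p->cpu_exec_time + delta,
                     __ATOMIC_RELAXED);
}

/* Any thread: sum a snapshot over all registered contexts. */
static int64_t prof_total(void)
{
    unsigned n = __atomic_load_n(&n_profs, __ATOMIC_RELAXED);
    int64_t total = 0;

    for (unsigned i = 0; i < n; i++) {
        total += __atomic_load_n(&profs[i]->cpu_exec_time, __ATOMIC_RELAXED);
    }
    return total;
}
```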
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

As far as I can tell tlb_flush does not need to be called
this early. tlb_flush is eventually called after the CPU
has been realized.

This change paves the way to the introduction of tlb_init,
which will be called from cpu_exec_realizefn.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181009174557.16125-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/alpha/cpu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj)
CPUAlphaState *env = &cpu->env;

cs->env_ptr = env;
- tlb_flush(cs);

env->lock_addr = -1;
#if defined(CONFIG_USER_ONLY)
--
2.17.2
diff view generated by jsdifflib
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

As far as I can tell tlb_flush does not need to be called
this early. tlb_flush is eventually called after the CPU
has been realized.

This change paves the way to the introduction of tlb_init,
which will be called from cpu_exec_realizefn.

Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181009174557.16125-3-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/unicore32/cpu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/unicore32/cpu.c
+++ b/target/unicore32/cpu.c
@@ -XXX,XX +XXX,XX @@ static void uc32_cpu_initfn(Object *obj)
env->uncached_asr = ASR_MODE_PRIV;
env->regs[31] = 0x03000000;
#endif
-
- tlb_flush(cs);
}

static const VMStateDescription vmstate_uc32_cpu = {
--
2.17.2
diff view generated by jsdifflib
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

Paves the way for the addition of a per-TLB lock.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181009174557.16125-4-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 8 ++++++++
 accel/tcg/cputlb.c | 4 ++++
 exec.c | 1 +
 3 files changed, 13 insertions(+)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ void cpu_address_space_init(CPUState *cpu, int asidx,

#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
/* cputlb.c */
+/**
+ * tlb_init - initialize a CPU's TLB
+ * @cpu: CPU whose TLB should be initialized
+ */
+void tlb_init(CPUState *cpu);
/**
* tlb_flush_page:
* @cpu: CPU whose TLB should be flushed
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
uintptr_t retaddr);
#else
+static inline void tlb_init(CPUState *cpu)
+{
+}
static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
}
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

+void tlb_init(CPUState *cpu)
+{
+}
+
/* flush_all_helper: run fn across all cpus
*
* If the wait flag is set then the src cpu's helper will be queued as
diff --git a/exec.c b/exec.c
index XXXXXXX..XXXXXXX 100644
--- a/exec.c
+++ b/exec.c
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
tcg_target_initialized = true;
cc->tcg_initialize();
}
+ tlb_init(cpu);

#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
--
2.17.2
diff view generated by jsdifflib
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181009174557.16125-5-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@
} \
} while (0)

-#define assert_cpu_is_self(this_cpu) do { \
+#define assert_cpu_is_self(cpu) do { \
if (DEBUG_TLB_GATE) { \
- g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \
+ g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
} \
} while (0)

--
2.17.2
diff view generated by jsdifflib
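The macro fixed above had two classic hazards: its parameter was named this_cpu while the body referenced cpu (so whatever variable named cpu happened to be in scope was tested instead of the argument), and the argument was used without parentheses. A small stand-alone illustration with hypothetical names:

```c
#include <assert.h>

typedef struct { int created; } CPU;

/* Broken: the parameter is ignored; the caller's local 'cpu' is tested. */
#define check_self_bad(this_cpu)  assert(!cpu->created)
/* Fixed: test the argument, and parenthesize it. */
#define check_self_ok(cpu)        assert(!(cpu)->created)

int main(void)
{
    CPU self = { .created = 0 }, other = { .created = 1 };
    CPU *cpu = &self;

    check_self_bad(&other);   /* silently checks 'cpu', i.e. self: passes */
    check_self_ok(&self);     /* actually checks its argument */
    return 0;
}
```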
Deleted patch
1
From: "Emilio G. Cota" <cota@braap.org>
2
1
3
Currently we rely on atomic operations for cross-CPU invalidations.
4
There are two cases that these atomics miss: cross-CPU invalidations
5
can race with either (1) vCPU threads flushing their TLB, which
6
happens via memset, or (2) vCPUs calling tlb_reset_dirty on their TLB,
7
which updates .addr_write with a regular store. This results in
8
undefined behaviour, since we're mixing regular and atomic ops
9
on concurrent accesses.
10
11
Fix it by using tlb_lock, a per-vCPU lock. All updaters of tlb_table
12
and the corresponding victim cache now hold the lock.
13
The readers that do not hold tlb_lock must use atomic reads when
14
reading .addr_write, since this field can be updated by other threads;
15
the conversion to atomic reads is done in the next patch.
16
17
Note that an alternative fix would be to expand the use of atomic ops.
18
However, in the case of TLB flushes this would have a huge performance
19
impact, since (1) TLB flushes can happen very frequently and (2) we
20
currently use a full memory barrier to flush each TLB entry, and a TLB
21
has many entries. Instead, acquiring the lock is barely slower than a
22
full memory barrier since it is uncontended, and with a single lock
23
acquisition we can flush the entire TLB.
24
25
Tested-by: Alex Bennée <alex.bennee@linaro.org>
26
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
27
Signed-off-by: Emilio G. Cota <cota@braap.org>
28
Message-Id: <20181009174557.16125-6-cota@braap.org>
29
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
30
---
31
include/exec/cpu-defs.h | 3 +
32
accel/tcg/cputlb.c | 155 ++++++++++++++++++++++------------------
33
2 files changed, 87 insertions(+), 71 deletions(-)
34
35
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/include/exec/cpu-defs.h
38
+++ b/include/exec/cpu-defs.h
39
@@ -XXX,XX +XXX,XX @@
40
#endif
41
42
#include "qemu/host-utils.h"
43
+#include "qemu/thread.h"
44
#include "qemu/queue.h"
45
#ifdef CONFIG_TCG
46
#include "tcg-target.h"
47
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
48
49
#define CPU_COMMON_TLB \
50
/* The meaning of the MMU modes is defined in the target code. */ \
51
+ /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \
52
+ QemuSpin tlb_lock; \
53
CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \
54
CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
55
CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \
56
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/accel/tcg/cputlb.c
59
+++ b/accel/tcg/cputlb.c
60
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
61
62
void tlb_init(CPUState *cpu)
63
{
64
+ CPUArchState *env = cpu->env_ptr;
65
+
66
+ qemu_spin_init(&env->tlb_lock);
67
}
68
69
/* flush_all_helper: run fn across all cpus
70
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_nocheck(CPUState *cpu)
71
atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
72
tlb_debug("(count: %zu)\n", tlb_flush_count());
73
74
+ /*
75
+ * tlb_table/tlb_v_table updates from any thread must hold tlb_lock.
76
+ * However, updates from the owner thread (as is the case here; see the
77
+ * above assert_cpu_is_self) do not need atomic_set because all reads
78
+ * that do not hold the lock are performed by the same owner thread.
79
+ */
80
+ qemu_spin_lock(&env->tlb_lock);
81
memset(env->tlb_table, -1, sizeof(env->tlb_table));
82
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
83
+ qemu_spin_unlock(&env->tlb_lock);
84
+
85
cpu_tb_jmp_cache_clear(cpu);
86
87
env->vtlb_index = 0;
88
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
89
90
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
91
92
+ qemu_spin_lock(&env->tlb_lock);
93
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
94
95
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
96
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
97
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
98
}
99
}
100
+ qemu_spin_unlock(&env->tlb_lock);
101
102
cpu_tb_jmp_cache_clear(cpu);
103
104
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
105
tlb_hit_page(tlb_entry->addr_code, page);
106
}
107
108
-static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page)
109
+/* Called with tlb_lock held */
110
+static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
111
+ target_ulong page)
112
{
113
if (tlb_hit_page_anyprot(tlb_entry, page)) {
114
memset(tlb_entry, -1, sizeof(*tlb_entry));
115
}
116
}
117
118
-static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx,
119
- target_ulong page)
120
+/* Called with tlb_lock held */
121
+static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
122
+ target_ulong page)
123
{
124
int k;
125
+
126
+ assert_cpu_is_self(ENV_GET_CPU(env));
127
for (k = 0; k < CPU_VTLB_SIZE; k++) {
128
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page);
129
+ tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
134
135
addr &= TARGET_PAGE_MASK;
136
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
137
+ qemu_spin_lock(&env->tlb_lock);
138
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
139
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
140
- tlb_flush_vtlb_page(env, mmu_idx, addr);
141
+ tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr);
142
+ tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
143
}
144
+ qemu_spin_unlock(&env->tlb_lock);
145
146
tb_flush_jmp_cache(cpu, addr);
147
}
148
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
149
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
150
page, addr, mmu_idx_bitmap);
151
152
+ qemu_spin_lock(&env->tlb_lock);
153
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
154
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
155
- tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
156
- tlb_flush_vtlb_page(env, mmu_idx, addr);
157
+ tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr);
158
+ tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
159
}
160
}
161
+ qemu_spin_unlock(&env->tlb_lock);
162
163
tb_flush_jmp_cache(cpu, addr);
164
}
165
@@ -XXX,XX +XXX,XX @@ void tlb_unprotect_code(ram_addr_t ram_addr)
166
* most usual is detecting writes to code regions which may invalidate
167
* generated code.
168
*
169
- * Because we want other vCPUs to respond to changes straight away we
170
- * update the te->addr_write field atomically. If the TLB entry has
171
- * been changed by the vCPU in the mean time we skip the update.
172
+ * Other vCPUs might be reading their TLBs during guest execution, so we update
173
+ * te->addr_write with atomic_set. We don't need to worry about this for
174
+ * oversized guests as MTTCG is disabled for them.
175
*
176
- * As this function uses atomic accesses we also need to ensure
177
- * updates to tlb_entries follow the same access rules. We don't need
178
- * to worry about this for oversized guests as MTTCG is disabled for
179
- * them.
180
+ * Called with tlb_lock held.
181
*/
182
-
183
-static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
184
- uintptr_t length)
185
+static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
186
+ uintptr_t start, uintptr_t length)
187
{
188
-#if TCG_OVERSIZED_GUEST
189
uintptr_t addr = tlb_entry->addr_write;
190
191
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
192
addr &= TARGET_PAGE_MASK;
193
addr += tlb_entry->addend;
194
if ((addr - start) < length) {
195
+#if TCG_OVERSIZED_GUEST
196
tlb_entry->addr_write |= TLB_NOTDIRTY;
197
- }
198
- }
199
#else
200
- /* paired with atomic_mb_set in tlb_set_page_with_attrs */
201
- uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
202
- uintptr_t addr = orig_addr;
203
-
204
- if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
205
- addr &= TARGET_PAGE_MASK;
206
- addr += atomic_read(&tlb_entry->addend);
207
- if ((addr - start) < length) {
208
- uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
209
- atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
210
+ atomic_set(&tlb_entry->addr_write,
211
+ tlb_entry->addr_write | TLB_NOTDIRTY);
212
+#endif
213
}
214
}
215
-#endif
216
}
217
218
-/* For atomic correctness when running MTTCG we need to use the right
219
- * primitives when copying entries */
220
-static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
221
- bool atomic_set)
222
+/*
223
+ * Called with tlb_lock held.
224
+ * Called only from the vCPU context, i.e. the TLB's owner thread.
225
+ */
226
+static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
227
{
228
-#if TCG_OVERSIZED_GUEST
229
*d = *s;
230
-#else
231
- if (atomic_set) {
232
- d->addr_read = s->addr_read;
233
- d->addr_code = s->addr_code;
234
- atomic_set(&d->addend, atomic_read(&s->addend));
235
- /* Pairs with flag setting in tlb_reset_dirty_range */
236
- atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
237
- } else {
238
- d->addr_read = s->addr_read;
239
- d->addr_write = atomic_read(&s->addr_write);
240
- d->addr_code = s->addr_code;
241
- d->addend = atomic_read(&s->addend);
242
- }
243
-#endif
244
}
245
246
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
247
- * the target vCPU). As such care needs to be taken that we don't
248
- * dangerously race with another vCPU update. The only thing actually
249
- * updated is the target TLB entry ->addr_write flags.
250
+ * the target vCPU).
251
+ * We must take tlb_lock to avoid racing with another vCPU update. The only
252
+ * thing actually updated is the target TLB entry ->addr_write flags.
253
*/
254
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
255
{
256
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
257
int mmu_idx;
258
259
env = cpu->env_ptr;
260
+ qemu_spin_lock(&env->tlb_lock);
261
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
262
unsigned int i;
263
264
for (i = 0; i < CPU_TLB_SIZE; i++) {
265
- tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
266
- start1, length);
267
+ tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
268
+ length);
269
}
270
271
for (i = 0; i < CPU_VTLB_SIZE; i++) {
272
- tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
273
- start1, length);
274
+ tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
275
+ length);
276
}
277
}
278
+ qemu_spin_unlock(&env->tlb_lock);
279
}
280
281
-static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
282
+/* Called with tlb_lock held */
283
+static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
284
+ target_ulong vaddr)
285
{
286
if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
287
tlb_entry->addr_write = vaddr;
288
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
289
290
vaddr &= TARGET_PAGE_MASK;
291
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
292
+ qemu_spin_lock(&env->tlb_lock);
293
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
294
- tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
295
+ tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr);
296
}
297
298
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
299
int k;
300
for (k = 0; k < CPU_VTLB_SIZE; k++) {
301
- tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
302
+ tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
303
}
304
}
305
+ qemu_spin_unlock(&env->tlb_lock);
306
}
307
308
/* Our TLB does not support large pages, so remember the area covered by
309
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
310
addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
311
}
312
313
- /* Make sure there's no cached translation for the new page. */
314
- tlb_flush_vtlb_page(env, mmu_idx, vaddr_page);
315
-
316
code_address = address;
317
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
318
paddr_page, xlat, prot, &address);
319
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
320
index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
321
te = &env->tlb_table[mmu_idx][index];
322
323
+ /*
324
+ * Hold the TLB lock for the rest of the function. We could acquire/release
325
+ * the lock several times in the function, but it is faster to amortize the
326
+ * acquisition cost by acquiring it just once. Note that this leads to
327
+ * a longer critical section, but this is not a concern since the TLB lock
328
+ * is unlikely to be contended.
329
+ */
330
+ qemu_spin_lock(&env->tlb_lock);
331
+
332
+ /* Make sure there's no cached translation for the new page. */
333
+ tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
334
+
335
/*
336
* Only evict the old entry to the victim tlb if it's for a
337
* different page; otherwise just overwrite the stale data.
338
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
339
CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
340
341
/* Evict the old entry into the victim tlb. */
342
- copy_tlb_helper(tv, te, true);
343
+ copy_tlb_helper_locked(tv, te);
344
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
345
}
346
347
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
348
}
349
}
350
351
- /* Pairs with flag setting in tlb_reset_dirty_range */
352
- copy_tlb_helper(te, &tn, true);
353
- /* atomic_mb_set(&te->addr_write, write_address); */
354
+ copy_tlb_helper_locked(te, &tn);
355
+ qemu_spin_unlock(&env->tlb_lock);
356
}
357
358
/* Add a new TLB entry, but without specifying the memory
359
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
360
size_t elt_ofs, target_ulong page)
361
{
362
size_t vidx;
363
+
364
+ assert_cpu_is_self(ENV_GET_CPU(env));
365
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
366
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
367
target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
368
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
369
/* Found entry in victim tlb, swap tlb and iotlb. */
370
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
371
372
- copy_tlb_helper(&tmptlb, tlb, false);
373
- copy_tlb_helper(tlb, vtlb, true);
374
- copy_tlb_helper(vtlb, &tmptlb, true);
375
+ qemu_spin_lock(&env->tlb_lock);
376
+ copy_tlb_helper_locked(&tmptlb, tlb);
377
+ copy_tlb_helper_locked(tlb, vtlb);
378
+ copy_tlb_helper_locked(vtlb, &tmptlb);
379
+ qemu_spin_unlock(&env->tlb_lock);
380
381
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
382
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
383
--
384
2.17.2
385
386
diff view generated by jsdifflib
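The performance argument in the patch above — one uncontended lock round-trip per flush instead of an atomic operation and barrier per TLB entry — can be seen in miniature below. This is a sketch with hypothetical types, using POSIX spinlocks where QEMU uses qemu_spin_lock() on env->tlb_lock:

```c
#include <pthread.h>
#include <string.h>

#define TLB_ENTRIES 256

typedef struct {
    unsigned long addr_read, addr_write, addr_code, addend;
} Entry;

typedef struct {
    pthread_spinlock_t lock;      /* stands in for env->tlb_lock */
    Entry table[TLB_ENTRIES];
} TLB;

static void tlb_init_example(TLB *tlb)
{
    pthread_spin_init(&tlb->lock, PTHREAD_PROCESS_PRIVATE);
    memset(tlb->table, -1, sizeof(tlb->table));
}

/* Flush (from the owner or on behalf of a cross-CPU request): one lock
 * acquisition covers every entry, rather than a barrier per entry. */
static void tlb_flush_all(TLB *tlb)
{
    pthread_spin_lock(&tlb->lock);
    memset(tlb->table, -1, sizeof(tlb->table));
    pthread_spin_unlock(&tlb->lock);
}
```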
Deleted patch
1
Isolate the computation of an index from an address into a
2
helper before we change that function.
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
[ cota: convert tlb_vaddr_to_host; use atomic_read on addr_write ]
7
Signed-off-by: Emilio G. Cota <cota@braap.org>
8
Message-Id: <20181009175129.17888-2-cota@braap.org>
9
---
10
accel/tcg/softmmu_template.h | 64 +++++++++++++++++---------------
11
include/exec/cpu_ldst.h | 19 ++++++++--
12
include/exec/cpu_ldst_template.h | 25 +++++++------
13
accel/tcg/cputlb.c | 60 ++++++++++++++----------------
14
4 files changed, 90 insertions(+), 78 deletions(-)
15
16
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/accel/tcg/softmmu_template.h
19
+++ b/accel/tcg/softmmu_template.h
20
@@ -XXX,XX +XXX,XX @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
21
WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
22
TCGMemOpIdx oi, uintptr_t retaddr)
23
{
24
- unsigned mmu_idx = get_mmuidx(oi);
25
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
26
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
27
+ uintptr_t mmu_idx = get_mmuidx(oi);
28
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
29
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
30
+ target_ulong tlb_addr = entry->ADDR_READ;
31
unsigned a_bits = get_alignment_bits(get_memop(oi));
32
uintptr_t haddr;
33
DATA_TYPE res;
34
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
35
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
36
mmu_idx, retaddr);
37
}
38
- tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
39
+ tlb_addr = entry->ADDR_READ;
40
}
41
42
/* Handle an IO access. */
43
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
44
return res;
45
}
46
47
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
48
+ haddr = addr + entry->addend;
49
#if DATA_SIZE == 1
50
res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
51
#else
52
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
53
WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
54
TCGMemOpIdx oi, uintptr_t retaddr)
55
{
56
- unsigned mmu_idx = get_mmuidx(oi);
57
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
58
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
59
+ uintptr_t mmu_idx = get_mmuidx(oi);
60
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
61
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
62
+ target_ulong tlb_addr = entry->ADDR_READ;
63
unsigned a_bits = get_alignment_bits(get_memop(oi));
64
uintptr_t haddr;
65
DATA_TYPE res;
66
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
67
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
68
mmu_idx, retaddr);
69
}
70
- tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
71
+ tlb_addr = entry->ADDR_READ;
72
}
73
74
/* Handle an IO access. */
75
@@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
76
return res;
77
}
78
79
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
80
+ haddr = addr + entry->addend;
81
res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
82
return res;
83
}
84
@@ -XXX,XX +XXX,XX @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
85
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
86
TCGMemOpIdx oi, uintptr_t retaddr)
87
{
88
- unsigned mmu_idx = get_mmuidx(oi);
89
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
90
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
91
+ uintptr_t mmu_idx = get_mmuidx(oi);
92
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
93
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
94
+ target_ulong tlb_addr = entry->addr_write;
95
unsigned a_bits = get_alignment_bits(get_memop(oi));
96
uintptr_t haddr;
97
98
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
99
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
100
mmu_idx, retaddr);
101
}
102
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
103
+ tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
104
}
105
106
/* Handle an IO access. */
107
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
108
if (DATA_SIZE > 1
109
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
110
>= TARGET_PAGE_SIZE)) {
111
- int i, index2;
112
- target_ulong page2, tlb_addr2;
113
+ int i;
114
+ target_ulong page2;
115
+ CPUTLBEntry *entry2;
116
do_unaligned_access:
117
/* Ensure the second page is in the TLB. Note that the first page
118
is already guaranteed to be filled, and that the second page
119
cannot evict the first. */
120
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
121
- index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
122
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
123
- if (!tlb_hit_page(tlb_addr2, page2)
124
+ entry2 = tlb_entry(env, mmu_idx, page2);
125
+ if (!tlb_hit_page(entry2->addr_write, page2)
126
&& !VICTIM_TLB_HIT(addr_write, page2)) {
127
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
128
mmu_idx, retaddr);
129
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
130
return;
131
}
132
133
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
134
+ haddr = addr + entry->addend;
135
#if DATA_SIZE == 1
136
glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
137
#else
138
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
139
void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
140
TCGMemOpIdx oi, uintptr_t retaddr)
141
{
142
- unsigned mmu_idx = get_mmuidx(oi);
143
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
144
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
145
+ uintptr_t mmu_idx = get_mmuidx(oi);
146
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
147
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
148
+ target_ulong tlb_addr = entry->addr_write;
149
unsigned a_bits = get_alignment_bits(get_memop(oi));
150
uintptr_t haddr;
151
152
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
153
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
154
mmu_idx, retaddr);
155
}
156
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
157
+ tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
158
}
159
160
/* Handle an IO access. */
161
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
162
if (DATA_SIZE > 1
163
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
164
>= TARGET_PAGE_SIZE)) {
165
- int i, index2;
166
- target_ulong page2, tlb_addr2;
167
+ int i;
168
+ target_ulong page2;
169
+ CPUTLBEntry *entry2;
170
do_unaligned_access:
171
/* Ensure the second page is in the TLB. Note that the first page
172
is already guaranteed to be filled, and that the second page
173
cannot evict the first. */
174
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
175
- index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
176
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
177
- if (!tlb_hit_page(tlb_addr2, page2)
178
+ entry2 = tlb_entry(env, mmu_idx, page2);
179
+ if (!tlb_hit_page(entry2->addr_write, page2)
180
&& !VICTIM_TLB_HIT(addr_write, page2)) {
181
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
182
mmu_idx, retaddr);
183
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
184
return;
185
}
186
187
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
188
+ haddr = addr + entry->addend;
189
glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
190
}
191
#endif /* DATA_SIZE > 1 */
192
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
193
index XXXXXXX..XXXXXXX 100644
194
--- a/include/exec/cpu_ldst.h
195
+++ b/include/exec/cpu_ldst.h
196
@@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr;
197
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
198
#include "tcg.h"
199
200
+/* Find the TLB index corresponding to the mmu_idx + address pair. */
201
+static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
202
+ target_ulong addr)
203
+{
204
+ return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
205
+}
206
+
207
+/* Find the TLB entry corresponding to the mmu_idx + address pair. */
208
+static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
209
+ target_ulong addr)
210
+{
211
+ return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
212
+}
213
+
214
#ifdef MMU_MODE0_SUFFIX
215
#define CPU_MMU_INDEX 0
216
#define MEMSUFFIX MMU_MODE0_SUFFIX
217
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
218
#if defined(CONFIG_USER_ONLY)
219
return g2h(addr);
220
#else
221
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
222
- CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
223
+ CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr);
224
abi_ptr tlb_addr;
225
uintptr_t haddr;
226
227
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
228
return NULL;
229
}
230
231
- haddr = addr + env->tlb_table[mmu_idx][index].addend;
232
+ haddr = addr + tlbentry->addend;
233
return (void *)haddr;
234
#endif /* defined(CONFIG_USER_ONLY) */
235
}
236
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
237
index XXXXXXX..XXXXXXX 100644
238
--- a/include/exec/cpu_ldst_template.h
239
+++ b/include/exec/cpu_ldst_template.h
240
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
241
target_ulong ptr,
242
uintptr_t retaddr)
243
{
244
- int page_index;
245
+ CPUTLBEntry *entry;
246
RES_TYPE res;
247
target_ulong addr;
248
int mmu_idx;
249
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
250
#endif
251
252
addr = ptr;
253
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
254
mmu_idx = CPU_MMU_INDEX;
255
- if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
256
+ entry = tlb_entry(env, mmu_idx, addr);
257
+ if (unlikely(entry->ADDR_READ !=
258
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
259
oi = make_memop_idx(SHIFT, mmu_idx);
260
res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
261
oi, retaddr);
262
} else {
263
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
264
+ uintptr_t hostaddr = addr + entry->addend;
265
res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr);
266
}
267
return res;
268
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
269
target_ulong ptr,
270
uintptr_t retaddr)
271
{
272
- int res, page_index;
273
+ CPUTLBEntry *entry;
274
+ int res;
275
target_ulong addr;
276
int mmu_idx;
277
TCGMemOpIdx oi;
278
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
279
#endif
280
281
addr = ptr;
282
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
283
mmu_idx = CPU_MMU_INDEX;
284
- if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
285
+ entry = tlb_entry(env, mmu_idx, addr);
286
+ if (unlikely(entry->ADDR_READ !=
287
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
288
oi = make_memop_idx(SHIFT, mmu_idx);
289
res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
290
MMUSUFFIX)(env, addr, oi, retaddr);
291
} else {
292
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
293
+ uintptr_t hostaddr = addr + entry->addend;
294
res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr);
295
}
296
return res;
297
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
298
target_ulong ptr,
299
RES_TYPE v, uintptr_t retaddr)
300
{
301
- int page_index;
302
+ CPUTLBEntry *entry;
303
target_ulong addr;
304
int mmu_idx;
305
TCGMemOpIdx oi;
306
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
307
#endif
308
309
addr = ptr;
310
- page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
311
mmu_idx = CPU_MMU_INDEX;
312
- if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
313
+ entry = tlb_entry(env, mmu_idx, addr);
314
+ if (unlikely(entry->addr_write !=
315
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
316
oi = make_memop_idx(SHIFT, mmu_idx);
317
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
318
retaddr);
319
} else {
320
- uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
321
+ uintptr_t hostaddr = addr + entry->addend;
322
glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v);
323
}
324
}
325
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
326
index XXXXXXX..XXXXXXX 100644
327
--- a/accel/tcg/cputlb.c
328
+++ b/accel/tcg/cputlb.c
329
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
330
{
331
CPUArchState *env = cpu->env_ptr;
332
target_ulong addr = (target_ulong) data.target_ptr;
333
- int i;
334
int mmu_idx;
335
336
assert_cpu_is_self(cpu);
337
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
338
}
339
340
addr &= TARGET_PAGE_MASK;
341
- i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
342
qemu_spin_lock(&env->tlb_lock);
343
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
344
- tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr);
345
+ tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
346
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
347
}
348
qemu_spin_unlock(&env->tlb_lock);
349
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
350
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
351
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
352
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
353
- int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
354
int mmu_idx;
355
356
assert_cpu_is_self(cpu);
357
358
- tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
359
- page, addr, mmu_idx_bitmap);
360
+ tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
361
+ addr, mmu_idx_bitmap);
362
363
qemu_spin_lock(&env->tlb_lock);
364
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
365
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
366
- tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr);
367
+ tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
368
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
369
}
370
}
371
@@ -XXX,XX +XXX,XX @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
372
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
373
{
374
CPUArchState *env = cpu->env_ptr;
375
- int i;
376
int mmu_idx;
377
378
assert_cpu_is_self(cpu);
379
380
vaddr &= TARGET_PAGE_MASK;
381
- i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
382
qemu_spin_lock(&env->tlb_lock);
383
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
384
- tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr);
385
+ tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
386
}
387
388
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
389
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
390
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
391
paddr_page, xlat, prot, &address);
392
393
- index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
394
- te = &env->tlb_table[mmu_idx][index];
395
+ index = tlb_index(env, mmu_idx, vaddr_page);
396
+ te = tlb_entry(env, mmu_idx, vaddr_page);
397
398
/*
399
* Hold the TLB lock for the rest of the function. We could acquire/release
400
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
401
* repeat the MMU check here. This tlb_fill() call might
402
* longjump out if this access should cause a guest exception.
403
*/
404
- int index;
405
+ CPUTLBEntry *entry;
406
target_ulong tlb_addr;
407
408
tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
409
410
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
411
- tlb_addr = env->tlb_table[mmu_idx][index].addr_read;
412
+ entry = tlb_entry(env, mmu_idx, addr);
413
+ tlb_addr = entry->addr_read;
414
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
415
/* RAM access */
416
- uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
417
+ uintptr_t haddr = addr + entry->addend;
418
419
return ldn_p((void *)haddr, size);
420
}
421
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
422
* repeat the MMU check here. This tlb_fill() call might
423
* longjump out if this access should cause a guest exception.
424
*/
425
- int index;
426
+ CPUTLBEntry *entry;
427
target_ulong tlb_addr;
428
429
tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
430
431
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
432
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
433
+ entry = tlb_entry(env, mmu_idx, addr);
434
+ tlb_addr = entry->addr_write;
435
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
436
/* RAM access */
437
- uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
438
+ uintptr_t haddr = addr + entry->addend;
439
440
stn_p((void *)haddr, size, val);
441
return;
442
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
443
*/
444
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
445
{
446
- int mmu_idx, index;
447
+ uintptr_t mmu_idx = cpu_mmu_index(env, true);
448
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
449
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
450
void *p;
451
452
- index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
453
- mmu_idx = cpu_mmu_index(env, true);
454
- if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) {
455
+ if (unlikely(!tlb_hit(entry->addr_code, addr))) {
456
if (!VICTIM_TLB_HIT(addr_code, addr)) {
457
tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
458
}
459
- assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr));
460
+ assert(tlb_hit(entry->addr_code, addr));
461
}
462
463
- if (unlikely(env->tlb_table[mmu_idx][index].addr_code &
464
- (TLB_RECHECK | TLB_MMIO))) {
465
+ if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
466
/*
467
* Return -1 if we can't translate and execute from an entire
468
* page of RAM here, which will cause us to execute by loading
469
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
470
return -1;
471
}
472
473
- p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
474
+ p = (void *)((uintptr_t)addr + entry->addend);
475
return qemu_ram_addr_from_host_nofail(p);
476
}
477
478
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
479
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
480
uintptr_t retaddr)
481
{
482
- int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
483
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
484
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
485
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
486
487
- if (!tlb_hit(tlb_addr, addr)) {
488
+ if (!tlb_hit(entry->addr_write, addr)) {
489
/* TLB entry is for a different page */
490
if (!VICTIM_TLB_HIT(addr_write, addr)) {
491
tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
492
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
493
NotDirtyInfo *ndi)
494
{
495
size_t mmu_idx = get_mmuidx(oi);
496
- size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
497
- CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
498
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
499
+ CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
500
target_ulong tlb_addr = tlbe->addr_write;
501
TCGMemOp mop = get_memop(oi);
502
int a_bits = get_alignment_bits(mop);
503
--
2.17.2

Deleted patch

GCC7+ will no longer advertise support for 16-byte __atomic operations
if only cmpxchg is supported, as for x86_64. Fortunately, x86_64 still
has support for __sync_compare_and_swap_16 and we can make use of that.
AArch64 does not have, nor ever has had such support, so open-code it.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/atomic_template.h | 20 ++++-
 include/qemu/atomic128.h | 155 ++++++++++++++++++++++++++++++++++++
 tcg/tcg.h | 16 ++--
 accel/tcg/cputlb.c | 3 +-
 accel/tcg/user-exec.c | 5 +-
 configure | 19 +++++
 6 files changed, 204 insertions(+), 14 deletions(-)
 create mode 100644 include/qemu/atomic128.h

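To make that fallback chain concrete, here is a minimal stand-alone sketch of
the cmpxchg-only path described above: the 16-byte compare-and-swap comes from
the __sync builtin, and a load/store pair is layered on top of it. It assumes
a GCC/Clang host that provides unsigned __int128 and
__sync_val_compare_and_swap_16 (on x86_64 this typically needs -mcx16); it is
an illustration only, not the interface added by the patch below.

    /* Sketch only: 16-byte atomics built purely from compare-and-swap. */
    typedef unsigned __int128 u128;

    static inline u128 sketch_cmpxchg16(u128 *ptr, u128 cmp, u128 new)
    {
        /* Returns the old value; stores new only if *ptr == cmp. */
        return __sync_val_compare_and_swap_16(ptr, cmp, new);
    }

    static inline u128 sketch_read16(u128 *ptr)
    {
        /* Maybe replace 0 with 0; either way the old value comes back. */
        return sketch_cmpxchg16(ptr, 0, 0);
    }

    static inline void sketch_set16(u128 *ptr, u128 val)
    {
        u128 old = *ptr, cmp;
        do {
            cmp = old;
            old = sketch_cmpxchg16(ptr, cmp, val);
        } while (old != cmp);
    }
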
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/atomic_template.h
21
+++ b/accel/tcg/atomic_template.h
22
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
23
DATA_TYPE ret;
24
25
ATOMIC_TRACE_RMW;
26
+#if DATA_SIZE == 16
27
+ ret = atomic16_cmpxchg(haddr, cmpv, newv);
28
+#else
29
ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
30
+#endif
31
ATOMIC_MMU_CLEANUP;
32
return ret;
33
}
34
35
#if DATA_SIZE >= 16
36
+#if HAVE_ATOMIC128
37
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
38
{
39
ATOMIC_MMU_DECLS;
40
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
41
42
ATOMIC_TRACE_LD;
43
- __atomic_load(haddr, &val, __ATOMIC_RELAXED);
44
+ val = atomic16_read(haddr);
45
ATOMIC_MMU_CLEANUP;
46
return val;
47
}
48
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
49
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
50
51
ATOMIC_TRACE_ST;
52
- __atomic_store(haddr, &val, __ATOMIC_RELAXED);
53
+ atomic16_set(haddr, val);
54
ATOMIC_MMU_CLEANUP;
55
}
56
+#endif
57
#else
58
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
59
ABI_TYPE val EXTRA_ARGS)
60
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
61
DATA_TYPE ret;
62
63
ATOMIC_TRACE_RMW;
64
+#if DATA_SIZE == 16
65
+ ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
66
+#else
67
ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
68
+#endif
69
ATOMIC_MMU_CLEANUP;
70
return BSWAP(ret);
71
}
72
73
#if DATA_SIZE >= 16
74
+#if HAVE_ATOMIC128
75
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
76
{
77
ATOMIC_MMU_DECLS;
78
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
79
80
ATOMIC_TRACE_LD;
81
- __atomic_load(haddr, &val, __ATOMIC_RELAXED);
82
+ val = atomic16_read(haddr);
83
ATOMIC_MMU_CLEANUP;
84
return BSWAP(val);
85
}
86
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
87
88
ATOMIC_TRACE_ST;
89
val = BSWAP(val);
90
- __atomic_store(haddr, &val, __ATOMIC_RELAXED);
91
+ atomic16_set(haddr, val);
92
ATOMIC_MMU_CLEANUP;
93
}
94
+#endif
95
#else
96
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
97
ABI_TYPE val EXTRA_ARGS)
98
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
99
new file mode 100644
100
index XXXXXXX..XXXXXXX
101
--- /dev/null
102
+++ b/include/qemu/atomic128.h
103
@@ -XXX,XX +XXX,XX @@
104
+/*
105
+ * Simple interface for 128-bit atomic operations.
106
+ *
107
+ * Copyright (C) 2018 Linaro, Ltd.
108
+ *
109
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
110
+ * See the COPYING file in the top-level directory.
111
+ *
112
+ * See docs/devel/atomics.txt for discussion about the guarantees each
113
+ * atomic primitive is meant to provide.
114
+ */
115
+
116
+#ifndef QEMU_ATOMIC128_H
117
+#define QEMU_ATOMIC128_H
118
+
119
+/*
120
+ * GCC is a house divided about supporting large atomic operations.
121
+ *
122
+ * For hosts that only have large compare-and-swap, a legalistic reading
123
+ * of the C++ standard means that one cannot implement __atomic_read on
124
+ * read-only memory, and thus all atomic operations must synchronize
125
+ * through libatomic.
126
+ *
127
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878
128
+ *
129
+ * This interpretation is not especially helpful for QEMU.
130
+ * For softmmu, all RAM is always read/write from the hypervisor.
131
+ * For user-only, if the guest doesn't implement such an __atomic_read
132
+ * then the host need not worry about it either.
133
+ *
134
+ * Moreover, using libatomic is not an option, because its interface is
135
+ * built for std::atomic<T>, and requires that *all* accesses to such an
136
+ * object go through the library. In our case we do not have an object
137
+ * in the C/C++ sense, but a view of memory as seen by the guest.
138
+ * The guest may issue a large atomic operation and then access those
139
+ * pieces using word-sized accesses. From the hypervisor, we have no
140
+ * way to connect those two actions.
141
+ *
142
+ * Therefore, special case each platform.
143
+ */
144
+
145
+#if defined(CONFIG_ATOMIC128)
146
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
147
+{
148
+ return atomic_cmpxchg__nocheck(ptr, cmp, new);
149
+}
150
+# define HAVE_CMPXCHG128 1
151
+#elif defined(CONFIG_CMPXCHG128)
152
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
153
+{
154
+ return __sync_val_compare_and_swap_16(ptr, cmp, new);
155
+}
156
+# define HAVE_CMPXCHG128 1
157
+#elif defined(__aarch64__)
158
+/* Through gcc 8, aarch64 has no support for 128-bit at all. */
159
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
160
+{
161
+ uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
162
+ uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
163
+ uint64_t oldl, oldh;
164
+ uint32_t tmp;
165
+
166
+ asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
167
+ "cmp %[oldl], %[cmpl]\n\t"
168
+ "ccmp %[oldh], %[cmph], #0, eq\n\t"
169
+ "b.ne 1f\n\t"
170
+ "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
171
+ "cbnz %w[tmp], 0b\n"
172
+ "1:"
173
+ : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
174
+ [oldl] "=&r"(oldl), [oldh] "=r"(oldh)
175
+ : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
176
+ [newl] "r"(newl), [newh] "r"(newh)
177
+ : "memory", "cc");
178
+
179
+ return int128_make128(oldl, oldh);
180
+}
181
+# define HAVE_CMPXCHG128 1
182
+#else
183
+/* Fallback definition that must be optimized away, or error. */
184
+Int128 __attribute__((error("unsupported atomic")))
185
+ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
186
+# define HAVE_CMPXCHG128 0
187
+#endif /* Some definition for HAVE_CMPXCHG128 */
188
+
189
+
190
+#if defined(CONFIG_ATOMIC128)
191
+static inline Int128 atomic16_read(Int128 *ptr)
192
+{
193
+ return atomic_read__nocheck(ptr);
194
+}
195
+
196
+static inline void atomic16_set(Int128 *ptr, Int128 val)
197
+{
198
+ atomic_set__nocheck(ptr, val);
199
+}
200
+
201
+# define HAVE_ATOMIC128 1
202
+#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
203
+/* We can do better than cmpxchg for AArch64. */
204
+static inline Int128 atomic16_read(Int128 *ptr)
205
+{
206
+ uint64_t l, h;
207
+ uint32_t tmp;
208
+
209
+ /* The load must be paired with the store to guarantee not tearing. */
210
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
211
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
212
+ "cbnz %w[tmp], 0b"
213
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
214
+
215
+ return int128_make128(l, h);
216
+}
217
+
218
+static inline void atomic16_set(Int128 *ptr, Int128 val)
219
+{
220
+ uint64_t l = int128_getlo(val), h = int128_gethi(val);
221
+ uint64_t t1, t2;
222
+
223
+ /* Load into temporaries to acquire the exclusive access lock. */
224
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
225
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
226
+ "cbnz %w[t1], 0b"
227
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
228
+ : [l] "r"(l), [h] "r"(h));
229
+}
230
+
231
+# define HAVE_ATOMIC128 1
232
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
233
+static inline Int128 atomic16_read(Int128 *ptr)
234
+{
235
+ /* Maybe replace 0 with 0, returning the old value. */
236
+ return atomic16_cmpxchg(ptr, 0, 0);
237
+}
238
+
239
+static inline void atomic16_set(Int128 *ptr, Int128 val)
240
+{
241
+ Int128 old = *ptr, cmp;
242
+ do {
243
+ cmp = old;
244
+ old = atomic16_cmpxchg(ptr, cmp, val);
245
+ } while (old != cmp);
246
+}
247
+
248
+# define HAVE_ATOMIC128 1
249
+#else
250
+/* Fallback definitions that must be optimized away, or error. */
251
+Int128 __attribute__((error("unsupported atomic")))
252
+ atomic16_read(Int128 *ptr);
253
+void __attribute__((error("unsupported atomic")))
254
+ atomic16_set(Int128 *ptr, Int128 val);
255
+# define HAVE_ATOMIC128 0
256
+#endif /* Some definition for HAVE_ATOMIC128 */
257
+
258
+#endif /* QEMU_ATOMIC128_H */
259
diff --git a/tcg/tcg.h b/tcg/tcg.h
260
index XXXXXXX..XXXXXXX 100644
261
--- a/tcg/tcg.h
262
+++ b/tcg/tcg.h
263
@@ -XXX,XX +XXX,XX @@
264
#include "qemu/queue.h"
265
#include "tcg-mo.h"
266
#include "tcg-target.h"
267
+#include "qemu/int128.h"
268
269
/* XXX: make safe guess about sizes */
270
#define MAX_OP_PER_INSTR 266
271
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(xchg)
272
#undef GEN_ATOMIC_HELPER
273
#endif /* CONFIG_SOFTMMU */
274
275
-#ifdef CONFIG_ATOMIC128
276
-#include "qemu/int128.h"
277
-
278
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
279
- However, use the same format as the others, for use by the backends. */
280
+/*
281
+ * These aren't really a "proper" helpers because TCG cannot manage Int128.
282
+ * However, use the same format as the others, for use by the backends.
283
+ *
284
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
285
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
286
+ * as defined by <qemu/atomic128.h>.
287
+ */
288
Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
289
Int128 cmpv, Int128 newv,
290
TCGMemOpIdx oi, uintptr_t retaddr);
291
@@ -XXX,XX +XXX,XX @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
292
void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
293
TCGMemOpIdx oi, uintptr_t retaddr);
294
295
-#endif /* CONFIG_ATOMIC128 */
296
-
297
#endif /* TCG_H */
298
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
299
index XXXXXXX..XXXXXXX 100644
300
--- a/accel/tcg/cputlb.c
301
+++ b/accel/tcg/cputlb.c
302
@@ -XXX,XX +XXX,XX @@
303
#include "exec/log.h"
304
#include "exec/helper-proto.h"
305
#include "qemu/atomic.h"
306
+#include "qemu/atomic128.h"
307
308
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
309
/* #define DEBUG_TLB */
310
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
311
#include "atomic_template.h"
312
#endif
313
314
-#ifdef CONFIG_ATOMIC128
315
+#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
316
#define DATA_SIZE 16
317
#include "atomic_template.h"
318
#endif
319
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
320
index XXXXXXX..XXXXXXX 100644
321
--- a/accel/tcg/user-exec.c
322
+++ b/accel/tcg/user-exec.c
323
@@ -XXX,XX +XXX,XX @@
324
#include "exec/cpu_ldst.h"
325
#include "translate-all.h"
326
#include "exec/helper-proto.h"
327
+#include "qemu/atomic128.h"
328
329
#undef EAX
330
#undef ECX
331
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
332
/* The following is only callable from other helpers, and matches up
333
with the softmmu version. */
334
335
-#ifdef CONFIG_ATOMIC128
336
+#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
337
338
#undef EXTRA_ARGS
339
#undef ATOMIC_NAME
340
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
341
342
#define DATA_SIZE 16
343
#include "atomic_template.h"
344
-#endif /* CONFIG_ATOMIC128 */
345
+#endif
346
diff --git a/configure b/configure
347
index XXXXXXX..XXXXXXX 100755
348
--- a/configure
349
+++ b/configure
350
@@ -XXX,XX +XXX,XX @@ EOF
351
fi
352
fi
353
354
+cmpxchg128=no
355
+if test "$int128" = yes -a "$atomic128" = no; then
356
+ cat > $TMPC << EOF
357
+int main(void)
358
+{
359
+ unsigned __int128 x = 0, y = 0;
360
+ __sync_val_compare_and_swap_16(&x, y, x);
361
+ return 0;
362
+}
363
+EOF
364
+ if compile_prog "" "" ; then
365
+ cmpxchg128=yes
366
+ fi
367
+fi
368
+
369
#########################################
370
# See if 64-bit atomic operations are supported.
371
# Note that without __atomic builtins, we can only
372
@@ -XXX,XX +XXX,XX @@ if test "$atomic128" = "yes" ; then
373
echo "CONFIG_ATOMIC128=y" >> $config_host_mak
374
fi
375
376
+if test "$cmpxchg128" = "yes" ; then
377
+ echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
378
+fi
379
+
380
if test "$atomic64" = "yes" ; then
381
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
382
fi
383
--
2.17.2

Deleted patch

Reviewed-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/mem_helper.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/target/i386/mem_helper.c
11
+++ b/target/i386/mem_helper.c
12
@@ -XXX,XX +XXX,XX @@
13
#include "exec/exec-all.h"
14
#include "exec/cpu_ldst.h"
15
#include "qemu/int128.h"
16
+#include "qemu/atomic128.h"
17
#include "tcg.h"
18
19
void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
20
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
21
22
if ((a0 & 0xf) != 0) {
23
raise_exception_ra(env, EXCP0D_GPF, ra);
24
- } else {
25
-#ifndef CONFIG_ATOMIC128
26
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
27
-#else
28
+ } else if (HAVE_CMPXCHG128) {
29
int eflags = cpu_cc_compute_all(env, CC_OP);
30
31
Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
32
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
33
eflags &= ~CC_Z;
34
}
35
CC_SRC = eflags;
36
-#endif
37
+ } else {
38
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
39
}
40
}
41
#endif
42
--
2.17.2

Deleted patch

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-a64.c | 259 +++++++++++++++++++++-------------------
 1 file changed, 133 insertions(+), 126 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/arm/helper-a64.c
10
+++ b/target/arm/helper-a64.c
11
@@ -XXX,XX +XXX,XX @@
12
#include "exec/exec-all.h"
13
#include "exec/cpu_ldst.h"
14
#include "qemu/int128.h"
15
+#include "qemu/atomic128.h"
16
#include "tcg.h"
17
#include "fpu/softfloat.h"
18
#include <zlib.h> /* For crc32 */
19
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
20
return crc32c(acc, buf, bytes) ^ 0xffffffff;
21
}
22
23
-/* Returns 0 on success; 1 otherwise. */
24
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
25
- uint64_t new_lo, uint64_t new_hi,
26
- bool parallel, uintptr_t ra)
27
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
28
+ uint64_t new_lo, uint64_t new_hi)
29
{
30
- Int128 oldv, cmpv, newv;
31
+ Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
32
+ Int128 newv = int128_make128(new_lo, new_hi);
33
+ Int128 oldv;
34
+ uintptr_t ra = GETPC();
35
+ uint64_t o0, o1;
36
bool success;
37
38
- cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
39
- newv = int128_make128(new_lo, new_hi);
40
-
41
- if (parallel) {
42
-#ifndef CONFIG_ATOMIC128
43
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
44
-#else
45
- int mem_idx = cpu_mmu_index(env, false);
46
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
47
- oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
48
- success = int128_eq(oldv, cmpv);
49
-#endif
50
- } else {
51
- uint64_t o0, o1;
52
-
53
#ifdef CONFIG_USER_ONLY
54
- /* ??? Enforce alignment. */
55
- uint64_t *haddr = g2h(addr);
56
+ /* ??? Enforce alignment. */
57
+ uint64_t *haddr = g2h(addr);
58
59
- helper_retaddr = ra;
60
- o0 = ldq_le_p(haddr + 0);
61
- o1 = ldq_le_p(haddr + 1);
62
- oldv = int128_make128(o0, o1);
63
+ helper_retaddr = ra;
64
+ o0 = ldq_le_p(haddr + 0);
65
+ o1 = ldq_le_p(haddr + 1);
66
+ oldv = int128_make128(o0, o1);
67
68
- success = int128_eq(oldv, cmpv);
69
- if (success) {
70
- stq_le_p(haddr + 0, int128_getlo(newv));
71
- stq_le_p(haddr + 1, int128_gethi(newv));
72
- }
73
- helper_retaddr = 0;
74
-#else
75
- int mem_idx = cpu_mmu_index(env, false);
76
- TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
77
- TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
78
-
79
- o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
80
- o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
81
- oldv = int128_make128(o0, o1);
82
-
83
- success = int128_eq(oldv, cmpv);
84
- if (success) {
85
- helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
86
- helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
87
- }
88
-#endif
89
+ success = int128_eq(oldv, cmpv);
90
+ if (success) {
91
+ stq_le_p(haddr + 0, int128_getlo(newv));
92
+ stq_le_p(haddr + 1, int128_gethi(newv));
93
}
94
+ helper_retaddr = 0;
95
+#else
96
+ int mem_idx = cpu_mmu_index(env, false);
97
+ TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
98
+ TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
99
+
100
+ o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
101
+ o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
102
+ oldv = int128_make128(o0, o1);
103
+
104
+ success = int128_eq(oldv, cmpv);
105
+ if (success) {
106
+ helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
107
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
108
+ }
109
+#endif
110
111
return !success;
112
}
113
114
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
115
- uint64_t new_lo, uint64_t new_hi)
116
-{
117
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
118
-}
119
-
120
uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
121
uint64_t new_lo, uint64_t new_hi)
122
-{
123
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
124
-}
125
-
126
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
127
- uint64_t new_lo, uint64_t new_hi,
128
- bool parallel, uintptr_t ra)
129
{
130
Int128 oldv, cmpv, newv;
131
+ uintptr_t ra = GETPC();
132
bool success;
133
+ int mem_idx;
134
+ TCGMemOpIdx oi;
135
136
- /* high and low need to be switched here because this is not actually a
137
- * 128bit store but two doublewords stored consecutively
138
- */
139
- cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
140
- newv = int128_make128(new_hi, new_lo);
141
-
142
- if (parallel) {
143
-#ifndef CONFIG_ATOMIC128
144
+ if (!HAVE_CMPXCHG128) {
145
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
146
-#else
147
- int mem_idx = cpu_mmu_index(env, false);
148
- TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
149
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
150
- success = int128_eq(oldv, cmpv);
151
-#endif
152
- } else {
153
- uint64_t o0, o1;
154
-
155
-#ifdef CONFIG_USER_ONLY
156
- /* ??? Enforce alignment. */
157
- uint64_t *haddr = g2h(addr);
158
-
159
- helper_retaddr = ra;
160
- o1 = ldq_be_p(haddr + 0);
161
- o0 = ldq_be_p(haddr + 1);
162
- oldv = int128_make128(o0, o1);
163
-
164
- success = int128_eq(oldv, cmpv);
165
- if (success) {
166
- stq_be_p(haddr + 0, int128_gethi(newv));
167
- stq_be_p(haddr + 1, int128_getlo(newv));
168
- }
169
- helper_retaddr = 0;
170
-#else
171
- int mem_idx = cpu_mmu_index(env, false);
172
- TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
173
- TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
174
-
175
- o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
176
- o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
177
- oldv = int128_make128(o0, o1);
178
-
179
- success = int128_eq(oldv, cmpv);
180
- if (success) {
181
- helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
182
- helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
183
- }
184
-#endif
185
}
186
187
+ mem_idx = cpu_mmu_index(env, false);
188
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
189
+
190
+ cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
191
+ newv = int128_make128(new_lo, new_hi);
192
+ oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
193
+
194
+ success = int128_eq(oldv, cmpv);
195
return !success;
196
}
197
198
uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
199
uint64_t new_lo, uint64_t new_hi)
200
{
201
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
202
+ /*
203
+ * High and low need to be switched here because this is not actually a
204
+ * 128bit store but two doublewords stored consecutively
205
+ */
206
+ Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
207
+ Int128 newv = int128_make128(new_lo, new_hi);
208
+ Int128 oldv;
209
+ uintptr_t ra = GETPC();
210
+ uint64_t o0, o1;
211
+ bool success;
212
+
213
+#ifdef CONFIG_USER_ONLY
214
+ /* ??? Enforce alignment. */
215
+ uint64_t *haddr = g2h(addr);
216
+
217
+ helper_retaddr = ra;
218
+ o1 = ldq_be_p(haddr + 0);
219
+ o0 = ldq_be_p(haddr + 1);
220
+ oldv = int128_make128(o0, o1);
221
+
222
+ success = int128_eq(oldv, cmpv);
223
+ if (success) {
224
+ stq_be_p(haddr + 0, int128_gethi(newv));
225
+ stq_be_p(haddr + 1, int128_getlo(newv));
226
+ }
227
+ helper_retaddr = 0;
228
+#else
229
+ int mem_idx = cpu_mmu_index(env, false);
230
+ TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
231
+ TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
232
+
233
+ o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
234
+ o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
235
+ oldv = int128_make128(o0, o1);
236
+
237
+ success = int128_eq(oldv, cmpv);
238
+ if (success) {
239
+ helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
240
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
241
+ }
242
+#endif
243
+
244
+ return !success;
245
}
246
247
uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
248
- uint64_t new_lo, uint64_t new_hi)
249
+ uint64_t new_lo, uint64_t new_hi)
250
{
251
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
252
+ Int128 oldv, cmpv, newv;
253
+ uintptr_t ra = GETPC();
254
+ bool success;
255
+ int mem_idx;
256
+ TCGMemOpIdx oi;
257
+
258
+ if (!HAVE_CMPXCHG128) {
259
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
260
+ }
261
+
262
+ mem_idx = cpu_mmu_index(env, false);
263
+ oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
264
+
265
+ /*
266
+ * High and low need to be switched here because this is not actually a
267
+ * 128bit store but two doublewords stored consecutively
268
+ */
269
+ cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
270
+ newv = int128_make128(new_hi, new_lo);
271
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
272
+
273
+ success = int128_eq(oldv, cmpv);
274
+ return !success;
275
}
276
277
/* Writes back the old data into Rs. */
278
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
279
uint64_t new_lo, uint64_t new_hi)
280
{
281
- uintptr_t ra = GETPC();
282
-#ifndef CONFIG_ATOMIC128
283
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
284
-#else
285
Int128 oldv, cmpv, newv;
286
+ uintptr_t ra = GETPC();
287
+ int mem_idx;
288
+ TCGMemOpIdx oi;
289
+
290
+ if (!HAVE_CMPXCHG128) {
291
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
292
+ }
293
+
294
+ mem_idx = cpu_mmu_index(env, false);
295
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
296
297
cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
298
newv = int128_make128(new_lo, new_hi);
299
-
300
- int mem_idx = cpu_mmu_index(env, false);
301
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
302
oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
303
304
env->xregs[rs] = int128_getlo(oldv);
305
env->xregs[rs + 1] = int128_gethi(oldv);
306
-#endif
307
}
308
309
void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
310
uint64_t new_hi, uint64_t new_lo)
311
{
312
- uintptr_t ra = GETPC();
313
-#ifndef CONFIG_ATOMIC128
314
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
315
-#else
316
Int128 oldv, cmpv, newv;
317
+ uintptr_t ra = GETPC();
318
+ int mem_idx;
319
+ TCGMemOpIdx oi;
320
+
321
+ if (!HAVE_CMPXCHG128) {
322
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
323
+ }
324
+
325
+ mem_idx = cpu_mmu_index(env, false);
326
+ oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
327
328
cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
329
newv = int128_make128(new_lo, new_hi);
330
-
331
- int mem_idx = cpu_mmu_index(env, false);
332
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
333
oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
334
335
env->xregs[rs + 1] = int128_getlo(oldv);
336
env->xregs[rs] = int128_gethi(oldv);
337
-#endif
338
}
339
340
/*
341
--
2.17.2

Deleted patch

Reviewed-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-a64.c | 16 ++++------------
 target/arm/translate-a64.c | 38 ++++++++++++++++++++++----------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/helper-a64.c
12
+++ b/target/arm/helper-a64.c
13
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
14
int mem_idx;
15
TCGMemOpIdx oi;
16
17
- if (!HAVE_CMPXCHG128) {
18
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
19
- }
20
+ assert(HAVE_CMPXCHG128);
21
22
mem_idx = cpu_mmu_index(env, false);
23
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
24
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
25
int mem_idx;
26
TCGMemOpIdx oi;
27
28
- if (!HAVE_CMPXCHG128) {
29
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
30
- }
31
+ assert(HAVE_CMPXCHG128);
32
33
mem_idx = cpu_mmu_index(env, false);
34
oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
35
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
36
int mem_idx;
37
TCGMemOpIdx oi;
38
39
- if (!HAVE_CMPXCHG128) {
40
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
41
- }
42
+ assert(HAVE_CMPXCHG128);
43
44
mem_idx = cpu_mmu_index(env, false);
45
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
46
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
47
int mem_idx;
48
TCGMemOpIdx oi;
49
50
- if (!HAVE_CMPXCHG128) {
51
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
52
- }
53
+ assert(HAVE_CMPXCHG128);
54
55
mem_idx = cpu_mmu_index(env, false);
56
oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
57
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate-a64.c
60
+++ b/target/arm/translate-a64.c
61
@@ -XXX,XX +XXX,XX @@
62
63
#include "trace-tcg.h"
64
#include "translate-a64.h"
65
+#include "qemu/atomic128.h"
66
67
static TCGv_i64 cpu_X[32];
68
static TCGv_i64 cpu_pc;
69
@@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
70
get_mem_index(s),
71
MO_64 | MO_ALIGN | s->be_data);
72
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
73
- } else if (s->be_data == MO_LE) {
74
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
75
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
76
+ if (!HAVE_CMPXCHG128) {
77
+ gen_helper_exit_atomic(cpu_env);
78
+ s->base.is_jmp = DISAS_NORETURN;
79
+ } else if (s->be_data == MO_LE) {
80
gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
81
cpu_exclusive_addr,
82
cpu_reg(s, rt),
83
cpu_reg(s, rt2));
84
} else {
85
- gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
86
- cpu_reg(s, rt), cpu_reg(s, rt2));
87
- }
88
- } else {
89
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
90
gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
91
cpu_exclusive_addr,
92
cpu_reg(s, rt),
93
cpu_reg(s, rt2));
94
- } else {
95
- gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
96
- cpu_reg(s, rt), cpu_reg(s, rt2));
97
}
98
+ } else if (s->be_data == MO_LE) {
99
+ gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
100
+ cpu_reg(s, rt), cpu_reg(s, rt2));
101
+ } else {
102
+ gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
103
+ cpu_reg(s, rt), cpu_reg(s, rt2));
104
}
105
} else {
106
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
107
@@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
108
}
109
tcg_temp_free_i64(cmp);
110
} else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
111
- TCGv_i32 tcg_rs = tcg_const_i32(rs);
112
-
113
- if (s->be_data == MO_LE) {
114
- gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
115
+ if (HAVE_CMPXCHG128) {
116
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
117
+ if (s->be_data == MO_LE) {
118
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
119
+ } else {
120
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
121
+ }
122
+ tcg_temp_free_i32(tcg_rs);
123
} else {
124
- gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
125
+ gen_helper_exit_atomic(cpu_env);
126
+ s->base.is_jmp = DISAS_NORETURN;
127
}
128
- tcg_temp_free_i32(tcg_rs);
129
} else {
130
TCGv_i64 d1 = tcg_temp_new_i64();
131
TCGv_i64 d2 = tcg_temp_new_i64();
132
--
133
2.17.2
134
135
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Emilio G. Cota <cota@braap.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/ppc/helper.h | 2 +-
5
target/ppc/mem_helper.c | 33 ++++++++++--
6
target/ppc/translate.c | 115 +++++++++++++++++++++-------------------
7
3 files changed, 88 insertions(+), 62 deletions(-)
8
1
9
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/ppc/helper.h
12
+++ b/target/ppc/helper.h
13
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
14
DEF_HELPER_1(tbegin, void, env)
15
DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
16
17
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
18
+#ifdef TARGET_PPC64
19
DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
20
DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
21
DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
22
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/target/ppc/mem_helper.c
25
+++ b/target/ppc/mem_helper.c
26
@@ -XXX,XX +XXX,XX @@
27
#include "exec/cpu_ldst.h"
28
#include "tcg.h"
29
#include "internal.h"
30
+#include "qemu/atomic128.h"
31
32
//#define DEBUG_OP
33
34
@@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
35
return i;
36
}
37
38
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
39
+#ifdef TARGET_PPC64
40
uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
41
uint32_t opidx)
42
{
43
- Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
44
+ Int128 ret;
45
+
46
+ /* We will have raised EXCP_ATOMIC from the translator. */
47
+ assert(HAVE_ATOMIC128);
48
+ ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
49
env->retxh = int128_gethi(ret);
50
return int128_getlo(ret);
51
}
52
@@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
53
uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
54
uint32_t opidx)
55
{
56
- Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
57
+ Int128 ret;
58
+
59
+ /* We will have raised EXCP_ATOMIC from the translator. */
60
+ assert(HAVE_ATOMIC128);
61
+ ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
62
env->retxh = int128_gethi(ret);
63
return int128_getlo(ret);
64
}
65
@@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
66
void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
67
uint64_t lo, uint64_t hi, uint32_t opidx)
68
{
69
- Int128 val = int128_make128(lo, hi);
70
+ Int128 val;
71
+
72
+ /* We will have raised EXCP_ATOMIC from the translator. */
73
+ assert(HAVE_ATOMIC128);
74
+ val = int128_make128(lo, hi);
75
helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
76
}
77
78
void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
79
uint64_t lo, uint64_t hi, uint32_t opidx)
80
{
81
- Int128 val = int128_make128(lo, hi);
82
+ Int128 val;
83
+
84
+ /* We will have raised EXCP_ATOMIC from the translator. */
85
+ assert(HAVE_ATOMIC128);
86
+ val = int128_make128(lo, hi);
87
helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
88
}
89
90
@@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
91
{
92
bool success = false;
93
94
+ /* We will have raised EXCP_ATOMIC from the translator. */
95
+ assert(HAVE_CMPXCHG128);
96
+
97
if (likely(addr == env->reserve_addr)) {
98
Int128 oldv, cmpv, newv;
99
100
@@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
101
{
102
bool success = false;
103
104
+ /* We will have raised EXCP_ATOMIC from the translator. */
105
+ assert(HAVE_CMPXCHG128);
106
+
107
if (likely(addr == env->reserve_addr)) {
108
Int128 oldv, cmpv, newv;
109
110
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/target/ppc/translate.c
113
+++ b/target/ppc/translate.c
114
@@ -XXX,XX +XXX,XX @@
115
#include "trace-tcg.h"
116
#include "exec/translator.h"
117
#include "exec/log.h"
118
+#include "qemu/atomic128.h"
119
120
121
#define CPU_SINGLE_STEP 0x1
122
@@ -XXX,XX +XXX,XX @@ static void gen_lq(DisasContext *ctx)
123
hi = cpu_gpr[rd];
124
125
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
126
-#ifdef CONFIG_ATOMIC128
127
- TCGv_i32 oi = tcg_temp_new_i32();
128
- if (ctx->le_mode) {
129
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
130
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
131
+ if (HAVE_ATOMIC128) {
132
+ TCGv_i32 oi = tcg_temp_new_i32();
133
+ if (ctx->le_mode) {
134
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
135
+ gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
136
+ } else {
137
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
138
+ gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
139
+ }
140
+ tcg_temp_free_i32(oi);
141
+ tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
142
} else {
143
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
144
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
145
+ /* Restart with exclusive lock. */
146
+ gen_helper_exit_atomic(cpu_env);
147
+ ctx->base.is_jmp = DISAS_NORETURN;
148
}
149
- tcg_temp_free_i32(oi);
150
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
151
-#else
152
- /* Restart with exclusive lock. */
153
- gen_helper_exit_atomic(cpu_env);
154
- ctx->base.is_jmp = DISAS_NORETURN;
155
-#endif
156
} else if (ctx->le_mode) {
157
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
158
gen_addr_add(ctx, EA, EA, 8);
159
@@ -XXX,XX +XXX,XX @@ static void gen_std(DisasContext *ctx)
160
hi = cpu_gpr[rs];
161
162
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
163
-#ifdef CONFIG_ATOMIC128
164
- TCGv_i32 oi = tcg_temp_new_i32();
165
- if (ctx->le_mode) {
166
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
167
- gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
168
+ if (HAVE_ATOMIC128) {
169
+ TCGv_i32 oi = tcg_temp_new_i32();
170
+ if (ctx->le_mode) {
171
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
172
+ gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
173
+ } else {
174
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
175
+ gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
176
+ }
177
+ tcg_temp_free_i32(oi);
178
} else {
179
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
180
- gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
181
+ /* Restart with exclusive lock. */
182
+ gen_helper_exit_atomic(cpu_env);
183
+ ctx->base.is_jmp = DISAS_NORETURN;
184
}
185
- tcg_temp_free_i32(oi);
186
-#else
187
- /* Restart with exclusive lock. */
188
- gen_helper_exit_atomic(cpu_env);
189
- ctx->base.is_jmp = DISAS_NORETURN;
190
-#endif
191
} else if (ctx->le_mode) {
192
tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
193
gen_addr_add(ctx, EA, EA, 8);
194
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
195
hi = cpu_gpr[rd];
196
197
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
198
-#ifdef CONFIG_ATOMIC128
199
- TCGv_i32 oi = tcg_temp_new_i32();
200
- if (ctx->le_mode) {
201
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
202
- ctx->mem_idx));
203
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
204
+ if (HAVE_ATOMIC128) {
205
+ TCGv_i32 oi = tcg_temp_new_i32();
206
+ if (ctx->le_mode) {
207
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
208
+ ctx->mem_idx));
209
+ gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
210
+ } else {
211
+ tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
212
+ ctx->mem_idx));
213
+ gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
214
+ }
215
+ tcg_temp_free_i32(oi);
216
+ tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
217
} else {
218
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
219
- ctx->mem_idx));
220
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
221
+ /* Restart with exclusive lock. */
222
+ gen_helper_exit_atomic(cpu_env);
223
+ ctx->base.is_jmp = DISAS_NORETURN;
224
+ tcg_temp_free(EA);
225
+ return;
226
}
227
- tcg_temp_free_i32(oi);
228
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
229
-#else
230
- /* Restart with exclusive lock. */
231
- gen_helper_exit_atomic(cpu_env);
232
- ctx->base.is_jmp = DISAS_NORETURN;
233
- tcg_temp_free(EA);
234
- return;
235
-#endif
236
} else if (ctx->le_mode) {
237
tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
238
tcg_gen_mov_tl(cpu_reserve, EA);
239
@@ -XXX,XX +XXX,XX @@ static void gen_stqcx_(DisasContext *ctx)
240
hi = cpu_gpr[rs];
241
242
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
243
- TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
244
-#ifdef CONFIG_ATOMIC128
245
- if (ctx->le_mode) {
246
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
247
+ if (HAVE_CMPXCHG128) {
248
+ TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
249
+ if (ctx->le_mode) {
250
+ gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
251
+ EA, lo, hi, oi);
252
+ } else {
253
+ gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
254
+ EA, lo, hi, oi);
255
+ }
256
+ tcg_temp_free_i32(oi);
257
} else {
258
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
259
+ /* Restart with exclusive lock. */
260
+ gen_helper_exit_atomic(cpu_env);
261
+ ctx->base.is_jmp = DISAS_NORETURN;
262
}
263
-#else
264
- /* Restart with exclusive lock. */
265
- gen_helper_exit_atomic(cpu_env);
266
- ctx->base.is_jmp = DISAS_NORETURN;
267
-#endif
268
tcg_temp_free(EA);
269
- tcg_temp_free_i32(oi);
270
} else {
271
TCGLabel *lab_fail = gen_new_label();
272
TCGLabel *lab_over = gen_new_label();
273
--
274
2.17.2
275
276
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: David Hildenbrand <david@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/s390x/mem_helper.c | 92 +++++++++++++++++----------------------
5
1 file changed, 41 insertions(+), 51 deletions(-)
6
1
7
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/s390x/mem_helper.c
10
+++ b/target/s390x/mem_helper.c
11
@@ -XXX,XX +XXX,XX @@
12
#include "exec/exec-all.h"
13
#include "exec/cpu_ldst.h"
14
#include "qemu/int128.h"
15
+#include "qemu/atomic128.h"
16
17
#if !defined(CONFIG_USER_ONLY)
18
#include "hw/s390x/storage-keys.h"
19
@@ -XXX,XX +XXX,XX @@ static void do_cdsg(CPUS390XState *env, uint64_t addr,
20
bool fail;
21
22
if (parallel) {
23
-#ifndef CONFIG_ATOMIC128
24
+#if !HAVE_CMPXCHG128
25
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
26
#else
27
int mem_idx = cpu_mmu_index(env, false);
28
@@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
29
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
30
uint64_t a2, bool parallel)
31
{
32
-#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
33
uint32_t mem_idx = cpu_mmu_index(env, false);
34
-#endif
35
uintptr_t ra = GETPC();
36
uint32_t fc = extract32(env->regs[0], 0, 8);
37
uint32_t sc = extract32(env->regs[0], 8, 8);
38
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
39
probe_write(env, a2, 0, mem_idx, ra);
40
#endif
41
42
- /* Note that the compare-and-swap is atomic, and the store is atomic, but
43
- the complete operation is not. Therefore we do not need to assert serial
44
- context in order to implement this. That said, restart early if we can't
45
- support either operation that is supposed to be atomic. */
46
+ /*
47
+ * Note that the compare-and-swap is atomic, and the store is atomic,
48
+ * but the complete operation is not. Therefore we do not need to
49
+ * assert serial context in order to implement this. That said,
50
+ * restart early if we can't support either operation that is supposed
51
+ * to be atomic.
52
+ */
53
if (parallel) {
54
- int mask = 0;
55
-#if !defined(CONFIG_ATOMIC64)
56
- mask = -8;
57
-#elif !defined(CONFIG_ATOMIC128)
58
- mask = -16;
59
+ uint32_t max = 2;
60
+#ifdef CONFIG_ATOMIC64
61
+ max = 3;
62
#endif
63
- if (((4 << fc) | (1 << sc)) & mask) {
64
+ if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
65
+ (HAVE_ATOMIC128 ? 0 : sc > max)) {
66
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
67
}
68
}
69
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
70
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
71
Int128 ov;
72
73
- if (parallel) {
74
-#ifdef CONFIG_ATOMIC128
75
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
76
- ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
77
- cc = !int128_eq(ov, cv);
78
-#else
79
- /* Note that we asserted !parallel above. */
80
- g_assert_not_reached();
81
-#endif
82
- } else {
83
+ if (!parallel) {
84
uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
85
uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
86
87
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
88
89
cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
90
cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
91
+ } else if (HAVE_CMPXCHG128) {
92
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
93
+ ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
94
+ cc = !int128_eq(ov, cv);
95
+ } else {
96
+ /* Note that we asserted !parallel above. */
97
+ g_assert_not_reached();
98
}
99
100
env->regs[r3 + 0] = int128_gethi(ov);
101
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
102
cpu_stq_data_ra(env, a2, svh, ra);
103
break;
104
case 4:
105
- if (parallel) {
106
-#ifdef CONFIG_ATOMIC128
107
+ if (!parallel) {
108
+ cpu_stq_data_ra(env, a2 + 0, svh, ra);
109
+ cpu_stq_data_ra(env, a2 + 8, svl, ra);
110
+ } else if (HAVE_ATOMIC128) {
111
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
112
Int128 sv = int128_make128(svl, svh);
113
helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
114
-#else
115
+ } else {
116
/* Note that we asserted !parallel above. */
117
g_assert_not_reached();
118
-#endif
119
- } else {
120
- cpu_stq_data_ra(env, a2 + 0, svh, ra);
121
- cpu_stq_data_ra(env, a2 + 8, svl, ra);
122
}
123
break;
124
default:
125
@@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
126
uintptr_t ra = GETPC();
127
uint64_t hi, lo;
128
129
- if (parallel) {
130
-#ifndef CONFIG_ATOMIC128
131
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
132
-#else
133
+ if (!parallel) {
134
+ check_alignment(env, addr, 16, ra);
135
+ hi = cpu_ldq_data_ra(env, addr + 0, ra);
136
+ lo = cpu_ldq_data_ra(env, addr + 8, ra);
137
+ } else if (HAVE_ATOMIC128) {
138
int mem_idx = cpu_mmu_index(env, false);
139
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
140
Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
141
hi = int128_gethi(v);
142
lo = int128_getlo(v);
143
-#endif
144
} else {
145
- check_alignment(env, addr, 16, ra);
146
-
147
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
148
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
149
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
150
}
151
152
env->retxl = lo;
153
@@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
154
{
155
uintptr_t ra = GETPC();
156
157
- if (parallel) {
158
-#ifndef CONFIG_ATOMIC128
159
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
160
-#else
161
- int mem_idx = cpu_mmu_index(env, false);
162
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
163
-
164
- Int128 v = int128_make128(low, high);
165
- helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
166
-#endif
167
- } else {
168
+ if (!parallel) {
169
check_alignment(env, addr, 16, ra);
170
-
171
cpu_stq_data_ra(env, addr + 0, high, ra);
172
cpu_stq_data_ra(env, addr + 8, low, ra);
173
+ } else if (HAVE_ATOMIC128) {
174
+ int mem_idx = cpu_mmu_index(env, false);
175
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
176
+ Int128 v = int128_make128(low, high);
177
+ helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
178
+ } else {
179
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
180
}
181
}
182
183
--
184
2.17.2
185
186
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: David Hildenbrand <david@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/s390x/mem_helper.c | 128 ++++++++++++++++++--------------------
5
1 file changed, 61 insertions(+), 67 deletions(-)
6
1
7
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/s390x/mem_helper.c
10
+++ b/target/s390x/mem_helper.c
11
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
12
return cc;
13
}
14
15
-static void do_cdsg(CPUS390XState *env, uint64_t addr,
16
- uint32_t r1, uint32_t r3, bool parallel)
17
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
18
+ uint32_t r1, uint32_t r3)
19
{
20
uintptr_t ra = GETPC();
21
Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
22
Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
23
Int128 oldv;
24
+ uint64_t oldh, oldl;
25
bool fail;
26
27
- if (parallel) {
28
-#if !HAVE_CMPXCHG128
29
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
30
-#else
31
- int mem_idx = cpu_mmu_index(env, false);
32
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
33
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
34
- fail = !int128_eq(oldv, cmpv);
35
-#endif
36
- } else {
37
- uint64_t oldh, oldl;
38
+ check_alignment(env, addr, 16, ra);
39
40
- check_alignment(env, addr, 16, ra);
41
+ oldh = cpu_ldq_data_ra(env, addr + 0, ra);
42
+ oldl = cpu_ldq_data_ra(env, addr + 8, ra);
43
44
- oldh = cpu_ldq_data_ra(env, addr + 0, ra);
45
- oldl = cpu_ldq_data_ra(env, addr + 8, ra);
46
-
47
- oldv = int128_make128(oldl, oldh);
48
- fail = !int128_eq(oldv, cmpv);
49
- if (fail) {
50
- newv = oldv;
51
- }
52
-
53
- cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
54
- cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
55
+ oldv = int128_make128(oldl, oldh);
56
+ fail = !int128_eq(oldv, cmpv);
57
+ if (fail) {
58
+ newv = oldv;
59
}
60
61
+ cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
62
+ cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
63
+
64
env->cc_op = fail;
65
env->regs[r1] = int128_gethi(oldv);
66
env->regs[r1 + 1] = int128_getlo(oldv);
67
}
68
69
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
70
- uint32_t r1, uint32_t r3)
71
-{
72
- do_cdsg(env, addr, r1, r3, false);
73
-}
74
-
75
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
76
uint32_t r1, uint32_t r3)
77
{
78
- do_cdsg(env, addr, r1, r3, true);
79
+ uintptr_t ra = GETPC();
80
+ Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
81
+ Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
82
+ int mem_idx;
83
+ TCGMemOpIdx oi;
84
+ Int128 oldv;
85
+ bool fail;
86
+
87
+ if (!HAVE_CMPXCHG128) {
88
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
89
+ }
90
+
91
+ mem_idx = cpu_mmu_index(env, false);
92
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
93
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
94
+ fail = !int128_eq(oldv, cmpv);
95
+
96
+ env->cc_op = fail;
97
+ env->regs[r1] = int128_gethi(oldv);
98
+ env->regs[r1 + 1] = int128_getlo(oldv);
99
}
100
101
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
102
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
103
#endif
104
105
/* load pair from quadword */
106
-static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
107
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
108
{
109
uintptr_t ra = GETPC();
110
uint64_t hi, lo;
111
112
- if (!parallel) {
113
- check_alignment(env, addr, 16, ra);
114
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
115
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
116
- } else if (HAVE_ATOMIC128) {
117
+ check_alignment(env, addr, 16, ra);
118
+ hi = cpu_ldq_data_ra(env, addr + 0, ra);
119
+ lo = cpu_ldq_data_ra(env, addr + 8, ra);
120
+
121
+ env->retxl = lo;
122
+ return hi;
123
+}
124
+
125
+uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
126
+{
127
+ uintptr_t ra = GETPC();
128
+ uint64_t hi, lo;
129
+
130
+ if (HAVE_ATOMIC128) {
131
int mem_idx = cpu_mmu_index(env, false);
132
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
133
Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
134
@@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
135
return hi;
136
}
137
138
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
139
-{
140
- return do_lpq(env, addr, false);
141
-}
142
-
143
-uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
144
-{
145
- return do_lpq(env, addr, true);
146
-}
147
-
148
/* store pair to quadword */
149
-static void do_stpq(CPUS390XState *env, uint64_t addr,
150
- uint64_t low, uint64_t high, bool parallel)
151
+void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
152
+ uint64_t low, uint64_t high)
153
{
154
uintptr_t ra = GETPC();
155
156
- if (!parallel) {
157
- check_alignment(env, addr, 16, ra);
158
- cpu_stq_data_ra(env, addr + 0, high, ra);
159
- cpu_stq_data_ra(env, addr + 8, low, ra);
160
- } else if (HAVE_ATOMIC128) {
161
+ check_alignment(env, addr, 16, ra);
162
+ cpu_stq_data_ra(env, addr + 0, high, ra);
163
+ cpu_stq_data_ra(env, addr + 8, low, ra);
164
+}
165
+
166
+void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
167
+ uint64_t low, uint64_t high)
168
+{
169
+ uintptr_t ra = GETPC();
170
+
171
+ if (HAVE_ATOMIC128) {
172
int mem_idx = cpu_mmu_index(env, false);
173
TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
174
Int128 v = int128_make128(low, high);
175
@@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
176
}
177
}
178
179
-void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
180
- uint64_t low, uint64_t high)
181
-{
182
- do_stpq(env, addr, low, high, false);
183
-}
184
-
185
-void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
186
- uint64_t low, uint64_t high)
187
-{
188
- do_stpq(env, addr, low, high, true);
189
-}
190
-
191
/* Execute instruction. This instruction executes an insn modified with
192
the contents of r1. It does not change the executed instruction in memory;
193
it does not change the program counter.
194
--
195
2.17.2
Deleted patch
When op raises an exception, it may not have initialized the output
temps that would be written back by wout or cout.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/translate.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
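To make the hazard concrete, here is a minimal, hypothetical op handler
(not taken from the tree; the exception-raising call is elided) that ends
the TB without ever writing o->out.  Without the guard added below,
help_wout would then store an uninitialized temporary:

    static DisasJumpType op_example(DisasContext *s, DisasOps *o)
    {
        if (!operand_is_valid(s)) {          /* hypothetical check */
            /* ... raise a program exception here ... */
            return DISAS_NORETURN;           /* o->out never written */
        }
        tcg_gen_mov_i64(o->out, o->in2);
        return DISAS_NEXT;
    }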
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/translate.c
13
+++ b/target/s390x/translate.c
14
@@ -XXX,XX +XXX,XX @@ struct DisasInsn {
15
16
const char *name;
17
18
+ /* Pre-process arguments before HELP_OP. */
19
void (*help_in1)(DisasContext *, DisasFields *, DisasOps *);
20
void (*help_in2)(DisasContext *, DisasFields *, DisasOps *);
21
void (*help_prep)(DisasContext *, DisasFields *, DisasOps *);
22
+
23
+ /*
24
+ * Post-process output after HELP_OP.
25
+ * Note that these are not called if HELP_OP returns DISAS_NORETURN.
26
+ */
27
void (*help_wout)(DisasContext *, DisasFields *, DisasOps *);
28
void (*help_cout)(DisasContext *, DisasOps *);
29
+
30
+ /* Implement the operation itself. */
31
DisasJumpType (*help_op)(DisasContext *, DisasOps *);
32
33
uint64_t data;
34
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
35
if (insn->help_op) {
36
ret = insn->help_op(s, &o);
37
}
38
- if (insn->help_wout) {
39
- insn->help_wout(s, &f, &o);
40
- }
41
- if (insn->help_cout) {
42
- insn->help_cout(s, &o);
43
+ if (ret != DISAS_NORETURN) {
44
+ if (insn->help_wout) {
45
+ insn->help_wout(s, &f, &o);
46
+ }
47
+ if (insn->help_cout) {
48
+ insn->help_cout(s, &o);
49
+ }
50
}
51
52
/* Free any temporaries created by the helpers. */
53
--
54
2.17.2
Deleted patch
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/mem_helper.c | 40 +++++++++++++++++++--------------------
 target/s390x/translate.c  | 25 +++++++++++++++++-------
 2 files changed, 38 insertions(+), 27 deletions(-)
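The change has the same shape for CDSG, LPQ and STPQ: the HAVE_* check
moves from run time into the translator, so the helpers may simply
assert it.  A condensed sketch of the resulting translate-time dispatch,
following op_lpq in the diff below (comments added for clarity):

    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        /* Serial context: the plain helper needs no atomicity. */
        gen_helper_lpq(o->out, cpu_env, o->in2);
    } else if (HAVE_ATOMIC128) {
        /* Host provides a real 16-byte atomic load. */
        gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
    } else {
        /* Otherwise fall back to the stop-the-world slow path. */
        gen_helper_exit_atomic(cpu_env);
        return DISAS_NORETURN;
    }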
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/target/s390x/mem_helper.c
11
+++ b/target/s390x/mem_helper.c
12
@@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
13
Int128 oldv;
14
bool fail;
15
16
- if (!HAVE_CMPXCHG128) {
17
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
18
- }
19
+ assert(HAVE_CMPXCHG128);
20
21
mem_idx = cpu_mmu_index(env, false);
22
oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
23
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
24
{
25
uintptr_t ra = GETPC();
26
uint64_t hi, lo;
27
+ int mem_idx;
28
+ TCGMemOpIdx oi;
29
+ Int128 v;
30
31
- if (HAVE_ATOMIC128) {
32
- int mem_idx = cpu_mmu_index(env, false);
33
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
34
- Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
35
- hi = int128_gethi(v);
36
- lo = int128_getlo(v);
37
- } else {
38
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
39
- }
40
+ assert(HAVE_ATOMIC128);
41
+
42
+ mem_idx = cpu_mmu_index(env, false);
43
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
44
+ v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
45
+ hi = int128_gethi(v);
46
+ lo = int128_getlo(v);
47
48
env->retxl = lo;
49
return hi;
50
@@ -XXX,XX +XXX,XX @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
51
uint64_t low, uint64_t high)
52
{
53
uintptr_t ra = GETPC();
54
+ int mem_idx;
55
+ TCGMemOpIdx oi;
56
+ Int128 v;
57
58
- if (HAVE_ATOMIC128) {
59
- int mem_idx = cpu_mmu_index(env, false);
60
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
61
- Int128 v = int128_make128(low, high);
62
- helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
63
- } else {
64
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
65
- }
66
+ assert(HAVE_ATOMIC128);
67
+
68
+ mem_idx = cpu_mmu_index(env, false);
69
+ oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
70
+ v = int128_make128(low, high);
71
+ helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
72
}
73
74
/* Execute instruction. This instruction executes an insn modified with
75
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/s390x/translate.c
78
+++ b/target/s390x/translate.c
79
@@ -XXX,XX +XXX,XX @@
80
#include "trace-tcg.h"
81
#include "exec/translator.h"
82
#include "exec/log.h"
83
+#include "qemu/atomic128.h"
84
85
86
/* Information that (most) every instruction needs to manipulate. */
87
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
88
int r3 = get_field(s->fields, r3);
89
int d2 = get_field(s->fields, d2);
90
int b2 = get_field(s->fields, b2);
91
+ DisasJumpType ret = DISAS_NEXT;
92
TCGv_i64 addr;
93
TCGv_i32 t_r1, t_r3;
94
95
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
96
addr = get_address(s, 0, b2, d2);
97
t_r1 = tcg_const_i32(r1);
98
t_r3 = tcg_const_i32(r3);
99
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
100
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
101
+ gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
102
+ } else if (HAVE_CMPXCHG128) {
103
gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
104
} else {
105
- gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
106
+ gen_helper_exit_atomic(cpu_env);
107
+ ret = DISAS_NORETURN;
108
}
109
tcg_temp_free_i64(addr);
110
tcg_temp_free_i32(t_r1);
111
tcg_temp_free_i32(t_r3);
112
113
set_cc_static(s);
114
- return DISAS_NEXT;
115
+ return ret;
116
}
117
118
static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
119
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
120
121
static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
122
{
123
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
124
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
125
+ gen_helper_lpq(o->out, cpu_env, o->in2);
126
+ } else if (HAVE_ATOMIC128) {
127
gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
128
} else {
129
- gen_helper_lpq(o->out, cpu_env, o->in2);
130
+ gen_helper_exit_atomic(cpu_env);
131
+ return DISAS_NORETURN;
132
}
133
return_low128(o->out2);
134
return DISAS_NEXT;
135
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
136
137
static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
138
{
139
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
140
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
141
+ gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
142
+ } else if (HAVE_ATOMIC128) {
143
gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
144
} else {
145
- gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
146
+ gen_helper_exit_atomic(cpu_env);
147
+ return DISAS_NORETURN;
148
}
149
return DISAS_NEXT;
150
}
151
--
152
2.17.2
Deleted patch
From: "Emilio G. Cota" <cota@braap.org>

Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).

This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.

1. aarch64 bootup+shutdown test:

- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )

7.504481349 seconds time elapsed ( +- 0.14% )

- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )

7.474970463 seconds time elapsed ( +- 0.07% )

2. SPEC06int:
   SPEC06int (test set)
   [Y axis: Speedup over master]
   [ASCII chart: speedup over master for tlb-lock-v2 (mutex) and
    tlb-lock-v3 (spinlock) across the SPEC06int benchmarks and their
    geomean; see the png link below for a rendered version.]

png: https://imgur.com/a/BHzpPTW

Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
  a spinlock and a single lock acquisition in tlb_set_page_with_attrs.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181016153840.25877-1-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/softmmu_template.h     | 12 ++++++------
 include/exec/cpu_ldst.h          | 11 ++++++++++-
 include/exec/cpu_ldst_template.h |  2 +-
 accel/tcg/cputlb.c               | 19 +++++++++++++------
 4 files changed, 30 insertions(+), 14 deletions(-)
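For reference, the reader-side accessor introduced below, together with a
deliberately simplified writer, illustrates the discipline described in
the first paragraph.  The writer here is illustrative only and not the
actual cputlb update path; it only shows how updates serialized by
env->tlb_lock pair with lock-free readers:

    /* Lock-free readers: atomic_read avoids a data race (UB) and torn
       loads.  With TCG_OVERSIZED_GUEST the entry is wider than the host
       word, so the plain read is kept. */
    static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
    {
    #if TCG_OVERSIZED_GUEST
        return entry->addr_write;
    #else
        return atomic_read(&entry->addr_write);
    #endif
    }

    /* Illustrative writer: updates happen under env->tlb_lock and are
       published with atomic_set so they pair with the atomic_read above. */
    static void tlb_entry_set_addr_write(CPUArchState *env,
                                         CPUTLBEntry *entry,
                                         target_ulong val)
    {
        qemu_spin_lock(&env->tlb_lock);
        atomic_set(&entry->addr_write, val);
        qemu_spin_unlock(&env->tlb_lock);
    }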
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/accel/tcg/softmmu_template.h
80
+++ b/accel/tcg/softmmu_template.h
81
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
82
uintptr_t mmu_idx = get_mmuidx(oi);
83
uintptr_t index = tlb_index(env, mmu_idx, addr);
84
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
85
- target_ulong tlb_addr = entry->addr_write;
86
+ target_ulong tlb_addr = tlb_addr_write(entry);
87
unsigned a_bits = get_alignment_bits(get_memop(oi));
88
uintptr_t haddr;
89
90
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
91
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
92
mmu_idx, retaddr);
93
}
94
- tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
95
+ tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
96
}
97
98
/* Handle an IO access. */
99
@@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
100
cannot evict the first. */
101
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
102
entry2 = tlb_entry(env, mmu_idx, page2);
103
- if (!tlb_hit_page(entry2->addr_write, page2)
104
+ if (!tlb_hit_page(tlb_addr_write(entry2), page2)
105
&& !VICTIM_TLB_HIT(addr_write, page2)) {
106
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
107
mmu_idx, retaddr);
108
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
109
uintptr_t mmu_idx = get_mmuidx(oi);
110
uintptr_t index = tlb_index(env, mmu_idx, addr);
111
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
112
- target_ulong tlb_addr = entry->addr_write;
113
+ target_ulong tlb_addr = tlb_addr_write(entry);
114
unsigned a_bits = get_alignment_bits(get_memop(oi));
115
uintptr_t haddr;
116
117
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
118
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
119
mmu_idx, retaddr);
120
}
121
- tlb_addr = entry->addr_write & ~TLB_INVALID_MASK;
122
+ tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
123
}
124
125
/* Handle an IO access. */
126
@@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
127
cannot evict the first. */
128
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
129
entry2 = tlb_entry(env, mmu_idx, page2);
130
- if (!tlb_hit_page(entry2->addr_write, page2)
131
+ if (!tlb_hit_page(tlb_addr_write(entry2), page2)
132
&& !VICTIM_TLB_HIT(addr_write, page2)) {
133
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
134
mmu_idx, retaddr);
135
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
136
index XXXXXXX..XXXXXXX 100644
137
--- a/include/exec/cpu_ldst.h
138
+++ b/include/exec/cpu_ldst.h
139
@@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr;
140
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
141
#include "tcg.h"
142
143
+static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
144
+{
145
+#if TCG_OVERSIZED_GUEST
146
+ return entry->addr_write;
147
+#else
148
+ return atomic_read(&entry->addr_write);
149
+#endif
150
+}
151
+
152
/* Find the TLB index corresponding to the mmu_idx + address pair. */
153
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
154
target_ulong addr)
155
@@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
156
tlb_addr = tlbentry->addr_read;
157
break;
158
case 1:
159
- tlb_addr = tlbentry->addr_write;
160
+ tlb_addr = tlb_addr_write(tlbentry);
161
break;
162
case 2:
163
tlb_addr = tlbentry->addr_code;
164
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
165
index XXXXXXX..XXXXXXX 100644
166
--- a/include/exec/cpu_ldst_template.h
167
+++ b/include/exec/cpu_ldst_template.h
168
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
169
addr = ptr;
170
mmu_idx = CPU_MMU_INDEX;
171
entry = tlb_entry(env, mmu_idx, addr);
172
- if (unlikely(entry->addr_write !=
173
+ if (unlikely(tlb_addr_write(entry) !=
174
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
175
oi = make_memop_idx(SHIFT, mmu_idx);
176
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
177
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
178
index XXXXXXX..XXXXXXX 100644
179
--- a/accel/tcg/cputlb.c
180
+++ b/accel/tcg/cputlb.c
181
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
182
target_ulong page)
183
{
184
return tlb_hit_page(tlb_entry->addr_read, page) ||
185
- tlb_hit_page(tlb_entry->addr_write, page) ||
186
+ tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
187
tlb_hit_page(tlb_entry->addr_code, page);
188
}
189
190
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
191
tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
192
193
entry = tlb_entry(env, mmu_idx, addr);
194
- tlb_addr = entry->addr_write;
195
+ tlb_addr = tlb_addr_write(entry);
196
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
197
/* RAM access */
198
uintptr_t haddr = addr + entry->addend;
199
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
200
assert_cpu_is_self(ENV_GET_CPU(env));
201
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
202
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
203
- target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
204
+ target_ulong cmp;
205
+
206
+ /* elt_ofs might correspond to .addr_write, so use atomic_read */
207
+#if TCG_OVERSIZED_GUEST
208
+ cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
209
+#else
210
+ cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
211
+#endif
212
213
if (cmp == page) {
214
/* Found entry in victim tlb, swap tlb and iotlb. */
215
@@ -XXX,XX +XXX,XX @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
216
uintptr_t index = tlb_index(env, mmu_idx, addr);
217
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
218
219
- if (!tlb_hit(entry->addr_write, addr)) {
220
+ if (!tlb_hit(tlb_addr_write(entry), addr)) {
221
/* TLB entry is for a different page */
222
if (!VICTIM_TLB_HIT(addr_write, addr)) {
223
tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
224
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
225
size_t mmu_idx = get_mmuidx(oi);
226
uintptr_t index = tlb_index(env, mmu_idx, addr);
227
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
228
- target_ulong tlb_addr = tlbe->addr_write;
229
+ target_ulong tlb_addr = tlb_addr_write(tlbe);
230
TCGMemOp mop = get_memop(oi);
231
int a_bits = get_alignment_bits(mop);
232
int s_bits = mop & MO_SIZE;
233
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
234
tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
235
mmu_idx, retaddr);
236
}
237
- tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
238
+ tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
239
}
240
241
/* Notice an IO access or a needs-MMU-lookup access */
242
--
243
2.17.2