The following changes since commit 05de778b5b8ab0b402996769117b88c7ea5c7c61:

  Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging (2021-07-09 14:30:01 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210710

for you to fetch changes up to ad1a706f386c2281adb0b09257d892735e405834:

  cpu: Add breakpoint tracepoints (2021-07-09 21:31:11 -0700)

----------------------------------------------------------------
Add translator_use_goto_tb.
Cleanups in prep of breakpoint fixes.
Misc fixes.

----------------------------------------------------------------
Liren Wei (2):
      accel/tcg: Hoist tcg_tb_insert() up above tb_link_page()
      tcg: Bake tb_destroy() into tcg_region_tree

Philippe Mathieu-Daudé (1):
      tcg: Avoid including 'trace-tcg.h' in target translate.c

Richard Henderson (38):
      tcg: Add separator in INDEX_op_call dump
      tcg: Move tb_phys_invalidate_count to tb_ctx
      accel/tcg: Introduce translator_use_goto_tb
      target/alpha: Remove use_exit_tb
      target/alpha: Remove in_superpage
      target/alpha: Use translator_use_goto_tb
      target/arm: Use DISAS_TOO_MANY for ISB and SB
      target/arm: Use translator_use_goto_tb for aarch64
      target/arm: Use translator_use_goto_tb for aarch32
      target/avr: Use translator_use_goto_tb
      target/avr: Mark some helpers noreturn
      target/cris: Use translator_use_goto_tb
      target/hppa: Use translator_use_goto_tb
      target/i386: Use translator_use_goto_tb
      target/m68k: Use translator_use_goto_tb
      target/microblaze: Use translator_use_goto_tb
      target/mips: Use translator_use_goto_tb
      target/mips: Fix missing else in gen_goto_tb
      target/nios2: Use translator_use_goto_tb
      target/openrisc: Use translator_use_goto_tb
      target/ppc: Use translator_use_goto_tb
      target/riscv: Use translator_use_goto_tb
      target/rx: Use translator_use_goto_tb
      target/s390x: Use translator_use_goto_tb
      target/s390x: Remove use_exit_tb
      target/sh4: Use translator_use_goto_tb
      target/sparc: Use translator_use_goto_tb
      target/tricore: Use translator_use_goto_tb
      target/tricore: Use tcg_gen_lookup_and_goto_ptr
      target/xtensa: Use translator_use_goto_tb
      tcg: Fix prologue disassembly
      target/i386: Use cpu_breakpoint_test in breakpoint_handler
      accel/tcg: Move helper_lookup_tb_ptr to cpu-exec.c
      accel/tcg: Move tb_lookup to cpu-exec.c
      accel/tcg: Split out log_cpu_exec
      accel/tcg: Log tb->cflags with -d exec
      tcg: Remove TCG_TARGET_HAS_goto_ptr
      cpu: Add breakpoint tracepoints

 accel/tcg/tb-context.h | 1 +
 accel/tcg/tb-lookup.h | 49 ----------------
 include/exec/translator.h | 10 ++++
 include/tcg/tcg-opc.h | 3 +-
 include/tcg/tcg.h | 4 --
 target/avr/helper.h | 8 +--
 tcg/aarch64/tcg-target.h | 1 -
 tcg/arm/tcg-target.h | 1 -
 tcg/i386/tcg-target.h | 1 -
 tcg/mips/tcg-target.h | 1 -
 tcg/ppc/tcg-target.h | 1 -
 tcg/riscv/tcg-target.h | 1 -
 tcg/s390/tcg-target.h | 1 -
 tcg/sparc/tcg-target.h | 1 -
 tcg/tci/tcg-target.h | 1 -
 accel/tcg/cpu-exec.c | 112 ++++++++++++++++++++++++++++--------
 accel/tcg/tcg-runtime.c | 22 -------
 accel/tcg/translate-all.c | 23 ++++----
 accel/tcg/translator.c | 11 ++++
 cpu.c | 13 +++--
 target/alpha/translate.c | 47 ++-------------
 target/arm/translate-a64.c | 26 ++-------
 target/arm/translate-sve.c | 1 -
 target/arm/translate.c | 17 +-----
 target/avr/translate.c | 9 ++-
 target/cris/translate.c | 6 +-
 target/hppa/translate.c | 6 +-
 target/i386/tcg/sysemu/bpt_helper.c | 12 +---
 target/i386/tcg/translate.c | 15 +----
 target/m68k/translate.c | 13 +----
 target/microblaze/translate.c | 12 +---
 target/mips/tcg/translate.c | 21 ++-----
 target/nios2/translate.c | 15 +----
 target/openrisc/translate.c | 16 +++---
 target/ppc/translate.c | 11 +---
 target/riscv/translate.c | 20 +------
 target/rx/translate.c | 12 +---
 target/s390x/translate.c | 19 +-----
 target/sh4/translate.c | 12 +---
 target/sparc/translate.c | 20 ++-----
 target/tricore/translate.c | 20 ++-----
 target/xtensa/translate.c | 7 +--
 tcg/region.c | 33 +++--------
 tcg/tcg-op.c | 2 +-
 tcg/tcg.c | 14 ++---
 trace-events | 5 ++
 46 files changed, 217 insertions(+), 439 deletions(-)
 delete mode 100644 accel/tcg/tb-lookup.h

The following changes since commit aa33508196f4e2da04625bee36e1f7be5b9267e7:

  Merge tag 'mem-2023-05-23' of https://github.com/davidhildenbrand/qemu into staging (2023-05-23 10:57:25 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230523

for you to fetch changes up to 30d56836f98c7ed2d309bff1dde8854f3d0b5634:

  tcg: Remove USE_TCG_OPTIMIZATIONS (2023-05-23 16:52:39 -0700)

----------------------------------------------------------------
util: Host cpu detection for x86 and aa64
util: Use cpu detection for bufferiszero
migration: Use cpu detection for xbzrle
tcg: Replace and remove cpu_atomic_{ld,st}o*
host/include: Split qemu/atomic128.h
tcg: Remove DEBUG_DISAS
tcg: Remove USE_TCG_OPTIMIZATIONS

----------------------------------------------------------------
Richard Henderson (28):
      util: Introduce host-specific cpuinfo.h
      util: Add cpuinfo-i386.c
      util: Add i386 CPUINFO_ATOMIC_VMOVDQU
      tcg/i386: Use host/cpuinfo.h
      util/bufferiszero: Use i386 host/cpuinfo.h
      migration/xbzrle: Shuffle function order
      migration/xbzrle: Use i386 host/cpuinfo.h
      migration: Build migration_files once
      util: Add cpuinfo-aarch64.c
      include/host: Split out atomic128-cas.h
      include/host: Split out atomic128-ldst.h
      meson: Fix detect atomic128 support with optimization
      include/qemu: Move CONFIG_ATOMIC128_OPT handling to atomic128.h
      target/ppc: Use tcg_gen_qemu_{ld,st}_i128 for LQARX, LQ, STQ
      target/s390x: Use tcg_gen_qemu_{ld,st}_i128 for LPQ, STPQ
      accel/tcg: Unify cpu_{ld,st}*_{be,le}_mmu
      target/s390x: Use cpu_{ld,st}*_mmu in do_csst
      target/s390x: Always use cpu_atomic_cmpxchgl_be_mmu in do_csst
      accel/tcg: Remove cpu_atomic_{ld,st}o_*_mmu
      accel/tcg: Remove prot argument to atomic_mmu_lookup
      accel/tcg: Eliminate #if on HAVE_ATOMIC128 and HAVE_CMPXCHG128
      qemu/atomic128: Split atomic16_read
      accel/tcg: Correctly use atomic128.h in ldst_atomicity.c.inc
      tcg: Split out tcg/debug-assert.h
      qemu/atomic128: Improve cmpxchg fallback for atomic16_set
      qemu/atomic128: Add runtime test for FEAT_LSE2
      tcg: Remove DEBUG_DISAS
      tcg: Remove USE_TCG_OPTIMIZATIONS

 accel/tcg/atomic_template.h | 93 +-----
 host/include/aarch64/host/atomic128-cas.h | 45 +++
 host/include/aarch64/host/atomic128-ldst.h | 79 +++++
 host/include/aarch64/host/cpuinfo.h | 22 ++
 host/include/generic/host/atomic128-cas.h | 47 +++
 host/include/generic/host/atomic128-ldst.h | 81 +++++
 host/include/generic/host/cpuinfo.h | 4 +
 host/include/i386/host/cpuinfo.h | 39 +++
 host/include/x86_64/host/cpuinfo.h | 1 +
 include/exec/cpu_ldst.h | 67 +----
 include/exec/exec-all.h | 3 -
 include/qemu/atomic128.h | 146 ++-------
 include/tcg/debug-assert.h | 17 ++
 include/tcg/tcg.h | 9 +-
 migration/xbzrle.h | 5 +-
 target/ppc/cpu.h | 1 -
 target/ppc/helper.h | 9 -
 target/s390x/cpu.h | 3 -
 target/s390x/helper.h | 4 -
 tcg/aarch64/tcg-target.h | 6 +-
 tcg/i386/tcg-target.h | 28 +-
 accel/tcg/cpu-exec.c | 2 -
 accel/tcg/cputlb.c | 211 ++++---------
 accel/tcg/translate-all.c | 2 -
 accel/tcg/translator.c | 2 -
 accel/tcg/user-exec.c | 332 ++++++--------------
 migration/ram.c | 34 +--
 migration/xbzrle.c | 268 +++++++++--------
 target/arm/tcg/m_helper.c | 4 +-
 target/ppc/mem_helper.c | 48 ---
 target/ppc/translate.c | 34 +--
 target/s390x/tcg/mem_helper.c | 137 ++-------
 target/s390x/tcg/translate.c | 30 +-
 target/sh4/translate.c | 2 -
 target/sparc/ldst_helper.c | 18 +-
 target/sparc/translate.c | 2 -
 tcg/tcg.c | 14 +-
 tests/bench/xbzrle-bench.c | 469 -----------------------------
 tests/unit/test-xbzrle.c | 49 +--
 util/bufferiszero.c | 127 +++-----
 util/cpuinfo-aarch64.c | 67 +++++
 util/cpuinfo-i386.c | 99 ++++++
 MAINTAINERS | 3 +
 accel/tcg/atomic_common.c.inc | 14 -
 accel/tcg/ldst_atomicity.c.inc | 135 ++-------
 accel/tcg/ldst_common.c.inc | 24 +-
 meson.build | 12 +-
 migration/meson.build | 1 -
 target/ppc/translate/fixedpoint-impl.c.inc | 51 +---
 target/s390x/tcg/insn-data.h.inc | 2 +-
 tcg/aarch64/tcg-target.c.inc | 40 ---
 tcg/i386/tcg-target.c.inc | 123 +-------
 tests/bench/meson.build | 6 -
 util/meson.build | 6 +
 54 files changed, 1035 insertions(+), 2042 deletions(-)
 create mode 100644 host/include/aarch64/host/atomic128-cas.h
 create mode 100644 host/include/aarch64/host/atomic128-ldst.h
 create mode 100644 host/include/aarch64/host/cpuinfo.h
 create mode 100644 host/include/generic/host/atomic128-cas.h
 create mode 100644 host/include/generic/host/atomic128-ldst.h
 create mode 100644 host/include/generic/host/cpuinfo.h
 create mode 100644 host/include/i386/host/cpuinfo.h
 create mode 100644 host/include/x86_64/host/cpuinfo.h
 create mode 100644 include/tcg/debug-assert.h
 delete mode 100644 tests/bench/xbzrle-bench.c
 create mode 100644 util/cpuinfo-aarch64.c
 create mode 100644 util/cpuinfo-i386.c
1
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
The entire contents of the header is host-specific, but the
2
existence of such a header is not, which could prevent some
3
host specific ifdefs at the top of the file for the include.
4
5
Add host/include/{arch,generic} to the project arguments.
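The effect can be sketched with a hypothetical consumer (the helper below is illustrative and not part of this patch; CPUINFO_POPCNT and cpuinfo_init() are only introduced by later patches in this series): every file includes the same header name, the -iquote ordering resolves it to the host-specific version when one exists, and it falls back to the empty generic stub otherwise, so no per-host #ifdef is needed around the #include itself.

    /* Hypothetical consumer, for illustration only. */
    #include "qemu/osdep.h"
    #include "host/cpuinfo.h"   /* host/include/<arch>/ if present, else generic */

    static bool host_has_popcnt(void)
    {
    #ifdef CPUINFO_POPCNT
        return cpuinfo_init() & CPUINFO_POPCNT;
    #else
        return false;           /* the generic stub defines no feature bits */
    #endif
    }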
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Juan Quintela <quintela@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
11
---
4
cpu.c | 13 +++++++++----
12
host/include/generic/host/cpuinfo.h | 4 ++++
5
trace-events | 5 +++++
13
meson.build | 10 ++++++++++
6
2 files changed, 14 insertions(+), 4 deletions(-)
14
2 files changed, 14 insertions(+)
15
create mode 100644 host/include/generic/host/cpuinfo.h
7
16
8
diff --git a/cpu.c b/cpu.c
17
diff --git a/host/include/generic/host/cpuinfo.h b/host/include/generic/host/cpuinfo.h
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/host/include/generic/host/cpuinfo.h
22
@@ -XXX,XX +XXX,XX @@
23
+/*
24
+ * No host specific cpu identification.
25
+ * SPDX-License-Identifier: GPL-2.0-or-later
26
+ */
27
diff --git a/meson.build b/meson.build
9
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
10
--- a/cpu.c
29
--- a/meson.build
11
+++ b/cpu.c
30
+++ b/meson.build
12
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ add_project_arguments('-iquote', '.',
13
#include "exec/translate-all.h"
32
'-iquote', meson.current_source_dir() / 'include',
14
#include "exec/log.h"
33
language: all_languages)
15
#include "hw/core/accel-cpu.h"
34
16
+#include "trace/trace-root.h"
35
+# If a host-specific include directory exists, list that first...
17
36
+host_include = meson.current_source_dir() / 'host/include/'
18
uintptr_t qemu_host_page_size;
37
+if fs.is_dir(host_include / host_arch)
19
intptr_t qemu_host_page_mask;
38
+ add_project_arguments('-iquote', host_include / host_arch,
20
@@ -XXX,XX +XXX,XX @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
39
+ language: all_languages)
21
if (breakpoint) {
40
+endif
22
*breakpoint = bp;
41
+# ... followed by the generic fallback.
23
}
42
+add_project_arguments('-iquote', host_include / 'generic',
43
+ language: all_languages)
24
+
44
+
25
+ trace_breakpoint_insert(cpu->cpu_index, pc, flags);
45
sparse = find_program('cgcc', required: get_option('sparse'))
26
return 0;
46
if sparse.found()
27
}
47
run_target('sparse',
28
29
@@ -XXX,XX +XXX,XX @@ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
30
}
31
32
/* Remove a specific breakpoint by reference. */
33
-void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
34
+void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *bp)
35
{
36
- QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
37
+ QTAILQ_REMOVE(&cpu->breakpoints, bp, entry);
38
39
- breakpoint_invalidate(cpu, breakpoint->pc);
40
+ breakpoint_invalidate(cpu, bp->pc);
41
42
- g_free(breakpoint);
43
+ trace_breakpoint_remove(cpu->cpu_index, bp->pc, bp->flags);
44
+ g_free(bp);
45
}
46
47
/* Remove all matching breakpoints. */
48
@@ -XXX,XX +XXX,XX @@ void cpu_single_step(CPUState *cpu, int enabled)
49
/* XXX: only flush what is necessary */
50
tb_flush(cpu);
51
}
52
+ trace_breakpoint_singlestep(cpu->cpu_index, enabled);
53
}
54
}
55
56
diff --git a/trace-events b/trace-events
57
index XXXXXXX..XXXXXXX 100644
58
--- a/trace-events
59
+++ b/trace-events
60
@@ -XXX,XX +XXX,XX @@
61
#
62
# The <format-string> should be a sprintf()-compatible format string.
63
64
+# cpu.c
65
+breakpoint_insert(int cpu_index, uint64_t pc, int flags) "cpu=%d pc=0x%" PRIx64 " flags=0x%x"
66
+breakpoint_remove(int cpu_index, uint64_t pc, int flags) "cpu=%d pc=0x%" PRIx64 " flags=0x%x"
67
+breakpoint_singlestep(int cpu_index, int enabled) "cpu=%d enable=%d"
68
+
69
# dma-helpers.c
70
dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
71
dma_aio_cancel(void *dbs) "dbs=%p"
72
--
48
--
73
2.25.1
49
2.34.1
74
50
75
51
1
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Add cpuinfo.h for i386 and x86_64, and the initialization
2
for that in util/. Populate that with a slightly altered
3
copy of the tcg host probing code. Other uses of cpuid.h
4
will be adjusted one patch at a time.
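One point worth illustrating is the constructor discipline: because C constructor ordering across translation units is unspecified, any other constructor that needs the data calls cpuinfo_init() rather than reading the cpuinfo variable directly. A sketch of such a consumer (the names my_module_init and use_avx2_fast_path are made up for illustration):

    #include "qemu/osdep.h"
    #include "host/cpuinfo.h"

    static bool use_avx2_fast_path;   /* illustrative */

    static void __attribute__((constructor)) my_module_init(void)
    {
        /* Safe even if this runs before the constructor in cpuinfo-i386.c. */
        use_avx2_fast_path = !!(cpuinfo_init() & CPUINFO_AVX2);
    }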
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Juan Quintela <quintela@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
9
---
4
accel/tcg/cpu-exec.c | 6 +++---
10
host/include/i386/host/cpuinfo.h | 38 ++++++++++++
5
1 file changed, 3 insertions(+), 3 deletions(-)
11
host/include/x86_64/host/cpuinfo.h | 1 +
6
12
util/cpuinfo-i386.c | 97 ++++++++++++++++++++++++++++++
7
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
13
MAINTAINERS | 2 +
14
util/meson.build | 4 ++
15
5 files changed, 142 insertions(+)
16
create mode 100644 host/include/i386/host/cpuinfo.h
17
create mode 100644 host/include/x86_64/host/cpuinfo.h
18
create mode 100644 util/cpuinfo-i386.c
19
20
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
21
new file mode 100644
22
index XXXXXXX..XXXXXXX
23
--- /dev/null
24
+++ b/host/include/i386/host/cpuinfo.h
25
@@ -XXX,XX +XXX,XX @@
26
+/*
27
+ * SPDX-License-Identifier: GPL-2.0-or-later
28
+ * Host specific cpu identification for x86.
29
+ */
30
+
31
+#ifndef HOST_CPUINFO_H
32
+#define HOST_CPUINFO_H
33
+
34
+/* Digested version of <cpuid.h> */
35
+
36
+#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
37
+#define CPUINFO_CMOV (1u << 1)
38
+#define CPUINFO_MOVBE (1u << 2)
39
+#define CPUINFO_LZCNT (1u << 3)
40
+#define CPUINFO_POPCNT (1u << 4)
41
+#define CPUINFO_BMI1 (1u << 5)
42
+#define CPUINFO_BMI2 (1u << 6)
43
+#define CPUINFO_SSE2 (1u << 7)
44
+#define CPUINFO_SSE4 (1u << 8)
45
+#define CPUINFO_AVX1 (1u << 9)
46
+#define CPUINFO_AVX2 (1u << 10)
47
+#define CPUINFO_AVX512F (1u << 11)
48
+#define CPUINFO_AVX512VL (1u << 12)
49
+#define CPUINFO_AVX512BW (1u << 13)
50
+#define CPUINFO_AVX512DQ (1u << 14)
51
+#define CPUINFO_AVX512VBMI2 (1u << 15)
52
+#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
53
+
54
+/* Initialized with a constructor. */
55
+extern unsigned cpuinfo;
56
+
57
+/*
58
+ * We cannot rely on constructor ordering, so other constructors must
59
+ * use the function interface rather than the variable above.
60
+ */
61
+unsigned cpuinfo_init(void);
62
+
63
+#endif /* HOST_CPUINFO_H */
64
diff --git a/host/include/x86_64/host/cpuinfo.h b/host/include/x86_64/host/cpuinfo.h
65
new file mode 100644
66
index XXXXXXX..XXXXXXX
67
--- /dev/null
68
+++ b/host/include/x86_64/host/cpuinfo.h
69
@@ -0,0 +1 @@
70
+#include "host/include/i386/host/cpuinfo.h"
71
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
72
new file mode 100644
73
index XXXXXXX..XXXXXXX
74
--- /dev/null
75
+++ b/util/cpuinfo-i386.c
76
@@ -XXX,XX +XXX,XX @@
77
+/*
78
+ * SPDX-License-Identifier: GPL-2.0-or-later
79
+ * Host specific cpu identification for x86.
80
+ */
81
+
82
+#include "qemu/osdep.h"
83
+#include "host/cpuinfo.h"
84
+#ifdef CONFIG_CPUID_H
85
+# include "qemu/cpuid.h"
86
+#endif
87
+
88
+unsigned cpuinfo;
89
+
90
+/* Called both as constructor and (possibly) via other constructors. */
91
+unsigned __attribute__((constructor)) cpuinfo_init(void)
92
+{
93
+ unsigned info = cpuinfo;
94
+
95
+ if (info) {
96
+ return info;
97
+ }
98
+
99
+#ifdef CONFIG_CPUID_H
100
+ unsigned max, a, b, c, d, b7 = 0, c7 = 0;
101
+
102
+ max = __get_cpuid_max(0, 0);
103
+
104
+ if (max >= 7) {
105
+ __cpuid_count(7, 0, a, b7, c7, d);
106
+ info |= (b7 & bit_BMI ? CPUINFO_BMI1 : 0);
107
+ info |= (b7 & bit_BMI2 ? CPUINFO_BMI2 : 0);
108
+ }
109
+
110
+ if (max >= 1) {
111
+ __cpuid(1, a, b, c, d);
112
+
113
+ info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
114
+ info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
115
+ info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
116
+ info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
117
+ info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
118
+
119
+ /* For AVX features, we must check available and usable. */
120
+ if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
121
+ unsigned bv = xgetbv_low(0);
122
+
123
+ if ((bv & 6) == 6) {
124
+ info |= CPUINFO_AVX1;
125
+ info |= (b7 & bit_AVX2 ? CPUINFO_AVX2 : 0);
126
+
127
+ if ((bv & 0xe0) == 0xe0) {
128
+ info |= (b7 & bit_AVX512F ? CPUINFO_AVX512F : 0);
129
+ info |= (b7 & bit_AVX512VL ? CPUINFO_AVX512VL : 0);
130
+ info |= (b7 & bit_AVX512BW ? CPUINFO_AVX512BW : 0);
131
+ info |= (b7 & bit_AVX512DQ ? CPUINFO_AVX512DQ : 0);
132
+ info |= (c7 & bit_AVX512VBMI2 ? CPUINFO_AVX512VBMI2 : 0);
133
+ }
134
+
135
+ /*
136
+ * The Intel SDM has added:
137
+ * Processors that enumerate support for Intel® AVX
138
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
139
+ * guarantee that the 16-byte memory operations performed
140
+ * by the following instructions will always be carried
141
+ * out atomically:
142
+ * - MOVAPD, MOVAPS, and MOVDQA.
143
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
144
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
145
+ * with EVEX.128 and k0 (masking disabled).
146
+ * Note that these instructions require the linear addresses
147
+ * of their memory operands to be 16-byte aligned.
148
+ *
149
+ * AMD has provided an even stronger guarantee that processors
150
+ * with AVX provide 16-byte atomicity for all cachable,
151
+ * naturally aligned single loads and stores, e.g. MOVDQU.
152
+ *
153
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
154
+ */
155
+ __cpuid(0, a, b, c, d);
156
+ if (c == signature_INTEL_ecx || c == signature_AMD_ecx) {
157
+ info |= CPUINFO_ATOMIC_VMOVDQA;
158
+ }
159
+ }
160
+ }
161
+ }
162
+
163
+ max = __get_cpuid_max(0x8000000, 0);
164
+ if (max >= 1) {
165
+ __cpuid(0x80000001, a, b, c, d);
166
+ info |= (c & bit_LZCNT ? CPUINFO_LZCNT : 0);
167
+ }
168
+#endif
169
+
170
+ info |= CPUINFO_ALWAYS;
171
+ cpuinfo = info;
172
+ return info;
173
+}
174
diff --git a/MAINTAINERS b/MAINTAINERS
8
index XXXXXXX..XXXXXXX 100644
175
index XXXXXXX..XXXXXXX 100644
9
--- a/accel/tcg/cpu-exec.c
176
--- a/MAINTAINERS
10
+++ b/accel/tcg/cpu-exec.c
177
+++ b/MAINTAINERS
11
@@ -XXX,XX +XXX,XX @@ static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
178
@@ -XXX,XX +XXX,XX @@ F: include/exec/helper*.h
12
179
F: include/sysemu/cpus.h
13
qemu_log_mask(CPU_LOG_EXEC,
180
F: include/sysemu/tcg.h
14
"Trace %d: %p [" TARGET_FMT_lx
181
F: include/hw/core/tcg-cpu-ops.h
15
- "/" TARGET_FMT_lx "/%#x] %s\n",
182
+F: host/include/*/host/cpuinfo.h
16
- cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, tb->flags,
183
+F: util/cpuinfo-*.c
17
- lookup_symbol(pc));
184
18
+ "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
185
FPU emulation
19
+ cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
186
M: Aurelien Jarno <aurelien@aurel32.net>
20
+ tb->flags, tb->cflags, lookup_symbol(pc));
187
diff --git a/util/meson.build b/util/meson.build
21
188
index XXXXXXX..XXXXXXX 100644
22
#if defined(DEBUG_DISAS)
189
--- a/util/meson.build
23
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
190
+++ b/util/meson.build
191
@@ -XXX,XX +XXX,XX @@ if have_block
192
endif
193
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
194
endif
195
+
196
+if cpu in ['x86', 'x86_64']
197
+ util_ss.add(files('cpuinfo-i386.c'))
198
+endif
24
--
199
--
25
2.25.1
200
2.34.1
26
201
27
202
We have not needed to end a TB for I/O since ba3e7926691
("icount: clean up cpu_can_io at the entry to the block").
We do not need to use exit_tb for singlestep, which only
means generate one insn per TB.

Which leaves only singlestep_enabled, which means raise a
debug trap after every TB, which does not use exit_tb,
which would leave the function mis-named.

Add a bit to indicate when VMOVDQU is also atomic if aligned.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
target/alpha/translate.c | 15 ++-------------
7
host/include/i386/host/cpuinfo.h | 1 +
14
1 file changed, 2 insertions(+), 13 deletions(-)
8
util/cpuinfo-i386.c | 4 +++-
9
2 files changed, 4 insertions(+), 1 deletion(-)
15
10
16
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
11
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
17
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
18
--- a/target/alpha/translate.c
13
--- a/host/include/i386/host/cpuinfo.h
19
+++ b/target/alpha/translate.c
14
+++ b/host/include/i386/host/cpuinfo.h
20
@@ -XXX,XX +XXX,XX @@ static bool in_superpage(DisasContext *ctx, int64_t addr)
15
@@ -XXX,XX +XXX,XX @@
21
#endif
16
#define CPUINFO_AVX512DQ (1u << 14)
22
}
17
#define CPUINFO_AVX512VBMI2 (1u << 15)
23
18
#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
24
-static bool use_exit_tb(DisasContext *ctx)
19
+#define CPUINFO_ATOMIC_VMOVDQU (1u << 17)
25
-{
20
26
- return ((tb_cflags(ctx->base.tb) & CF_LAST_IO)
21
/* Initialized with a constructor. */
27
- || ctx->base.singlestep_enabled
22
extern unsigned cpuinfo;
28
- || singlestep);
23
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
29
-}
24
index XXXXXXX..XXXXXXX 100644
30
-
25
--- a/util/cpuinfo-i386.c
31
static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
26
+++ b/util/cpuinfo-i386.c
32
{
27
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
33
- /* Suppress goto_tb in the case of single-steping and IO. */
28
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
34
- if (unlikely(use_exit_tb(ctx))) {
29
*/
35
- return false;
30
__cpuid(0, a, b, c, d);
36
- }
31
- if (c == signature_INTEL_ecx || c == signature_AMD_ecx) {
37
#ifndef CONFIG_USER_ONLY
32
+ if (c == signature_INTEL_ecx) {
38
/* If the destination is in the superpage, the page perms can't change. */
33
info |= CPUINFO_ATOMIC_VMOVDQA;
39
if (in_superpage(ctx, dest)) {
34
+ } else if (c == signature_AMD_ecx) {
40
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode)
35
+ info |= CPUINFO_ATOMIC_VMOVDQA | CPUINFO_ATOMIC_VMOVDQU;
41
need the page permissions check. We'll see the existence of
36
}
42
the page when we create the TB, and we'll flush all TBs if
37
}
43
we change the PAL base register. */
44
- if (!use_exit_tb(ctx)) {
45
+ if (!ctx->base.singlestep_enabled) {
46
tcg_gen_goto_tb(0);
47
tcg_gen_movi_i64(cpu_pc, entry);
48
tcg_gen_exit_tb(ctx->base.tb, 0);
49
@@ -XXX,XX +XXX,XX @@ static void alpha_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
50
tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
51
/* FALLTHRU */
52
case DISAS_PC_UPDATED:
53
- if (!use_exit_tb(ctx)) {
54
+ if (!ctx->base.singlestep_enabled) {
55
tcg_gen_lookup_and_goto_ptr();
56
break;
57
}
38
}
58
--
39
--
59
2.25.1
40
2.34.1
60
41
61
42
Since 6eea04347eb6, all tcg backends support goto_ptr.
Remove the conditional, making support mandatory.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Use the CPUINFO_* bits instead of the individual boolean
variables that we had been using. Remove all of the init
code that was moved over to cpuinfo-i386.c.

Note that have_avx512* check both AVX512{F,VL}, as we had
previously done during tcg_target_init.
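The shape of the replacement, shown here with two of the bits as an example (this mirrors the tcg/i386/tcg-target.h hunk below): a feature test becomes a macro over the shared bitmask, so there is no per-feature bool left to initialize in tcg_target_init().

    #include "host/cpuinfo.h"

    #define have_bmi1   (cpuinfo & CPUINFO_BMI1)
    #define have_movbe  (cpuinfo & CPUINFO_MOVBE)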
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
11
---
7
include/tcg/tcg-opc.h | 3 +--
12
tcg/i386/tcg-target.h | 28 +++++----
8
tcg/aarch64/tcg-target.h | 1 -
13
tcg/i386/tcg-target.c.inc | 123 ++------------------------------------
9
tcg/arm/tcg-target.h | 1 -
14
2 files changed, 22 insertions(+), 129 deletions(-)
10
tcg/i386/tcg-target.h | 1 -
15
11
tcg/mips/tcg-target.h | 1 -
12
tcg/ppc/tcg-target.h | 1 -
13
tcg/riscv/tcg-target.h | 1 -
14
tcg/s390/tcg-target.h | 1 -
15
tcg/sparc/tcg-target.h | 1 -
16
tcg/tci/tcg-target.h | 1 -
17
tcg/tcg-op.c | 2 +-
18
tcg/tcg.c | 8 ++------
19
12 files changed, 4 insertions(+), 18 deletions(-)
20
21
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/tcg/tcg-opc.h
24
+++ b/include/tcg/tcg-opc.h
25
@@ -XXX,XX +XXX,XX @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
26
TCG_OPF_NOT_PRESENT)
27
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
28
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
29
-DEF(goto_ptr, 0, 1, 0,
30
- TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
31
+DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
32
33
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
34
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
35
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/aarch64/tcg-target.h
38
+++ b/tcg/aarch64/tcg-target.h
39
@@ -XXX,XX +XXX,XX @@ typedef enum {
40
#define TCG_TARGET_HAS_mulsh_i32 0
41
#define TCG_TARGET_HAS_extrl_i64_i32 0
42
#define TCG_TARGET_HAS_extrh_i64_i32 0
43
-#define TCG_TARGET_HAS_goto_ptr 1
44
#define TCG_TARGET_HAS_qemu_st8_i32 0
45
46
#define TCG_TARGET_HAS_div_i64 1
47
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tcg/arm/tcg-target.h
50
+++ b/tcg/arm/tcg-target.h
51
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
52
#define TCG_TARGET_HAS_mulsh_i32 0
53
#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
54
#define TCG_TARGET_HAS_rem_i32 0
55
-#define TCG_TARGET_HAS_goto_ptr 1
56
#define TCG_TARGET_HAS_direct_jump 0
57
#define TCG_TARGET_HAS_qemu_st8_i32 0
58
59
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
16
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
60
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/i386/tcg-target.h
18
--- a/tcg/i386/tcg-target.h
62
+++ b/tcg/i386/tcg-target.h
19
+++ b/tcg/i386/tcg-target.h
63
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
20
@@ -XXX,XX +XXX,XX @@
64
#define TCG_TARGET_HAS_muls2_i32 1
21
#ifndef I386_TCG_TARGET_H
65
#define TCG_TARGET_HAS_muluh_i32 0
22
#define I386_TCG_TARGET_H
66
#define TCG_TARGET_HAS_mulsh_i32 0
23
67
-#define TCG_TARGET_HAS_goto_ptr 1
24
+#include "host/cpuinfo.h"
68
#define TCG_TARGET_HAS_direct_jump 1
25
+
69
26
#define TCG_TARGET_INSN_UNIT_SIZE 1
27
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
28
29
@@ -XXX,XX +XXX,XX @@ typedef enum {
30
# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
31
#endif
32
33
-extern bool have_bmi1;
34
-extern bool have_popcnt;
35
-extern bool have_avx1;
36
-extern bool have_avx2;
37
-extern bool have_avx512bw;
38
-extern bool have_avx512dq;
39
-extern bool have_avx512vbmi2;
40
-extern bool have_avx512vl;
41
-extern bool have_movbe;
42
-extern bool have_atomic16;
43
+#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
44
+#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
45
+#define have_avx1 (cpuinfo & CPUINFO_AVX1)
46
+#define have_avx2 (cpuinfo & CPUINFO_AVX2)
47
+#define have_movbe (cpuinfo & CPUINFO_MOVBE)
48
+#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
49
+
50
+/*
51
+ * There are interesting instructions in AVX512, so long as we have AVX512VL,
52
+ * which indicates support for EVEX on sizes smaller than 512 bits.
53
+ */
54
+#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
55
+ (cpuinfo & CPUINFO_AVX512F))
56
+#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
57
+#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
58
+#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
59
60
/* optional instructions */
61
#define TCG_TARGET_HAS_div2_i32 1
62
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
63
index XXXXXXX..XXXXXXX 100644
64
--- a/tcg/i386/tcg-target.c.inc
65
+++ b/tcg/i386/tcg-target.c.inc
66
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
67
# define SOFTMMU_RESERVE_REGS 0
68
#endif
69
70
-/* The host compiler should supply <cpuid.h> to enable runtime features
71
- detection, as we're not going to go so far as our own inline assembly.
72
- If not available, default values will be assumed. */
73
-#if defined(CONFIG_CPUID_H)
74
-#include "qemu/cpuid.h"
75
-#endif
76
-
77
/* For 64-bit, we always know that CMOV is available. */
70
#if TCG_TARGET_REG_BITS == 64
78
#if TCG_TARGET_REG_BITS == 64
71
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
79
-# define have_cmov 1
72
index XXXXXXX..XXXXXXX 100644
80
-#elif defined(CONFIG_CPUID_H)
73
--- a/tcg/mips/tcg-target.h
81
-static bool have_cmov;
74
+++ b/tcg/mips/tcg-target.h
82
+# define have_cmov true
75
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
83
#else
76
#define TCG_TARGET_HAS_muluh_i32 1
84
-# define have_cmov 0
77
#define TCG_TARGET_HAS_mulsh_i32 1
85
-#endif
78
#define TCG_TARGET_HAS_bswap32_i32 1
86
-
79
-#define TCG_TARGET_HAS_goto_ptr 1
87
-/* We need these symbols in tcg-target.h, and we can't properly conditionalize
80
#define TCG_TARGET_HAS_direct_jump 1
88
- it there. Therefore we always define the variable. */
81
89
-bool have_bmi1;
82
#if TCG_TARGET_REG_BITS == 64
90
-bool have_popcnt;
83
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
91
-bool have_avx1;
84
index XXXXXXX..XXXXXXX 100644
92
-bool have_avx2;
85
--- a/tcg/ppc/tcg-target.h
93
-bool have_avx512bw;
86
+++ b/tcg/ppc/tcg-target.h
94
-bool have_avx512dq;
87
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
95
-bool have_avx512vbmi2;
88
#define TCG_TARGET_HAS_muls2_i32 0
96
-bool have_avx512vl;
89
#define TCG_TARGET_HAS_muluh_i32 1
97
-bool have_movbe;
90
#define TCG_TARGET_HAS_mulsh_i32 1
98
-bool have_atomic16;
91
-#define TCG_TARGET_HAS_goto_ptr 1
99
-
92
#define TCG_TARGET_HAS_direct_jump 1
100
-#ifdef CONFIG_CPUID_H
93
#define TCG_TARGET_HAS_qemu_st8_i32 0
101
-static bool have_bmi2;
94
102
-static bool have_lzcnt;
95
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
103
-#else
96
index XXXXXXX..XXXXXXX 100644
104
-# define have_bmi2 0
97
--- a/tcg/riscv/tcg-target.h
105
-# define have_lzcnt 0
98
+++ b/tcg/riscv/tcg-target.h
106
+# define have_cmov (cpuinfo & CPUINFO_CMOV)
99
@@ -XXX,XX +XXX,XX @@ typedef enum {
107
#endif
100
#define TCG_TARGET_CALL_STACK_OFFSET 0
108
+#define have_bmi2 (cpuinfo & CPUINFO_BMI2)
101
109
+#define have_lzcnt (cpuinfo & CPUINFO_LZCNT)
102
/* optional instructions */
110
103
-#define TCG_TARGET_HAS_goto_ptr 1
111
static const tcg_insn_unit *tb_ret_addr;
104
#define TCG_TARGET_HAS_movcond_i32 0
112
105
#define TCG_TARGET_HAS_div_i32 1
113
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
106
#define TCG_TARGET_HAS_rem_i32 1
114
107
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
115
static void tcg_target_init(TCGContext *s)
108
index XXXXXXX..XXXXXXX 100644
109
--- a/tcg/s390/tcg-target.h
110
+++ b/tcg/s390/tcg-target.h
111
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities;
112
#define TCG_TARGET_HAS_mulsh_i32 0
113
#define TCG_TARGET_HAS_extrl_i64_i32 0
114
#define TCG_TARGET_HAS_extrh_i64_i32 0
115
-#define TCG_TARGET_HAS_goto_ptr 1
116
#define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT)
117
#define TCG_TARGET_HAS_qemu_st8_i32 0
118
119
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/sparc/tcg-target.h
122
+++ b/tcg/sparc/tcg-target.h
123
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
124
#define TCG_TARGET_HAS_muls2_i32 1
125
#define TCG_TARGET_HAS_muluh_i32 0
126
#define TCG_TARGET_HAS_mulsh_i32 0
127
-#define TCG_TARGET_HAS_goto_ptr 1
128
#define TCG_TARGET_HAS_direct_jump 1
129
#define TCG_TARGET_HAS_qemu_st8_i32 0
130
131
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/tci/tcg-target.h
134
+++ b/tcg/tci/tcg-target.h
135
@@ -XXX,XX +XXX,XX @@
136
#define TCG_TARGET_HAS_muls2_i32 1
137
#define TCG_TARGET_HAS_muluh_i32 0
138
#define TCG_TARGET_HAS_mulsh_i32 0
139
-#define TCG_TARGET_HAS_goto_ptr 1
140
#define TCG_TARGET_HAS_direct_jump 0
141
#define TCG_TARGET_HAS_qemu_st8_i32 0
142
143
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/tcg/tcg-op.c
146
+++ b/tcg/tcg-op.c
147
@@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx)
148
149
void tcg_gen_lookup_and_goto_ptr(void)
150
{
116
{
151
- if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
117
-#ifdef CONFIG_CPUID_H
152
+ if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
118
- unsigned a, b, c, d, b7 = 0, c7 = 0;
153
TCGv_ptr ptr;
119
- unsigned max = __get_cpuid_max(0, 0);
154
120
-
155
plugin_gen_disable_mem_helpers();
121
- if (max >= 7) {
156
diff --git a/tcg/tcg.c b/tcg/tcg.c
122
- /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
157
index XXXXXXX..XXXXXXX 100644
123
- __cpuid_count(7, 0, a, b7, c7, d);
158
--- a/tcg/tcg.c
124
- have_bmi1 = (b7 & bit_BMI) != 0;
159
+++ b/tcg/tcg.c
125
- have_bmi2 = (b7 & bit_BMI2) != 0;
160
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
161
* For tci, we use NULL as the signal to return from the interpreter,
162
* so skip this check.
163
*/
164
- if (TCG_TARGET_HAS_goto_ptr) {
165
- tcg_debug_assert(tcg_code_gen_epilogue != NULL);
166
- }
126
- }
167
+ tcg_debug_assert(tcg_code_gen_epilogue != NULL);
127
-
168
#endif
128
- if (max >= 1) {
169
129
- __cpuid(1, a, b, c, d);
170
tcg_region_prologue_set(s);
130
-#ifndef have_cmov
171
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
131
- /* For 32-bit, 99% certainty that we're running on hardware that
172
case INDEX_op_insn_start:
132
- supports cmov, but we still need to check. In case cmov is not
173
case INDEX_op_exit_tb:
133
- available, we'll use a small forward branch. */
174
case INDEX_op_goto_tb:
134
- have_cmov = (d & bit_CMOV) != 0;
175
+ case INDEX_op_goto_ptr:
135
-#endif
176
case INDEX_op_qemu_ld_i32:
136
-
177
case INDEX_op_qemu_st_i32:
137
- /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
178
case INDEX_op_qemu_ld_i64:
138
- need to probe for it. */
179
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
139
- have_movbe = (c & bit_MOVBE) != 0;
180
case INDEX_op_qemu_st8_i32:
140
- have_popcnt = (c & bit_POPCNT) != 0;
181
return TCG_TARGET_HAS_qemu_st8_i32;
141
-
182
142
- /* There are a number of things we must check before we can be
183
- case INDEX_op_goto_ptr:
143
- sure of not hitting invalid opcode. */
184
- return TCG_TARGET_HAS_goto_ptr;
144
- if (c & bit_OSXSAVE) {
185
-
145
- unsigned bv = xgetbv_low(0);
186
case INDEX_op_mov_i32:
146
-
187
case INDEX_op_setcond_i32:
147
- if ((bv & 6) == 6) {
188
case INDEX_op_brcond_i32:
148
- have_avx1 = (c & bit_AVX) != 0;
149
- have_avx2 = (b7 & bit_AVX2) != 0;
150
-
151
- /*
152
- * There are interesting instructions in AVX512, so long
153
- * as we have AVX512VL, which indicates support for EVEX
154
- * on sizes smaller than 512 bits. We are required to
155
- * check that OPMASK and all extended ZMM state are enabled
156
- * even if we're not using them -- the insns will fault.
157
- */
158
- if ((bv & 0xe0) == 0xe0
159
- && (b7 & bit_AVX512F)
160
- && (b7 & bit_AVX512VL)) {
161
- have_avx512vl = true;
162
- have_avx512bw = (b7 & bit_AVX512BW) != 0;
163
- have_avx512dq = (b7 & bit_AVX512DQ) != 0;
164
- have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
165
- }
166
-
167
- /*
168
- * The Intel SDM has added:
169
- * Processors that enumerate support for Intel® AVX
170
- * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
171
- * guarantee that the 16-byte memory operations performed
172
- * by the following instructions will always be carried
173
- * out atomically:
174
- * - MOVAPD, MOVAPS, and MOVDQA.
175
- * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
176
- * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
177
- * with EVEX.128 and k0 (masking disabled).
178
- * Note that these instructions require the linear addresses
179
- * of their memory operands to be 16-byte aligned.
180
- *
181
- * AMD has provided an even stronger guarantee that processors
182
- * with AVX provide 16-byte atomicity for all cachable,
183
- * naturally aligned single loads and stores, e.g. MOVDQU.
184
- *
185
- * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
186
- */
187
- if (have_avx1) {
188
- __cpuid(0, a, b, c, d);
189
- have_atomic16 = (c == signature_INTEL_ecx ||
190
- c == signature_AMD_ecx);
191
- }
192
- }
193
- }
194
- }
195
-
196
- max = __get_cpuid_max(0x8000000, 0);
197
- if (max >= 1) {
198
- __cpuid(0x80000001, a, b, c, d);
199
- /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
200
- have_lzcnt = (c & bit_LZCNT) != 0;
201
- }
202
-#endif /* CONFIG_CPUID_H */
203
-
204
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
205
if (TCG_TARGET_REG_BITS == 64) {
206
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
189
--
207
--
190
2.25.1
208
2.34.1
191
209
192
210
1
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
1
Use cpuinfo_init() during init_accel(), and the variable cpuinfo
2
during test_buffer_is_zero_next_accel(). Adjust the logic that
3
cycles through the set of accelerators for testing.
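For context, a caller cycles through the accelerators roughly like this (a sketch loosely modelled on the unit test, not part of this patch; buf, len and expected are assumed to be set up by the caller):

    /* Re-run the same checks once per available accelerator. */
    do {
        g_assert(buffer_is_zero(buf, len) == expected);
    } while (test_buffer_is_zero_next_accel());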
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
7
---
4
target/sparc/translate.c | 19 +++++--------------
8
util/bufferiszero.c | 127 ++++++++++++++++----------------------------
5
1 file changed, 5 insertions(+), 14 deletions(-)
9
1 file changed, 46 insertions(+), 81 deletions(-)
6
10
7
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
11
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
8
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
9
--- a/target/sparc/translate.c
13
--- a/util/bufferiszero.c
10
+++ b/target/sparc/translate.c
14
+++ b/util/bufferiszero.c
11
@@ -XXX,XX +XXX,XX @@ static inline TCGv gen_dest_gpr(DisasContext *dc, int reg)
15
@@ -XXX,XX +XXX,XX @@
12
}
16
#include "qemu/osdep.h"
17
#include "qemu/cutils.h"
18
#include "qemu/bswap.h"
19
+#include "host/cpuinfo.h"
20
21
static bool
22
buffer_zero_int(const void *buf, size_t len)
23
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx512(const void *buf, size_t len)
13
}
24
}
14
25
#endif /* CONFIG_AVX512F_OPT */
15
-static inline bool use_goto_tb(DisasContext *s, target_ulong pc,
26
16
- target_ulong npc)
27
-
17
+static bool use_goto_tb(DisasContext *s, target_ulong pc, target_ulong npc)
28
-/* Note that for test_buffer_is_zero_next_accel, the most preferred
29
- * ISA must have the least significant bit.
30
- */
31
-#define CACHE_AVX512F 1
32
-#define CACHE_AVX2 2
33
-#define CACHE_SSE4 4
34
-#define CACHE_SSE2 8
35
-
36
-/* Make sure that these variables are appropriately initialized when
37
+/*
38
+ * Make sure that these variables are appropriately initialized when
39
* SSE2 is enabled on the compiler command-line, but the compiler is
40
* too old to support CONFIG_AVX2_OPT.
41
*/
42
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
43
-# define INIT_CACHE 0
44
-# define INIT_ACCEL buffer_zero_int
45
+# define INIT_USED 0
46
+# define INIT_LENGTH 0
47
+# define INIT_ACCEL buffer_zero_int
48
#else
49
# ifndef __SSE2__
50
# error "ISA selection confusion"
51
# endif
52
-# define INIT_CACHE CACHE_SSE2
53
-# define INIT_ACCEL buffer_zero_sse2
54
+# define INIT_USED CPUINFO_SSE2
55
+# define INIT_LENGTH 64
56
+# define INIT_ACCEL buffer_zero_sse2
57
#endif
58
59
-static unsigned cpuid_cache = INIT_CACHE;
60
+static unsigned used_accel = INIT_USED;
61
+static unsigned length_to_accel = INIT_LENGTH;
62
static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
63
-static int length_to_accel = 64;
64
65
-static void init_accel(unsigned cache)
66
+static unsigned __attribute__((noinline))
67
+select_accel_cpuinfo(unsigned info)
18
{
68
{
19
- if (unlikely(s->base.singlestep_enabled || singlestep)) {
69
- bool (*fn)(const void *, size_t) = buffer_zero_int;
70
- if (cache & CACHE_SSE2) {
71
- fn = buffer_zero_sse2;
72
- length_to_accel = 64;
73
- }
74
-#ifdef CONFIG_AVX2_OPT
75
- if (cache & CACHE_SSE4) {
76
- fn = buffer_zero_sse4;
77
- length_to_accel = 64;
78
- }
79
- if (cache & CACHE_AVX2) {
80
- fn = buffer_zero_avx2;
81
- length_to_accel = 128;
82
- }
83
-#endif
84
+ /* Array is sorted in order of algorithm preference. */
85
+ static const struct {
86
+ unsigned bit;
87
+ unsigned len;
88
+ bool (*fn)(const void *, size_t);
89
+ } all[] = {
90
#ifdef CONFIG_AVX512F_OPT
91
- if (cache & CACHE_AVX512F) {
92
- fn = buffer_zero_avx512;
93
- length_to_accel = 256;
94
- }
95
+ { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
96
#endif
97
- buffer_accel = fn;
98
+#ifdef CONFIG_AVX2_OPT
99
+ { CPUINFO_AVX2, 128, buffer_zero_avx2 },
100
+ { CPUINFO_SSE4, 64, buffer_zero_sse4 },
101
+#endif
102
+ { CPUINFO_SSE2, 64, buffer_zero_sse2 },
103
+ { CPUINFO_ALWAYS, 0, buffer_zero_int },
104
+ };
105
+
106
+ for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
107
+ if (info & all[i].bit) {
108
+ length_to_accel = all[i].len;
109
+ buffer_accel = all[i].fn;
110
+ return all[i].bit;
111
+ }
112
+ }
113
+ return 0;
114
}
115
116
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
117
-#include "qemu/cpuid.h"
118
-
119
-static void __attribute__((constructor)) init_cpuid_cache(void)
120
+static void __attribute__((constructor)) init_accel(void)
121
{
122
- unsigned max = __get_cpuid_max(0, NULL);
123
- int a, b, c, d;
124
- unsigned cache = 0;
125
-
126
- if (max >= 1) {
127
- __cpuid(1, a, b, c, d);
128
- if (d & bit_SSE2) {
129
- cache |= CACHE_SSE2;
130
- }
131
- if (c & bit_SSE4_1) {
132
- cache |= CACHE_SSE4;
133
- }
134
-
135
- /* We must check that AVX is not just available, but usable. */
136
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
137
- unsigned bv = xgetbv_low(0);
138
- __cpuid_count(7, 0, a, b, c, d);
139
- if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
140
- cache |= CACHE_AVX2;
141
- }
142
- /* 0xe6:
143
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
144
- * and ZMM16-ZMM31 state are enabled by OS)
145
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
146
- */
147
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
148
- cache |= CACHE_AVX512F;
149
- }
150
- }
151
- }
152
- cpuid_cache = cache;
153
- init_accel(cache);
154
+ used_accel = select_accel_cpuinfo(cpuinfo_init());
155
}
156
#endif /* CONFIG_AVX2_OPT */
157
158
bool test_buffer_is_zero_next_accel(void)
159
{
160
- /* If no bits set, we just tested buffer_zero_int, and there
161
- are no more acceleration options to test. */
162
- if (cpuid_cache == 0) {
20
- return false;
163
- return false;
21
- }
164
- }
22
-
165
- /* Disable the accelerator we used before and select a new one. */
23
-#ifndef CONFIG_USER_ONLY
166
- cpuid_cache &= cpuid_cache - 1;
24
- return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) &&
167
- init_accel(cpuid_cache);
25
- (npc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK);
26
-#else
27
- return true;
168
- return true;
28
-#endif
169
+ /*
29
+ return translator_use_goto_tb(&s->base, pc) &&
170
+ * Accumulate the accelerators that we've already tested, and
30
+ translator_use_goto_tb(&s->base, npc);
171
+ * remove them from the set to test this round. We'll get back
172
+ * a zero from select_accel_cpuinfo when there are no more.
173
+ */
174
+ unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
175
+ used_accel |= used;
176
+ return used;
31
}
177
}
32
178
33
-static inline void gen_goto_tb(DisasContext *s, int tb_num,
179
static bool select_accel_fn(const void *buf, size_t len)
34
- target_ulong pc, target_ulong npc)
35
+static void gen_goto_tb(DisasContext *s, int tb_num,
36
+ target_ulong pc, target_ulong npc)
37
{
38
if (use_goto_tb(s, pc, npc)) {
39
/* jump to same page: we can use a direct jump */
40
--
180
--
41
2.25.1
181
2.34.1
42
182
43
183
Place the CONFIG_AVX512BW_OPT block at the top,
which will aid function selection in the next patch.

Just use translator_use_goto_tb directly at the one call site,
rather than maintaining a local wrapper.
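For reference, the helper being called has roughly this shape (a reconstruction for illustration, not quoted from the series): suppress goto_tb while single-stepping, and otherwise allow it only when the destination stays on the same guest page as the start of the TB.

    /* Illustrative sketch only; see accel/tcg/translator.c for the real code. */
    bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
    {
        /* Suppress goto_tb when single-stepping. */
        if (db->singlestep_enabled || singlestep) {
            return false;
        }
        /* Direct chaining may only stay within the same guest page. */
        return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
    }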
3
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Juan Quintela <quintela@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
target/nios2/translate.c | 15 +--------------
8
migration/xbzrle.c | 244 ++++++++++++++++++++++-----------------------
8
1 file changed, 1 insertion(+), 14 deletions(-)
9
1 file changed, 122 insertions(+), 122 deletions(-)
9
10
10
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
11
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/target/nios2/translate.c
13
--- a/migration/xbzrle.c
13
+++ b/target/nios2/translate.c
14
+++ b/migration/xbzrle.c
14
@@ -XXX,XX +XXX,XX @@ static void t_gen_helper_raise_exception(DisasContext *dc,
15
@@ -XXX,XX +XXX,XX @@
15
dc->base.is_jmp = DISAS_NORETURN;
16
#include "qemu/host-utils.h"
17
#include "xbzrle.h"
18
19
+#if defined(CONFIG_AVX512BW_OPT)
20
+#include <immintrin.h>
21
+
22
+int __attribute__((target("avx512bw")))
23
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
24
+ uint8_t *dst, int dlen)
25
+{
26
+ uint32_t zrun_len = 0, nzrun_len = 0;
27
+ int d = 0, i = 0, num = 0;
28
+ uint8_t *nzrun_start = NULL;
29
+ /* add 1 to include residual part in main loop */
30
+ uint32_t count512s = (slen >> 6) + 1;
31
+ /* countResidual is tail of data, i.e., countResidual = slen % 64 */
32
+ uint32_t count_residual = slen & 0b111111;
33
+ bool never_same = true;
34
+ uint64_t mask_residual = 1;
35
+ mask_residual <<= count_residual;
36
+ mask_residual -= 1;
37
+ __m512i r = _mm512_set1_epi32(0);
38
+
39
+ while (count512s) {
40
+ int bytes_to_check = 64;
41
+ uint64_t mask = 0xffffffffffffffff;
42
+ if (count512s == 1) {
43
+ bytes_to_check = count_residual;
44
+ mask = mask_residual;
45
+ }
46
+ __m512i old_data = _mm512_mask_loadu_epi8(r,
47
+ mask, old_buf + i);
48
+ __m512i new_data = _mm512_mask_loadu_epi8(r,
49
+ mask, new_buf + i);
50
+ uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
51
+ count512s--;
52
+
53
+ bool is_same = (comp & 0x1);
54
+ while (bytes_to_check) {
55
+ if (d + 2 > dlen) {
56
+ return -1;
57
+ }
58
+ if (is_same) {
59
+ if (nzrun_len) {
60
+ d += uleb128_encode_small(dst + d, nzrun_len);
61
+ if (d + nzrun_len > dlen) {
62
+ return -1;
63
+ }
64
+ nzrun_start = new_buf + i - nzrun_len;
65
+ memcpy(dst + d, nzrun_start, nzrun_len);
66
+ d += nzrun_len;
67
+ nzrun_len = 0;
68
+ }
69
+ /* 64 data at a time for speed */
70
+ if (count512s && (comp == 0xffffffffffffffff)) {
71
+ i += 64;
72
+ zrun_len += 64;
73
+ break;
74
+ }
75
+ never_same = false;
76
+ num = ctz64(~comp);
77
+ num = (num < bytes_to_check) ? num : bytes_to_check;
78
+ zrun_len += num;
79
+ bytes_to_check -= num;
80
+ comp >>= num;
81
+ i += num;
82
+ if (bytes_to_check) {
83
+ /* still has different data after same data */
84
+ d += uleb128_encode_small(dst + d, zrun_len);
85
+ zrun_len = 0;
86
+ } else {
87
+ break;
88
+ }
89
+ }
90
+ if (never_same || zrun_len) {
91
+ /*
92
+ * never_same only acts if
93
+ * data begins with diff in first count512s
94
+ */
95
+ d += uleb128_encode_small(dst + d, zrun_len);
96
+ zrun_len = 0;
97
+ never_same = false;
98
+ }
99
+ /* has diff, 64 data at a time for speed */
100
+ if ((bytes_to_check == 64) && (comp == 0x0)) {
101
+ i += 64;
102
+ nzrun_len += 64;
103
+ break;
104
+ }
105
+ num = ctz64(comp);
106
+ num = (num < bytes_to_check) ? num : bytes_to_check;
107
+ nzrun_len += num;
108
+ bytes_to_check -= num;
109
+ comp >>= num;
110
+ i += num;
111
+ if (bytes_to_check) {
112
+ /* mask like 111000 */
113
+ d += uleb128_encode_small(dst + d, nzrun_len);
114
+ /* overflow */
115
+ if (d + nzrun_len > dlen) {
116
+ return -1;
117
+ }
118
+ nzrun_start = new_buf + i - nzrun_len;
119
+ memcpy(dst + d, nzrun_start, nzrun_len);
120
+ d += nzrun_len;
121
+ nzrun_len = 0;
122
+ is_same = true;
123
+ }
124
+ }
125
+ }
126
+
127
+ if (nzrun_len != 0) {
128
+ d += uleb128_encode_small(dst + d, nzrun_len);
129
+ /* overflow */
130
+ if (d + nzrun_len > dlen) {
131
+ return -1;
132
+ }
133
+ nzrun_start = new_buf + i - nzrun_len;
134
+ memcpy(dst + d, nzrun_start, nzrun_len);
135
+ d += nzrun_len;
136
+ }
137
+ return d;
138
+}
139
+#endif
140
+
141
/*
142
page = zrun nzrun
143
| zrun nzrun page
144
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
145
146
return d;
16
}
147
}
17
148
-
18
-static bool use_goto_tb(DisasContext *dc, uint32_t dest)
149
-#if defined(CONFIG_AVX512BW_OPT)
150
-#include <immintrin.h>
151
-
152
-int __attribute__((target("avx512bw")))
153
-xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
154
- uint8_t *dst, int dlen)
19
-{
155
-{
20
- if (unlikely(dc->base.singlestep_enabled)) {
156
- uint32_t zrun_len = 0, nzrun_len = 0;
21
- return false;
157
- int d = 0, i = 0, num = 0;
158
- uint8_t *nzrun_start = NULL;
159
- /* add 1 to include residual part in main loop */
160
- uint32_t count512s = (slen >> 6) + 1;
161
- /* countResidual is tail of data, i.e., countResidual = slen % 64 */
162
- uint32_t count_residual = slen & 0b111111;
163
- bool never_same = true;
164
- uint64_t mask_residual = 1;
165
- mask_residual <<= count_residual;
166
- mask_residual -= 1;
167
- __m512i r = _mm512_set1_epi32(0);
168
-
169
- while (count512s) {
170
- int bytes_to_check = 64;
171
- uint64_t mask = 0xffffffffffffffff;
172
- if (count512s == 1) {
173
- bytes_to_check = count_residual;
174
- mask = mask_residual;
175
- }
176
- __m512i old_data = _mm512_mask_loadu_epi8(r,
177
- mask, old_buf + i);
178
- __m512i new_data = _mm512_mask_loadu_epi8(r,
179
- mask, new_buf + i);
180
- uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
181
- count512s--;
182
-
183
- bool is_same = (comp & 0x1);
184
- while (bytes_to_check) {
185
- if (d + 2 > dlen) {
186
- return -1;
187
- }
188
- if (is_same) {
189
- if (nzrun_len) {
190
- d += uleb128_encode_small(dst + d, nzrun_len);
191
- if (d + nzrun_len > dlen) {
192
- return -1;
193
- }
194
- nzrun_start = new_buf + i - nzrun_len;
195
- memcpy(dst + d, nzrun_start, nzrun_len);
196
- d += nzrun_len;
197
- nzrun_len = 0;
198
- }
199
- /* 64 data at a time for speed */
200
- if (count512s && (comp == 0xffffffffffffffff)) {
201
- i += 64;
202
- zrun_len += 64;
203
- break;
204
- }
205
- never_same = false;
206
- num = ctz64(~comp);
207
- num = (num < bytes_to_check) ? num : bytes_to_check;
208
- zrun_len += num;
209
- bytes_to_check -= num;
210
- comp >>= num;
211
- i += num;
212
- if (bytes_to_check) {
213
- /* still has different data after same data */
214
- d += uleb128_encode_small(dst + d, zrun_len);
215
- zrun_len = 0;
216
- } else {
217
- break;
218
- }
219
- }
220
- if (never_same || zrun_len) {
221
- /*
222
- * never_same only acts if
223
- * data begins with diff in first count512s
224
- */
225
- d += uleb128_encode_small(dst + d, zrun_len);
226
- zrun_len = 0;
227
- never_same = false;
228
- }
229
- /* has diff, 64 data at a time for speed */
230
- if ((bytes_to_check == 64) && (comp == 0x0)) {
231
- i += 64;
232
- nzrun_len += 64;
233
- break;
234
- }
235
- num = ctz64(comp);
236
- num = (num < bytes_to_check) ? num : bytes_to_check;
237
- nzrun_len += num;
238
- bytes_to_check -= num;
239
- comp >>= num;
240
- i += num;
241
- if (bytes_to_check) {
242
- /* mask like 111000 */
243
- d += uleb128_encode_small(dst + d, nzrun_len);
244
- /* overflow */
245
- if (d + nzrun_len > dlen) {
246
- return -1;
247
- }
248
- nzrun_start = new_buf + i - nzrun_len;
249
- memcpy(dst + d, nzrun_start, nzrun_len);
250
- d += nzrun_len;
251
- nzrun_len = 0;
252
- is_same = true;
253
- }
254
- }
22
- }
255
- }
23
-
256
-
24
-#ifndef CONFIG_USER_ONLY
257
- if (nzrun_len != 0) {
25
- return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
258
- d += uleb128_encode_small(dst + d, nzrun_len);
26
-#else
259
- /* overflow */
27
- return true;
260
- if (d + nzrun_len > dlen) {
261
- return -1;
262
- }
263
- nzrun_start = new_buf + i - nzrun_len;
264
- memcpy(dst + d, nzrun_start, nzrun_len);
265
- d += nzrun_len;
266
- }
267
- return d;
268
-}
28
-#endif
269
-#endif
29
-}
30
-
31
static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest)
32
{
33
const TranslationBlock *tb = dc->base.tb;
34
35
- if (use_goto_tb(dc, dest)) {
36
+ if (translator_use_goto_tb(&dc->base, dest)) {
37
tcg_gen_goto_tb(n);
38
tcg_gen_movi_tl(cpu_R[R_PC], dest);
39
tcg_gen_exit_tb(tb, n);
40
--
270
--
41
2.25.1
271
2.34.1
42
272
43
273
Now that we've moved helper_lookup_tb_ptr, the only user
of tb-lookup.h is cpu-exec.c; merge the contents in.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Perform the function selection once, and only if CONFIG_AVX512_OPT
is enabled. Centralize the selection to xbzrle.c, instead of
spreading the init across 3 files.

Remove xbzrle-bench.c. The benefit of being able to benchmark
the different implementations is less important than not peeking
into the internals of the implementation.
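The centralized selection then reduces to a single indirection, as in the xbzrle.c hunk below; callers keep calling xbzrle_encode_buffer() and never see which implementation the constructor picked.

    static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);

    int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                             uint8_t *dst, int dlen)
    {
        return accel_func(old_buf, new_buf, slen, dst, dlen);
    }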
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Reviewed-by: Juan Quintela <quintela@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
12
---
7
accel/tcg/tb-lookup.h | 49 -------------------------------------------
13
migration/xbzrle.h | 5 +-
8
accel/tcg/cpu-exec.c | 31 ++++++++++++++++++++++++++-
14
migration/ram.c | 34 +--
9
2 files changed, 30 insertions(+), 50 deletions(-)
15
migration/xbzrle.c | 26 +-
10
delete mode 100644 accel/tcg/tb-lookup.h
16
tests/bench/xbzrle-bench.c | 469 -------------------------------------
17
tests/unit/test-xbzrle.c | 49 +---
18
tests/bench/meson.build | 6 -
19
6 files changed, 39 insertions(+), 550 deletions(-)
20
delete mode 100644 tests/bench/xbzrle-bench.c
11
21
12
diff --git a/accel/tcg/tb-lookup.h b/accel/tcg/tb-lookup.h
22
diff --git a/migration/xbzrle.h b/migration/xbzrle.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/migration/xbzrle.h
25
+++ b/migration/xbzrle.h
26
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
27
uint8_t *dst, int dlen);
28
29
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
30
-#if defined(CONFIG_AVX512BW_OPT)
31
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
32
- uint8_t *dst, int dlen);
33
-#endif
34
+
35
#endif
36
diff --git a/migration/ram.c b/migration/ram.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/migration/ram.c
39
+++ b/migration/ram.c
40
@@ -XXX,XX +XXX,XX @@
41
#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200
42
/* We can't use any flag that is bigger than 0x200 */
43
44
-int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
45
- uint8_t *, int) = xbzrle_encode_buffer;
46
-#if defined(CONFIG_AVX512BW_OPT)
47
-#include "qemu/cpuid.h"
48
-static void __attribute__((constructor)) init_cpu_flag(void)
49
-{
50
- unsigned max = __get_cpuid_max(0, NULL);
51
- int a, b, c, d;
52
- if (max >= 1) {
53
- __cpuid(1, a, b, c, d);
54
- /* We must check that AVX is not just available, but usable. */
55
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
56
- int bv;
57
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
58
- __cpuid_count(7, 0, a, b, c, d);
59
- /* 0xe6:
60
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
61
- * and ZMM16-ZMM31 state are enabled by OS)
62
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
63
- */
64
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
65
- xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
66
- }
67
- }
68
- }
69
-}
70
-#endif
71
-
72
XBZRLECacheStats xbzrle_counters;
73
74
/* used by the search for pages to send */
75
@@ -XXX,XX +XXX,XX @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
76
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
77
78
/* XBZRLE encoding (if there is no overflow) */
79
- encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
80
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
81
- TARGET_PAGE_SIZE);
82
+ encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
83
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
84
+ TARGET_PAGE_SIZE);
85
86
/*
87
* Update the cache contents, so that it corresponds to the data
88
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/migration/xbzrle.c
91
+++ b/migration/xbzrle.c
92
@@ -XXX,XX +XXX,XX @@
93
94
#if defined(CONFIG_AVX512BW_OPT)
95
#include <immintrin.h>
96
+#include "host/cpuinfo.h"
97
98
-int __attribute__((target("avx512bw")))
99
+static int __attribute__((target("avx512bw")))
100
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
101
uint8_t *dst, int dlen)
102
{
103
@@ -XXX,XX +XXX,XX @@ xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
104
}
105
return d;
106
}
107
+
108
+static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
109
+ int slen, uint8_t *dst, int dlen);
110
+
111
+static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
112
+
113
+static void __attribute__((constructor)) init_accel(void)
114
+{
115
+ unsigned info = cpuinfo_init();
116
+ if (info & CPUINFO_AVX512BW) {
117
+ accel_func = xbzrle_encode_buffer_avx512;
118
+ } else {
119
+ accel_func = xbzrle_encode_buffer_int;
120
+ }
121
+}
122
+
123
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
124
+ uint8_t *dst, int dlen)
125
+{
126
+ return accel_func(old_buf, new_buf, slen, dst, dlen);
127
+}
128
+
129
+#define xbzrle_encode_buffer xbzrle_encode_buffer_int
130
#endif
131
132
/*
133
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
13
deleted file mode 100644
134
deleted file mode 100644
14
index XXXXXXX..XXXXXXX
135
index XXXXXXX..XXXXXXX
15
--- a/accel/tcg/tb-lookup.h
136
--- a/tests/bench/xbzrle-bench.c
16
+++ /dev/null
137
+++ /dev/null
17
@@ -XXX,XX +XXX,XX @@
138
@@ -XXX,XX +XXX,XX @@
18
-/*
139
-/*
19
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
140
- * Xor Based Zero Run Length Encoding unit tests.
20
- *
141
- *
21
- * License: GNU GPL, version 2 or later.
142
- * Copyright 2013 Red Hat, Inc. and/or its affiliates
22
- * See the COPYING file in the top-level directory.
143
- *
144
- * Authors:
145
- * Orit Wasserman <owasserm@redhat.com>
146
- *
147
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
148
- * See the COPYING file in the top-level directory.
149
- *
23
- */
150
- */
24
-#ifndef EXEC_TB_LOOKUP_H
151
-#include "qemu/osdep.h"
25
-#define EXEC_TB_LOOKUP_H
152
-#include "qemu/cutils.h"
26
-
153
-#include "../migration/xbzrle.h"
27
-#ifdef NEED_CPU_H
154
-
28
-#include "cpu.h"
155
-#if defined(CONFIG_AVX512BW_OPT)
29
-#else
156
-#define XBZRLE_PAGE_SIZE 4096
30
-#include "exec/poison.h"
157
-static bool is_cpu_support_avx512bw;
158
-#include "qemu/cpuid.h"
159
-static void __attribute__((constructor)) init_cpu_flag(void)
160
-{
161
- unsigned max = __get_cpuid_max(0, NULL);
162
- int a, b, c, d;
163
- is_cpu_support_avx512bw = false;
164
- if (max >= 1) {
165
- __cpuid(1, a, b, c, d);
166
- /* We must check that AVX is not just available, but usable. */
167
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
168
- int bv;
169
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
170
- __cpuid_count(7, 0, a, b, c, d);
171
- /* 0xe6:
172
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
173
- * and ZMM16-ZMM31 state are enabled by OS)
174
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
175
- */
176
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
177
- is_cpu_support_avx512bw = true;
178
- }
179
- }
180
- }
181
- return ;
182
-}
183
-
184
-struct ResTime {
185
- float t_raw;
186
- float t_512;
187
-};
188
-
189
-
190
-/* Function prototypes
191
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
192
- uint8_t *dst, int dlen);
193
-*/
194
-static void encode_decode_zero(struct ResTime *res)
195
-{
196
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
197
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
198
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
199
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
200
- int i = 0;
201
- int dlen = 0, dlen512 = 0;
202
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
203
-
204
- for (i = diff_len; i > 0; i--) {
205
- buffer[1000 + i] = i;
206
- buffer512[1000 + i] = i;
207
- }
208
-
209
- buffer[1000 + diff_len + 3] = 103;
210
- buffer[1000 + diff_len + 5] = 105;
211
-
212
- buffer512[1000 + diff_len + 3] = 103;
213
- buffer512[1000 + diff_len + 5] = 105;
214
-
215
- /* encode zero page */
216
- time_t t_start, t_end, t_start512, t_end512;
217
- t_start = clock();
218
- dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
219
- XBZRLE_PAGE_SIZE);
220
- t_end = clock();
221
- float time_val = difftime(t_end, t_start);
222
- g_assert(dlen == 0);
223
-
224
- t_start512 = clock();
225
- dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
226
- compressed512, XBZRLE_PAGE_SIZE);
227
- t_end512 = clock();
228
- float time_val512 = difftime(t_end512, t_start512);
229
- g_assert(dlen512 == 0);
230
-
231
- res->t_raw = time_val;
232
- res->t_512 = time_val512;
233
-
234
- g_free(buffer);
235
- g_free(compressed);
236
- g_free(buffer512);
237
- g_free(compressed512);
238
-
239
-}
240
-
241
-static void test_encode_decode_zero_avx512(void)
242
-{
243
- int i;
244
- float time_raw = 0.0, time_512 = 0.0;
245
- struct ResTime res;
246
- for (i = 0; i < 10000; i++) {
247
- encode_decode_zero(&res);
248
- time_raw += res.t_raw;
249
- time_512 += res.t_512;
250
- }
251
- printf("Zero test:\n");
252
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
253
- printf("512 xbzrle_encode time is %f ms\n", time_512);
254
-}
255
-
256
-static void encode_decode_unchanged(struct ResTime *res)
257
-{
258
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
259
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
260
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
261
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
262
- int i = 0;
263
- int dlen = 0, dlen512 = 0;
264
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
265
-
266
- for (i = diff_len; i > 0; i--) {
267
- test[1000 + i] = i + 4;
268
- test512[1000 + i] = i + 4;
269
- }
270
-
271
- test[1000 + diff_len + 3] = 107;
272
- test[1000 + diff_len + 5] = 109;
273
-
274
- test512[1000 + diff_len + 3] = 107;
275
- test512[1000 + diff_len + 5] = 109;
276
-
277
- /* test unchanged buffer */
278
- time_t t_start, t_end, t_start512, t_end512;
279
- t_start = clock();
280
- dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
281
- XBZRLE_PAGE_SIZE);
282
- t_end = clock();
283
- float time_val = difftime(t_end, t_start);
284
- g_assert(dlen == 0);
285
-
286
- t_start512 = clock();
287
- dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
288
- compressed512, XBZRLE_PAGE_SIZE);
289
- t_end512 = clock();
290
- float time_val512 = difftime(t_end512, t_start512);
291
- g_assert(dlen512 == 0);
292
-
293
- res->t_raw = time_val;
294
- res->t_512 = time_val512;
295
-
296
- g_free(test);
297
- g_free(compressed);
298
- g_free(test512);
299
- g_free(compressed512);
300
-
301
-}
302
-
303
-static void test_encode_decode_unchanged_avx512(void)
304
-{
305
- int i;
306
- float time_raw = 0.0, time_512 = 0.0;
307
- struct ResTime res;
308
- for (i = 0; i < 10000; i++) {
309
- encode_decode_unchanged(&res);
310
- time_raw += res.t_raw;
311
- time_512 += res.t_512;
312
- }
313
- printf("Unchanged test:\n");
314
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
315
- printf("512 xbzrle_encode time is %f ms\n", time_512);
316
-}
317
-
318
-static void encode_decode_1_byte(struct ResTime *res)
319
-{
320
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
321
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
322
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
323
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
324
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
325
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
326
- int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
327
- uint8_t buf[2];
328
- uint8_t buf512[2];
329
-
330
- test[XBZRLE_PAGE_SIZE - 1] = 1;
331
- test512[XBZRLE_PAGE_SIZE - 1] = 1;
332
-
333
- time_t t_start, t_end, t_start512, t_end512;
334
- t_start = clock();
335
- dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
336
- XBZRLE_PAGE_SIZE);
337
- t_end = clock();
338
- float time_val = difftime(t_end, t_start);
339
- g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
340
-
341
- rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
342
- g_assert(rc == XBZRLE_PAGE_SIZE);
343
- g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
344
-
345
- t_start512 = clock();
346
- dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
347
- compressed512, XBZRLE_PAGE_SIZE);
348
- t_end512 = clock();
349
- float time_val512 = difftime(t_end512, t_start512);
350
- g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
351
-
352
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
353
- XBZRLE_PAGE_SIZE);
354
- g_assert(rc512 == XBZRLE_PAGE_SIZE);
355
- g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
356
-
357
- res->t_raw = time_val;
358
- res->t_512 = time_val512;
359
-
360
- g_free(buffer);
361
- g_free(compressed);
362
- g_free(test);
363
- g_free(buffer512);
364
- g_free(compressed512);
365
- g_free(test512);
366
-
367
-}
368
-
369
-static void test_encode_decode_1_byte_avx512(void)
370
-{
371
- int i;
372
- float time_raw = 0.0, time_512 = 0.0;
373
- struct ResTime res;
374
- for (i = 0; i < 10000; i++) {
375
- encode_decode_1_byte(&res);
376
- time_raw += res.t_raw;
377
- time_512 += res.t_512;
378
- }
379
- printf("1 byte test:\n");
380
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
381
- printf("512 xbzrle_encode time is %f ms\n", time_512);
382
-}
383
-
384
-static void encode_decode_overflow(struct ResTime *res)
385
-{
386
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
387
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
388
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
389
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
390
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
391
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
392
- int i = 0, rc = 0, rc512 = 0;
393
-
394
- for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
395
- test[i * 2] = 1;
396
- test512[i * 2] = 1;
397
- }
398
-
399
- /* encode overflow */
400
- time_t t_start, t_end, t_start512, t_end512;
401
- t_start = clock();
402
- rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
403
- XBZRLE_PAGE_SIZE);
404
- t_end = clock();
405
- float time_val = difftime(t_end, t_start);
406
- g_assert(rc == -1);
407
-
408
- t_start512 = clock();
409
- rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
410
- compressed512, XBZRLE_PAGE_SIZE);
411
- t_end512 = clock();
412
- float time_val512 = difftime(t_end512, t_start512);
413
- g_assert(rc512 == -1);
414
-
415
- res->t_raw = time_val;
416
- res->t_512 = time_val512;
417
-
418
- g_free(buffer);
419
- g_free(compressed);
420
- g_free(test);
421
- g_free(buffer512);
422
- g_free(compressed512);
423
- g_free(test512);
424
-
425
-}
426
-
427
-static void test_encode_decode_overflow_avx512(void)
428
-{
429
- int i;
430
- float time_raw = 0.0, time_512 = 0.0;
431
- struct ResTime res;
432
- for (i = 0; i < 10000; i++) {
433
- encode_decode_overflow(&res);
434
- time_raw += res.t_raw;
435
- time_512 += res.t_512;
436
- }
437
- printf("Overflow test:\n");
438
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
439
- printf("512 xbzrle_encode time is %f ms\n", time_512);
440
-}
441
-
442
-static void encode_decode_range_avx512(struct ResTime *res)
443
-{
444
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
445
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
446
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
447
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
448
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
449
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
450
- int i = 0, rc = 0, rc512 = 0;
451
- int dlen = 0, dlen512 = 0;
452
-
453
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
454
-
455
- for (i = diff_len; i > 0; i--) {
456
- buffer[1000 + i] = i;
457
- test[1000 + i] = i + 4;
458
- buffer512[1000 + i] = i;
459
- test512[1000 + i] = i + 4;
460
- }
461
-
462
- buffer[1000 + diff_len + 3] = 103;
463
- test[1000 + diff_len + 3] = 107;
464
-
465
- buffer[1000 + diff_len + 5] = 105;
466
- test[1000 + diff_len + 5] = 109;
467
-
468
- buffer512[1000 + diff_len + 3] = 103;
469
- test512[1000 + diff_len + 3] = 107;
470
-
471
- buffer512[1000 + diff_len + 5] = 105;
472
- test512[1000 + diff_len + 5] = 109;
473
-
474
- /* test encode/decode */
475
- time_t t_start, t_end, t_start512, t_end512;
476
- t_start = clock();
477
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
478
- XBZRLE_PAGE_SIZE);
479
- t_end = clock();
480
- float time_val = difftime(t_end, t_start);
481
- rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
482
- g_assert(rc < XBZRLE_PAGE_SIZE);
483
- g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
484
-
485
- t_start512 = clock();
486
- dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
487
- compressed512, XBZRLE_PAGE_SIZE);
488
- t_end512 = clock();
489
- float time_val512 = difftime(t_end512, t_start512);
490
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
491
- g_assert(rc512 < XBZRLE_PAGE_SIZE);
492
- g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
493
-
494
- res->t_raw = time_val;
495
- res->t_512 = time_val512;
496
-
497
- g_free(buffer);
498
- g_free(compressed);
499
- g_free(test);
500
- g_free(buffer512);
501
- g_free(compressed512);
502
- g_free(test512);
503
-
504
-}
505
-
506
-static void test_encode_decode_avx512(void)
507
-{
508
- int i;
509
- float time_raw = 0.0, time_512 = 0.0;
510
- struct ResTime res;
511
- for (i = 0; i < 10000; i++) {
512
- encode_decode_range_avx512(&res);
513
- time_raw += res.t_raw;
514
- time_512 += res.t_512;
515
- }
516
- printf("Encode decode test:\n");
517
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
518
- printf("512 xbzrle_encode time is %f ms\n", time_512);
519
-}
520
-
521
-static void encode_decode_random(struct ResTime *res)
522
-{
523
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
524
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
525
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
526
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
527
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
528
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
529
- int i = 0, rc = 0, rc512 = 0;
530
- int dlen = 0, dlen512 = 0;
531
-
532
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
533
- /* store the index of diff */
534
- int dirty_index[diff_len];
535
- for (int j = 0; j < diff_len; j++) {
536
- dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
537
- }
538
- for (i = diff_len - 1; i >= 0; i--) {
539
- buffer[dirty_index[i]] = i;
540
- test[dirty_index[i]] = i + 4;
541
- buffer512[dirty_index[i]] = i;
542
- test512[dirty_index[i]] = i + 4;
543
- }
544
-
545
- time_t t_start, t_end, t_start512, t_end512;
546
- t_start = clock();
547
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
548
- XBZRLE_PAGE_SIZE);
549
- t_end = clock();
550
- float time_val = difftime(t_end, t_start);
551
- rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
552
- g_assert(rc < XBZRLE_PAGE_SIZE);
553
-
554
- t_start512 = clock();
555
- dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
556
- compressed512, XBZRLE_PAGE_SIZE);
557
- t_end512 = clock();
558
- float time_val512 = difftime(t_end512, t_start512);
559
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
560
- g_assert(rc512 < XBZRLE_PAGE_SIZE);
561
-
562
- res->t_raw = time_val;
563
- res->t_512 = time_val512;
564
-
565
- g_free(buffer);
566
- g_free(compressed);
567
- g_free(test);
568
- g_free(buffer512);
569
- g_free(compressed512);
570
- g_free(test512);
571
-
572
-}
573
-
574
-static void test_encode_decode_random_avx512(void)
575
-{
576
- int i;
577
- float time_raw = 0.0, time_512 = 0.0;
578
- struct ResTime res;
579
- for (i = 0; i < 10000; i++) {
580
- encode_decode_random(&res);
581
- time_raw += res.t_raw;
582
- time_512 += res.t_512;
583
- }
584
- printf("Random test:\n");
585
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
586
- printf("512 xbzrle_encode time is %f ms\n", time_512);
587
-}
31
-#endif
588
-#endif
32
-
589
-
33
-#include "exec/exec-all.h"
590
-int main(int argc, char **argv)
34
-#include "tb-hash.h"
591
-{
35
-
592
- g_test_init(&argc, &argv, NULL);
36
-/* Might cause an exception, so have a longjmp destination ready */
593
- g_test_rand_int();
37
-static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
594
- #if defined(CONFIG_AVX512BW_OPT)
38
- target_ulong cs_base,
595
- if (likely(is_cpu_support_avx512bw)) {
39
- uint32_t flags, uint32_t cflags)
596
- g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
40
-{
597
- g_test_add_func("/xbzrle/encode_decode_unchanged",
41
- TranslationBlock *tb;
598
- test_encode_decode_unchanged_avx512);
42
- uint32_t hash;
599
- g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
43
-
600
- g_test_add_func("/xbzrle/encode_decode_overflow",
44
- /* we should never be trying to look up an INVALID tb */
601
- test_encode_decode_overflow_avx512);
45
- tcg_debug_assert(!(cflags & CF_INVALID));
602
- g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
46
-
603
- g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
47
- hash = tb_jmp_cache_hash_func(pc);
604
- }
48
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
605
- #endif
49
-
606
- return g_test_run();
50
- if (likely(tb &&
607
-}
51
- tb->pc == pc &&
608
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
52
- tb->cs_base == cs_base &&
53
- tb->flags == flags &&
54
- tb->trace_vcpu_dstate == *cpu->trace_dstate &&
55
- tb_cflags(tb) == cflags)) {
56
- return tb;
57
- }
58
- tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
59
- if (tb == NULL) {
60
- return NULL;
61
- }
62
- qatomic_set(&cpu->tb_jmp_cache[hash], tb);
63
- return tb;
64
-}
65
-
66
-#endif /* EXEC_TB_LOOKUP_H */
67
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
68
index XXXXXXX..XXXXXXX 100644
609
index XXXXXXX..XXXXXXX 100644
69
--- a/accel/tcg/cpu-exec.c
610
--- a/tests/unit/test-xbzrle.c
70
+++ b/accel/tcg/cpu-exec.c
611
+++ b/tests/unit/test-xbzrle.c
71
@@ -XXX,XX +XXX,XX @@
612
@@ -XXX,XX +XXX,XX @@
72
#include "sysemu/replay.h"
613
73
#include "exec/helper-proto.h"
614
#define XBZRLE_PAGE_SIZE 4096
74
#include "tb-hash.h"
615
75
-#include "tb-lookup.h"
616
-int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
76
#include "tb-context.h"
617
- uint8_t *, int) = xbzrle_encode_buffer;
77
#include "internal.h"
618
-#if defined(CONFIG_AVX512BW_OPT)
78
619
-#include "qemu/cpuid.h"
79
@@ -XXX,XX +XXX,XX @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
620
-static void __attribute__((constructor)) init_cpu_flag(void)
80
}
621
-{
81
#endif /* CONFIG USER ONLY */
622
- unsigned max = __get_cpuid_max(0, NULL);
82
623
- int a, b, c, d;
83
+/* Might cause an exception, so have a longjmp destination ready */
624
- if (max >= 1) {
84
+static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
625
- __cpuid(1, a, b, c, d);
85
+ target_ulong cs_base,
626
- /* We must check that AVX is not just available, but usable. */
86
+ uint32_t flags, uint32_t cflags)
627
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
87
+{
628
- int bv;
88
+ TranslationBlock *tb;
629
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
89
+ uint32_t hash;
630
- __cpuid_count(7, 0, a, b, c, d);
90
+
631
- /* 0xe6:
91
+ /* we should never be trying to look up an INVALID tb */
632
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
92
+ tcg_debug_assert(!(cflags & CF_INVALID));
633
- * and ZMM16-ZMM31 state are enabled by OS)
93
+
634
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
94
+ hash = tb_jmp_cache_hash_func(pc);
635
- */
95
+ tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
636
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
96
+
637
- xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
97
+ if (likely(tb &&
638
- }
98
+ tb->pc == pc &&
639
- }
99
+ tb->cs_base == cs_base &&
640
- }
100
+ tb->flags == flags &&
641
- return ;
101
+ tb->trace_vcpu_dstate == *cpu->trace_dstate &&
642
-}
102
+ tb_cflags(tb) == cflags)) {
643
-#endif
103
+ return tb;
644
-
104
+ }
645
static void test_uleb(void)
105
+ tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
646
{
106
+ if (tb == NULL) {
647
uint32_t i, val;
107
+ return NULL;
648
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_zero(void)
108
+ }
649
buffer[1000 + diff_len + 5] = 105;
109
+ qatomic_set(&cpu->tb_jmp_cache[hash], tb);
650
110
+ return tb;
651
/* encode zero page */
111
+}
652
- dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
112
+
653
- XBZRLE_PAGE_SIZE);
113
/**
654
+ dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE,
114
* helper_lookup_tb_ptr: quick check for next tb
655
+ compressed, XBZRLE_PAGE_SIZE);
115
* @env: current cpu state
656
g_assert(dlen == 0);
657
658
g_free(buffer);
659
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_unchanged(void)
660
test[1000 + diff_len + 5] = 109;
661
662
/* test unchanged buffer */
663
- dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
664
- XBZRLE_PAGE_SIZE);
665
+ dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE,
666
+ compressed, XBZRLE_PAGE_SIZE);
667
g_assert(dlen == 0);
668
669
g_free(test);
670
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_1_byte(void)
671
672
test[XBZRLE_PAGE_SIZE - 1] = 1;
673
674
- dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
675
- XBZRLE_PAGE_SIZE);
676
+ dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
677
+ compressed, XBZRLE_PAGE_SIZE);
678
g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
679
680
rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
681
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_overflow(void)
682
}
683
684
/* encode overflow */
685
- rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
686
- XBZRLE_PAGE_SIZE);
687
+ rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
688
+ compressed, XBZRLE_PAGE_SIZE);
689
g_assert(rc == -1);
690
691
g_free(buffer);
692
@@ -XXX,XX +XXX,XX @@ static void encode_decode_range(void)
693
test[1000 + diff_len + 5] = 109;
694
695
/* test encode/decode */
696
- dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
697
- XBZRLE_PAGE_SIZE);
698
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE,
699
+ compressed, XBZRLE_PAGE_SIZE);
700
701
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
702
g_assert(rc < XBZRLE_PAGE_SIZE);
703
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
704
index XXXXXXX..XXXXXXX 100644
705
--- a/tests/bench/meson.build
706
+++ b/tests/bench/meson.build
707
@@ -XXX,XX +XXX,XX @@ qht_bench = executable('qht-bench',
708
sources: 'qht-bench.c',
709
dependencies: [qemuutil])
710
711
-if have_system
712
-xbzrle_bench = executable('xbzrle-bench',
713
- sources: 'xbzrle-bench.c',
714
- dependencies: [qemuutil,migration])
715
-endif
716
-
717
qtree_bench = executable('qtree-bench',
718
sources: 'qtree-bench.c',
719
dependencies: [qemuutil])
116
--
720
--
117
2.25.1
721
2.34.1
118
722
119
723
1
Split out CPU_LOG_EXEC and CPU_LOG_TB_CPU logging from
1
The items in migration_files are built for libmigration and included
2
cpu_tb_exec to a new function. Perform only one pc
2
into softmmu_ss from there; no need to also include them directly.
3
range check after a combined mask check.
4
3
5
Use the new function in lookup_tb_ptr. This enables
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
CPU_LOG_TB_CPU between indirectly chained tbs.
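
In miniature, the combined check reads like this (the full log_cpu_exec() helper is in the hunk below; this is just the entry condition):

    static void log_cpu_exec_sketch(target_ulong pc)
    {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
            && qemu_log_in_addr_range(pc)) {
            /* emit the CPU_LOG_EXEC trace line and, when CPU_LOG_TB_CPU
             * is also enabled, dump the CPU state */
        }
    }
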
5
Reviewed-by: Juan Quintela <quintela@redhat.com>
7
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
7
---
10
accel/tcg/cpu-exec.c | 61 ++++++++++++++++++++++++--------------------
8
migration/meson.build | 1 -
11
1 file changed, 34 insertions(+), 27 deletions(-)
9
1 file changed, 1 deletion(-)
12
10
13
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
11
diff --git a/migration/meson.build b/migration/meson.build
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/accel/tcg/cpu-exec.c
13
--- a/migration/meson.build
16
+++ b/accel/tcg/cpu-exec.c
14
+++ b/migration/meson.build
17
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
15
@@ -XXX,XX +XXX,XX @@ migration_files = files(
18
return tb;
16
'qemu-file.c',
19
}
17
'yank_functions.c',
20
18
)
21
+static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
19
-softmmu_ss.add(migration_files)
22
+ const TranslationBlock *tb)
20
23
+{
21
softmmu_ss.add(files(
24
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
22
'block-dirty-bitmap.c',
25
+ && qemu_log_in_addr_range(pc)) {
26
+
27
+ qemu_log_mask(CPU_LOG_EXEC,
28
+ "Trace %d: %p [" TARGET_FMT_lx
29
+ "/" TARGET_FMT_lx "/%#x] %s\n",
30
+ cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, tb->flags,
31
+ lookup_symbol(pc));
32
+
33
+#if defined(DEBUG_DISAS)
34
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
35
+ FILE *logfile = qemu_log_lock();
36
+ int flags = 0;
37
+
38
+ if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
39
+ flags |= CPU_DUMP_FPU;
40
+ }
41
+#if defined(TARGET_I386)
42
+ flags |= CPU_DUMP_CCOP;
43
+#endif
44
+ log_cpu_state(cpu, flags);
45
+ qemu_log_unlock(logfile);
46
+ }
47
+#endif /* DEBUG_DISAS */
48
+ }
49
+}
50
+
51
/**
52
* helper_lookup_tb_ptr: quick check for next tb
53
* @env: current cpu state
54
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
55
if (tb == NULL) {
56
return tcg_code_gen_epilogue;
57
}
58
- qemu_log_mask_and_addr(CPU_LOG_EXEC, pc,
59
- "Chain %d: %p ["
60
- TARGET_FMT_lx "/" TARGET_FMT_lx "/%#x] %s\n",
61
- cpu->cpu_index, tb->tc.ptr, cs_base, pc, flags,
62
- lookup_symbol(pc));
63
+
64
+ log_cpu_exec(pc, cpu, tb);
65
+
66
return tb->tc.ptr;
67
}
68
69
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
70
TranslationBlock *last_tb;
71
const void *tb_ptr = itb->tc.ptr;
72
73
- qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
74
- "Trace %d: %p ["
75
- TARGET_FMT_lx "/" TARGET_FMT_lx "/%#x] %s\n",
76
- cpu->cpu_index, itb->tc.ptr,
77
- itb->cs_base, itb->pc, itb->flags,
78
- lookup_symbol(itb->pc));
79
-
80
-#if defined(DEBUG_DISAS)
81
- if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
82
- && qemu_log_in_addr_range(itb->pc)) {
83
- FILE *logfile = qemu_log_lock();
84
- int flags = 0;
85
- if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
86
- flags |= CPU_DUMP_FPU;
87
- }
88
-#if defined(TARGET_I386)
89
- flags |= CPU_DUMP_CCOP;
90
-#endif
91
- log_cpu_state(cpu, flags);
92
- qemu_log_unlock(logfile);
93
- }
94
-#endif /* DEBUG_DISAS */
95
+ log_cpu_exec(itb->pc, cpu, itb);
96
97
qemu_thread_jit_execute();
98
ret = tcg_qemu_tb_exec(env, tb_ptr);
99
--
23
--
100
2.25.1
24
2.34.1
101
25
102
26
1
Just use translator_use_goto_tb directly at the one call site,
1
Move the code from tcg/. The only use of these bits so far
2
rather than maintaining a local wrapper.
2
is with respect to the atomicity of tcg operations.
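
For context, a consumer of the new interface queries the bits roughly as follows (illustrative helper, not part of the patch):

    #include <stdbool.h>
    #include "host/cpuinfo.h"

    /* Test for FEAT_LSE2; use cpuinfo_init() rather than the bare variable,
     * so this is also safe when called from another constructor. */
    static bool host_has_lse2(void)
    {
        return (cpuinfo_init() & CPUINFO_LSE2) != 0;
    }
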
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
target/rx/translate.c | 11 +----------
8
host/include/aarch64/host/cpuinfo.h | 22 ++++++++++
8
1 file changed, 1 insertion(+), 10 deletions(-)
9
tcg/aarch64/tcg-target.h | 6 ++-
9
10
util/cpuinfo-aarch64.c | 67 +++++++++++++++++++++++++++++
10
diff --git a/target/rx/translate.c b/target/rx/translate.c
11
tcg/aarch64/tcg-target.c.inc | 40 -----------------
12
util/meson.build | 4 +-
13
5 files changed, 96 insertions(+), 43 deletions(-)
14
create mode 100644 host/include/aarch64/host/cpuinfo.h
15
create mode 100644 util/cpuinfo-aarch64.c
16
17
diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/host/include/aarch64/host/cpuinfo.h
22
@@ -XXX,XX +XXX,XX @@
23
+/*
24
+ * SPDX-License-Identifier: GPL-2.0-or-later
25
+ * Host specific cpu identification for AArch64.
26
+ */
27
+
28
+#ifndef HOST_CPUINFO_H
29
+#define HOST_CPUINFO_H
30
+
31
+#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
32
+#define CPUINFO_LSE (1u << 1)
33
+#define CPUINFO_LSE2 (1u << 2)
34
+
35
+/* Initialized with a constructor. */
36
+extern unsigned cpuinfo;
37
+
38
+/*
39
+ * We cannot rely on constructor ordering, so other constructors must
40
+ * use the function interface rather than the variable above.
41
+ */
42
+unsigned cpuinfo_init(void);
43
+
44
+#endif /* HOST_CPUINFO_H */
45
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
12
--- a/target/rx/translate.c
47
--- a/tcg/aarch64/tcg-target.h
13
+++ b/target/rx/translate.c
48
+++ b/tcg/aarch64/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ void rx_cpu_dump_state(CPUState *cs, FILE *f, int flags)
49
@@ -XXX,XX +XXX,XX @@
50
#ifndef AARCH64_TCG_TARGET_H
51
#define AARCH64_TCG_TARGET_H
52
53
+#include "host/cpuinfo.h"
54
+
55
#define TCG_TARGET_INSN_UNIT_SIZE 4
56
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
57
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
58
@@ -XXX,XX +XXX,XX @@ typedef enum {
59
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
60
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
61
62
-extern bool have_lse;
63
-extern bool have_lse2;
64
+#define have_lse (cpuinfo & CPUINFO_LSE)
65
+#define have_lse2 (cpuinfo & CPUINFO_LSE2)
66
67
/* optional instructions */
68
#define TCG_TARGET_HAS_div_i32 1
69
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
70
new file mode 100644
71
index XXXXXXX..XXXXXXX
72
--- /dev/null
73
+++ b/util/cpuinfo-aarch64.c
74
@@ -XXX,XX +XXX,XX @@
75
+/*
76
+ * SPDX-License-Identifier: GPL-2.0-or-later
77
+ * Host specific cpu identification for AArch64.
78
+ */
79
+
80
+#include "qemu/osdep.h"
81
+#include "host/cpuinfo.h"
82
+
83
+#ifdef CONFIG_LINUX
84
+# ifdef CONFIG_GETAUXVAL
85
+# include <sys/auxv.h>
86
+# else
87
+# include <asm/hwcap.h>
88
+# include "elf.h"
89
+# endif
90
+#endif
91
+#ifdef CONFIG_DARWIN
92
+# include <sys/sysctl.h>
93
+#endif
94
+
95
+unsigned cpuinfo;
96
+
97
+#ifdef CONFIG_DARWIN
98
+static bool sysctl_for_bool(const char *name)
99
+{
100
+ int val = 0;
101
+ size_t len = sizeof(val);
102
+
103
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
104
+ return val != 0;
105
+ }
106
+
107
+ /*
108
+ * We might in the future ask for properties not present in older kernels,
109
+ * but we're only asking about static properties, all of which should be
110
+ * 'int'. So we shouln't see ENOMEM (val too small), or any of the other
111
+ * more exotic errors.
112
+ */
113
+ assert(errno == ENOENT);
114
+ return false;
115
+}
116
+#endif
117
+
118
+/* Called both as constructor and (possibly) via other constructors. */
119
+unsigned __attribute__((constructor)) cpuinfo_init(void)
120
+{
121
+ unsigned info = cpuinfo;
122
+
123
+ if (info) {
124
+ return info;
125
+ }
126
+
127
+ info = CPUINFO_ALWAYS;
128
+
129
+#ifdef CONFIG_LINUX
130
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
131
+ info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
132
+ info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
133
+#endif
134
+#ifdef CONFIG_DARWIN
135
+ info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
136
+ info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
137
+#endif
138
+
139
+ cpuinfo = info;
140
+ return info;
141
+}
142
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
143
index XXXXXXX..XXXXXXX 100644
144
--- a/tcg/aarch64/tcg-target.c.inc
145
+++ b/tcg/aarch64/tcg-target.c.inc
146
@@ -XXX,XX +XXX,XX @@
147
#include "../tcg-ldst.c.inc"
148
#include "../tcg-pool.c.inc"
149
#include "qemu/bitops.h"
150
-#ifdef __linux__
151
-#include <asm/hwcap.h>
152
-#endif
153
-#ifdef CONFIG_DARWIN
154
-#include <sys/sysctl.h>
155
-#endif
156
157
/* We're going to re-use TCGType in setting of the SF bit, which controls
158
the size of the operation performed. If we know the values match, it
159
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
160
return TCG_REG_X0 + slot;
161
}
162
163
-bool have_lse;
164
-bool have_lse2;
165
-
166
#define TCG_REG_TMP TCG_REG_X30
167
#define TCG_VEC_TMP TCG_REG_V31
168
169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
15
}
170
}
16
}
171
}
17
172
18
-static bool use_goto_tb(DisasContext *dc, target_ulong dest)
173
-#ifdef CONFIG_DARWIN
174
-static bool sysctl_for_bool(const char *name)
19
-{
175
-{
20
- if (unlikely(dc->base.singlestep_enabled)) {
176
- int val = 0;
21
- return false;
177
- size_t len = sizeof(val);
22
- } else {
178
-
23
- return true;
179
- if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
180
- return val != 0;
24
- }
181
- }
182
-
183
- /*
184
- * We might in the future ask for properties not present in older kernels,
185
- * but we're only asking about static properties, all of which should be
186
- * 'int'. So we shouln't see ENOMEM (val too small), or any of the other
187
- * more exotic errors.
188
- */
189
- assert(errno == ENOENT);
190
- return false;
25
-}
191
-}
26
-
192
-#endif
27
static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
193
-
194
static void tcg_target_init(TCGContext *s)
28
{
195
{
29
- if (use_goto_tb(dc, dest)) {
196
-#ifdef __linux__
30
+ if (translator_use_goto_tb(&dc->base, dest)) {
197
- unsigned long hwcap = qemu_getauxval(AT_HWCAP);
31
tcg_gen_goto_tb(n);
198
- have_lse = hwcap & HWCAP_ATOMICS;
32
tcg_gen_movi_i32(cpu_pc, dest);
199
- have_lse2 = hwcap & HWCAP_USCAT;
33
tcg_gen_exit_tb(dc->base.tb, n);
200
-#endif
201
-#ifdef CONFIG_DARWIN
202
- have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
203
- have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
204
-#endif
205
-
206
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
207
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
208
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
209
diff --git a/util/meson.build b/util/meson.build
210
index XXXXXXX..XXXXXXX 100644
211
--- a/util/meson.build
212
+++ b/util/meson.build
213
@@ -XXX,XX +XXX,XX @@ if have_block
214
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
215
endif
216
217
-if cpu in ['x86', 'x86_64']
218
+if cpu == 'aarch64'
219
+ util_ss.add(files('cpuinfo-aarch64.c'))
220
+elif cpu in ['x86', 'x86_64']
221
util_ss.add(files('cpuinfo-i386.c'))
222
endif
34
--
223
--
35
2.25.1
224
2.34.1
36
225
37
226
1
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
1
Separates the aarch64-specific portion into its own file.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
5
---
4
target/xtensa/translate.c | 6 +-----
6
host/include/aarch64/host/atomic128-cas.h | 43 ++++++++++++++++++
5
1 file changed, 1 insertion(+), 5 deletions(-)
7
host/include/generic/host/atomic128-cas.h | 43 ++++++++++++++++++
8
include/qemu/atomic128.h | 55 +----------------------
9
3 files changed, 87 insertions(+), 54 deletions(-)
10
create mode 100644 host/include/aarch64/host/atomic128-cas.h
11
create mode 100644 host/include/generic/host/atomic128-cas.h
6
12
7
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
13
diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
17
+++ b/host/include/aarch64/host/atomic128-cas.h
18
@@ -XXX,XX +XXX,XX @@
19
+/*
20
+ * SPDX-License-Identifier: GPL-2.0-or-later
21
+ * Compare-and-swap for 128-bit atomic operations, AArch64 version.
22
+ *
23
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
24
+ *
25
+ * See docs/devel/atomics.rst for discussion about the guarantees each
26
+ * atomic primitive is meant to provide.
27
+ */
28
+
29
+#ifndef AARCH64_ATOMIC128_CAS_H
30
+#define AARCH64_ATOMIC128_CAS_H
31
+
32
+/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
33
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
34
+#include "host/include/generic/host/atomic128-cas.h"
35
+#else
36
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
37
+{
38
+ uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
39
+ uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
40
+ uint64_t oldl, oldh;
41
+ uint32_t tmp;
42
+
43
+ asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
44
+ "cmp %[oldl], %[cmpl]\n\t"
45
+ "ccmp %[oldh], %[cmph], #0, eq\n\t"
46
+ "b.ne 1f\n\t"
47
+ "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
48
+ "cbnz %w[tmp], 0b\n"
49
+ "1:"
50
+ : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
51
+ [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
52
+ : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
53
+ [newl] "r"(newl), [newh] "r"(newh)
54
+ : "memory", "cc");
55
+
56
+ return int128_make128(oldl, oldh);
57
+}
58
+# define HAVE_CMPXCHG128 1
59
+#endif
60
+
61
+#endif /* AARCH64_ATOMIC128_CAS_H */
62
diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h
63
new file mode 100644
64
index XXXXXXX..XXXXXXX
65
--- /dev/null
66
+++ b/host/include/generic/host/atomic128-cas.h
67
@@ -XXX,XX +XXX,XX @@
68
+/*
69
+ * SPDX-License-Identifier: GPL-2.0-or-later
70
+ * Compare-and-swap for 128-bit atomic operations, generic version.
71
+ *
72
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
73
+ *
74
+ * See docs/devel/atomics.rst for discussion about the guarantees each
75
+ * atomic primitive is meant to provide.
76
+ */
77
+
78
+#ifndef HOST_ATOMIC128_CAS_H
79
+#define HOST_ATOMIC128_CAS_H
80
+
81
+#if defined(CONFIG_ATOMIC128)
82
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
83
+{
84
+ Int128Alias r, c, n;
85
+
86
+ c.s = cmp;
87
+ n.s = new;
88
+ r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
89
+ return r.s;
90
+}
91
+# define HAVE_CMPXCHG128 1
92
+#elif defined(CONFIG_CMPXCHG128)
93
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
94
+{
95
+ Int128Alias r, c, n;
96
+
97
+ c.s = cmp;
98
+ n.s = new;
99
+ r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
100
+ return r.s;
101
+}
102
+# define HAVE_CMPXCHG128 1
103
+#else
104
+/* Fallback definition that must be optimized away, or error. */
105
+Int128 QEMU_ERROR("unsupported atomic")
106
+ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
107
+# define HAVE_CMPXCHG128 0
108
+#endif
109
+
110
+#endif /* HOST_ATOMIC128_CAS_H */
111
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
8
index XXXXXXX..XXXXXXX 100644
112
index XXXXXXX..XXXXXXX 100644
9
--- a/target/xtensa/translate.c
113
--- a/include/qemu/atomic128.h
10
+++ b/target/xtensa/translate.c
114
+++ b/include/qemu/atomic128.h
11
@@ -XXX,XX +XXX,XX @@ static void gen_jump(DisasContext *dc, TCGv dest)
115
@@ -XXX,XX +XXX,XX @@
12
116
* Therefore, special case each platform.
13
static int adjust_jump_slot(DisasContext *dc, uint32_t dest, int slot)
117
*/
14
{
118
15
- if (((dc->base.pc_first ^ dest) & TARGET_PAGE_MASK) != 0) {
119
-#if defined(CONFIG_ATOMIC128)
16
- return -1;
120
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
17
- } else {
121
-{
18
- return slot;
122
- Int128Alias r, c, n;
19
- }
123
-
20
+ return translator_use_goto_tb(&dc->base, dest) ? slot : -1;
124
- c.s = cmp;
21
}
125
- n.s = new;
22
126
- r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
23
static void gen_jumpi(DisasContext *dc, uint32_t dest, int slot)
127
- return r.s;
128
-}
129
-# define HAVE_CMPXCHG128 1
130
-#elif defined(CONFIG_CMPXCHG128)
131
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
132
-{
133
- Int128Alias r, c, n;
134
-
135
- c.s = cmp;
136
- n.s = new;
137
- r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
138
- return r.s;
139
-}
140
-# define HAVE_CMPXCHG128 1
141
-#elif defined(__aarch64__)
142
-/* Through gcc 8, aarch64 has no support for 128-bit at all. */
143
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
144
-{
145
- uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
146
- uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
147
- uint64_t oldl, oldh;
148
- uint32_t tmp;
149
-
150
- asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
151
- "cmp %[oldl], %[cmpl]\n\t"
152
- "ccmp %[oldh], %[cmph], #0, eq\n\t"
153
- "b.ne 1f\n\t"
154
- "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
155
- "cbnz %w[tmp], 0b\n"
156
- "1:"
157
- : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
158
- [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
159
- : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
160
- [newl] "r"(newl), [newh] "r"(newh)
161
- : "memory", "cc");
162
-
163
- return int128_make128(oldl, oldh);
164
-}
165
-# define HAVE_CMPXCHG128 1
166
-#else
167
-/* Fallback definition that must be optimized away, or error. */
168
-Int128 QEMU_ERROR("unsupported atomic")
169
- atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
170
-# define HAVE_CMPXCHG128 0
171
-#endif /* Some definition for HAVE_CMPXCHG128 */
172
-
173
+#include "host/atomic128-cas.h"
174
175
#if defined(CONFIG_ATOMIC128)
176
static inline Int128 atomic16_read(Int128 *ptr)
24
--
177
--
25
2.25.1
178
2.34.1
26
179
27
180
1
All of these helpers end with cpu_loop_exit.
1
Separates the aarch64-specific portion into its own file.
2
2
3
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
target/avr/helper.h | 8 ++++----
6
host/include/aarch64/host/atomic128-ldst.h | 49 ++++++++++++++
8
1 file changed, 4 insertions(+), 4 deletions(-)
7
host/include/generic/host/atomic128-ldst.h | 57 +++++++++++++++++
9
8
include/qemu/atomic128.h | 74 +---------------------
10
diff --git a/target/avr/helper.h b/target/avr/helper.h
9
3 files changed, 107 insertions(+), 73 deletions(-)
10
create mode 100644 host/include/aarch64/host/atomic128-ldst.h
11
create mode 100644 host/include/generic/host/atomic128-ldst.h
12
13
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
17
+++ b/host/include/aarch64/host/atomic128-ldst.h
18
@@ -XXX,XX +XXX,XX @@
19
+/*
20
+ * SPDX-License-Identifier: GPL-2.0-or-later
21
+ * Load/store for 128-bit atomic operations, AArch64 version.
22
+ *
23
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
24
+ *
25
+ * See docs/devel/atomics.rst for discussion about the guarantees each
26
+ * atomic primitive is meant to provide.
27
+ */
28
+
29
+#ifndef AARCH64_ATOMIC128_LDST_H
30
+#define AARCH64_ATOMIC128_LDST_H
31
+
32
+/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
33
+#if !defined(CONFIG_ATOMIC128) && !defined(CONFIG_USER_ONLY)
34
+/* We can do better than cmpxchg for AArch64. */
35
+static inline Int128 atomic16_read(Int128 *ptr)
36
+{
37
+ uint64_t l, h;
38
+ uint32_t tmp;
39
+
40
+ /* The load must be paired with the store to guarantee not tearing. */
41
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
42
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
43
+ "cbnz %w[tmp], 0b"
44
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
45
+
46
+ return int128_make128(l, h);
47
+}
48
+
49
+static inline void atomic16_set(Int128 *ptr, Int128 val)
50
+{
51
+ uint64_t l = int128_getlo(val), h = int128_gethi(val);
52
+ uint64_t t1, t2;
53
+
54
+ /* Load into temporaries to acquire the exclusive access lock. */
55
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
56
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
57
+ "cbnz %w[t1], 0b"
58
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
59
+ : [l] "r"(l), [h] "r"(h));
60
+}
61
+
62
+# define HAVE_ATOMIC128 1
63
+#else
64
+#include "host/include/generic/host/atomic128-ldst.h"
65
+#endif
66
+
67
+#endif /* AARCH64_ATOMIC128_LDST_H */
68
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
69
new file mode 100644
70
index XXXXXXX..XXXXXXX
71
--- /dev/null
72
+++ b/host/include/generic/host/atomic128-ldst.h
73
@@ -XXX,XX +XXX,XX @@
74
+/*
75
+ * SPDX-License-Identifier: GPL-2.0-or-later
76
+ * Load/store for 128-bit atomic operations, generic version.
77
+ *
78
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
79
+ *
80
+ * See docs/devel/atomics.rst for discussion about the guarantees each
81
+ * atomic primitive is meant to provide.
82
+ */
83
+
84
+#ifndef HOST_ATOMIC128_LDST_H
85
+#define HOST_ATOMIC128_LDST_H
86
+
87
+#if defined(CONFIG_ATOMIC128)
88
+static inline Int128 atomic16_read(Int128 *ptr)
89
+{
90
+ Int128Alias r;
91
+
92
+ r.i = qatomic_read__nocheck((__int128_t *)ptr);
93
+ return r.s;
94
+}
95
+
96
+static inline void atomic16_set(Int128 *ptr, Int128 val)
97
+{
98
+ Int128Alias v;
99
+
100
+ v.s = val;
101
+ qatomic_set__nocheck((__int128_t *)ptr, v.i);
102
+}
103
+
104
+# define HAVE_ATOMIC128 1
105
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
106
+static inline Int128 atomic16_read(Int128 *ptr)
107
+{
108
+ /* Maybe replace 0 with 0, returning the old value. */
109
+ Int128 z = int128_make64(0);
110
+ return atomic16_cmpxchg(ptr, z, z);
111
+}
112
+
113
+static inline void atomic16_set(Int128 *ptr, Int128 val)
114
+{
115
+ Int128 old = *ptr, cmp;
116
+ do {
117
+ cmp = old;
118
+ old = atomic16_cmpxchg(ptr, cmp, val);
119
+ } while (int128_ne(old, cmp));
120
+}
121
+
122
+# define HAVE_ATOMIC128 1
123
+#else
124
+/* Fallback definitions that must be optimized away, or error. */
125
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
126
+void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
127
+# define HAVE_ATOMIC128 0
128
+#endif
129
+
130
+#endif /* HOST_ATOMIC128_LDST_H */
131
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
11
index XXXXXXX..XXXXXXX 100644
132
index XXXXXXX..XXXXXXX 100644
12
--- a/target/avr/helper.h
133
--- a/include/qemu/atomic128.h
13
+++ b/target/avr/helper.h
134
+++ b/include/qemu/atomic128.h
14
@@ -XXX,XX +XXX,XX @@
135
@@ -XXX,XX +XXX,XX @@
15
*/
136
*/
16
137
17
DEF_HELPER_1(wdr, void, env)
138
#include "host/atomic128-cas.h"
18
-DEF_HELPER_1(debug, void, env)
139
-
19
-DEF_HELPER_1(break, void, env)
140
-#if defined(CONFIG_ATOMIC128)
20
-DEF_HELPER_1(sleep, void, env)
141
-static inline Int128 atomic16_read(Int128 *ptr)
21
-DEF_HELPER_1(unsupported, void, env)
142
-{
22
+DEF_HELPER_1(debug, noreturn, env)
143
- Int128Alias r;
23
+DEF_HELPER_1(break, noreturn, env)
144
-
24
+DEF_HELPER_1(sleep, noreturn, env)
145
- r.i = qatomic_read__nocheck((__int128_t *)ptr);
25
+DEF_HELPER_1(unsupported, noreturn, env)
146
- return r.s;
26
DEF_HELPER_3(outb, void, env, i32, i32)
147
-}
27
DEF_HELPER_2(inb, tl, env, i32)
148
-
28
DEF_HELPER_3(fullwr, void, env, i32, i32)
149
-static inline void atomic16_set(Int128 *ptr, Int128 val)
150
-{
151
- Int128Alias v;
152
-
153
- v.s = val;
154
- qatomic_set__nocheck((__int128_t *)ptr, v.i);
155
-}
156
-
157
-# define HAVE_ATOMIC128 1
158
-#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
159
-/* We can do better than cmpxchg for AArch64. */
160
-static inline Int128 atomic16_read(Int128 *ptr)
161
-{
162
- uint64_t l, h;
163
- uint32_t tmp;
164
-
165
- /* The load must be paired with the store to guarantee not tearing. */
166
- asm("0: ldxp %[l], %[h], %[mem]\n\t"
167
- "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
168
- "cbnz %w[tmp], 0b"
169
- : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
170
-
171
- return int128_make128(l, h);
172
-}
173
-
174
-static inline void atomic16_set(Int128 *ptr, Int128 val)
175
-{
176
- uint64_t l = int128_getlo(val), h = int128_gethi(val);
177
- uint64_t t1, t2;
178
-
179
- /* Load into temporaries to acquire the exclusive access lock. */
180
- asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
181
- "stxp %w[t1], %[l], %[h], %[mem]\n\t"
182
- "cbnz %w[t1], 0b"
183
- : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
184
- : [l] "r"(l), [h] "r"(h));
185
-}
186
-
187
-# define HAVE_ATOMIC128 1
188
-#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
189
-static inline Int128 atomic16_read(Int128 *ptr)
190
-{
191
- /* Maybe replace 0 with 0, returning the old value. */
192
- Int128 z = int128_make64(0);
193
- return atomic16_cmpxchg(ptr, z, z);
194
-}
195
-
196
-static inline void atomic16_set(Int128 *ptr, Int128 val)
197
-{
198
- Int128 old = *ptr, cmp;
199
- do {
200
- cmp = old;
201
- old = atomic16_cmpxchg(ptr, cmp, val);
202
- } while (int128_ne(old, cmp));
203
-}
204
-
205
-# define HAVE_ATOMIC128 1
206
-#else
207
-/* Fallback definitions that must be optimized away, or error. */
208
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
209
-void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
210
-# define HAVE_ATOMIC128 0
211
-#endif /* Some definition for HAVE_ATOMIC128 */
212
+#include "host/atomic128-ldst.h"
213
214
#endif /* QEMU_ATOMIC128_H */
29
--
215
--
30
2.25.1
216
2.34.1
31
217
32
218
1
We lost the ',' following the called function name.
1
Silly typo: sizeof(16) != 16.
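
To spell the sizeof(16) mistake out: an integer literal has type int, so the old expression only promised int-sized (typically 4-byte) alignment. A one-liner makes that visible (illustrative, not part of the patch):

    /* sizeof(16) is sizeof(int), typically 4 -- not 16. */
    _Static_assert(sizeof(16) == sizeof(int), "a literal 16 has type int");
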
2
2
3
Fixes: 3e92aa34434
3
Fixes: e61f1efeb730 ("meson: Detect atomic128 support with optimization")
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/tcg.c | 2 +-
8
meson.build | 2 +-
8
1 file changed, 1 insertion(+), 1 deletion(-)
9
1 file changed, 1 insertion(+), 1 deletion(-)
9
10
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
diff --git a/meson.build b/meson.build
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
--- a/meson.build
13
+++ b/tcg/tcg.c
14
+++ b/meson.build
14
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
15
@@ -XXX,XX +XXX,XX @@ if has_int128
15
col += qemu_log("plugin(%p)", func);
16
# __alignof(unsigned __int128) for the host.
16
}
17
atomic_test_128 = '''
17
18
int main(int ac, char **av) {
18
- col += qemu_log("$0x%x,$%d", info->flags, nb_oargs);
19
- unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16));
19
+ col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
20
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
20
for (i = 0; i < nb_oargs; i++) {
21
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
21
col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
22
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
22
op->args[i]));
23
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
23
--
24
--
24
2.25.1
25
2.34.1
25
26
26
27
diff view generated by jsdifflib
1
Add a generic version of the common use_goto_tb test.
1
Not only the routines in ldst_atomicity.c.inc need markup,
2
but also the ones in the headers.
2
3
3
Various targets avoid the page crossing test for CONFIG_USER_ONLY,
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
but that is wrong: mmap and mprotect can change page permissions.
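
In outline, the generic use_goto_tb test has this shape (a sketch consistent with the per-target versions replaced elsewhere in this series; the helper actually added by the patch may differ in detail):

    bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
    {
        /* Suppress goto_tb while single-stepping. */
        if (db->singlestep_enabled) {
            return false;
        }
        /* Only chain when the destination stays on the page being
         * translated -- for user-only builds too, since mmap and
         * mprotect can change page permissions. */
        return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
    }
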
5
6
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
include/exec/translator.h | 10 ++++++++++
7
host/include/generic/host/atomic128-cas.h | 12 ++++++++----
11
accel/tcg/translator.c | 11 +++++++++++
8
host/include/generic/host/atomic128-ldst.h | 18 ++++++++++++------
12
2 files changed, 21 insertions(+)
9
include/qemu/atomic128.h | 17 +++++++++++++++++
10
accel/tcg/ldst_atomicity.c.inc | 17 -----------------
11
4 files changed, 37 insertions(+), 27 deletions(-)
13
12
14
diff --git a/include/exec/translator.h b/include/exec/translator.h
13
diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/include/exec/translator.h
15
--- a/host/include/generic/host/atomic128-cas.h
17
+++ b/include/exec/translator.h
16
+++ b/host/include/generic/host/atomic128-cas.h
18
@@ -XXX,XX +XXX,XX @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
17
@@ -XXX,XX +XXX,XX @@
19
18
#define HOST_ATOMIC128_CAS_H
20
void translator_loop_temp_check(DisasContextBase *db);
19
21
20
#if defined(CONFIG_ATOMIC128)
22
+/**
21
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
23
+ * translator_use_goto_tb
22
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
24
+ * @db: Disassembly context
23
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
25
+ * @dest: target pc of the goto
24
{
26
+ *
25
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
27
+ * Return true if goto_tb is allowed between the current TB
26
Int128Alias r, c, n;
28
+ * and the destination PC.
27
28
c.s = cmp;
29
n.s = new;
30
- r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
31
+ r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
32
return r.s;
33
}
34
# define HAVE_CMPXCHG128 1
35
#elif defined(CONFIG_CMPXCHG128)
36
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
37
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
38
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
39
{
40
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
41
Int128Alias r, c, n;
42
43
c.s = cmp;
44
n.s = new;
45
- r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
46
+ r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
47
return r.s;
48
}
49
# define HAVE_CMPXCHG128 1
50
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/host/include/generic/host/atomic128-ldst.h
53
+++ b/host/include/generic/host/atomic128-ldst.h
54
@@ -XXX,XX +XXX,XX @@
55
#define HOST_ATOMIC128_LDST_H
56
57
#if defined(CONFIG_ATOMIC128)
58
-static inline Int128 atomic16_read(Int128 *ptr)
59
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
60
+atomic16_read(Int128 *ptr)
61
{
62
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
63
Int128Alias r;
64
65
- r.i = qatomic_read__nocheck((__int128_t *)ptr);
66
+ r.i = qatomic_read__nocheck(ptr_align);
67
return r.s;
68
}
69
70
-static inline void atomic16_set(Int128 *ptr, Int128 val)
71
+static inline void ATTRIBUTE_ATOMIC128_OPT
72
+atomic16_set(Int128 *ptr, Int128 val)
73
{
74
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
75
Int128Alias v;
76
77
v.s = val;
78
- qatomic_set__nocheck((__int128_t *)ptr, v.i);
79
+ qatomic_set__nocheck(ptr_align, v.i);
80
}
81
82
# define HAVE_ATOMIC128 1
83
#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
84
-static inline Int128 atomic16_read(Int128 *ptr)
85
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
86
+atomic16_read(Int128 *ptr)
87
{
88
/* Maybe replace 0 with 0, returning the old value. */
89
Int128 z = int128_make64(0);
90
return atomic16_cmpxchg(ptr, z, z);
91
}
92
93
-static inline void atomic16_set(Int128 *ptr, Int128 val)
94
+static inline void ATTRIBUTE_ATOMIC128_OPT
95
+atomic16_set(Int128 *ptr, Int128 val)
96
{
97
Int128 old = *ptr, cmp;
98
do {
99
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/include/qemu/atomic128.h
102
+++ b/include/qemu/atomic128.h
103
@@ -XXX,XX +XXX,XX @@
104
105
#include "qemu/int128.h"
106
107
+/*
108
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
109
+ * that are supported by the host, e.g. s390x. We can force the pointer to
110
+ * have our known alignment with __builtin_assume_aligned, however prior to
111
+ * GCC 13 that was only reliable with optimization enabled. See
112
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
29
+ */
113
+ */
30
+bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
114
+#if defined(CONFIG_ATOMIC128_OPT)
115
+# if !defined(__OPTIMIZE__)
116
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
117
+# endif
118
+# define CONFIG_ATOMIC128
119
+#endif
120
+#ifndef ATTRIBUTE_ATOMIC128_OPT
121
+# define ATTRIBUTE_ATOMIC128_OPT
122
+#endif
31
+
123
+
32
/*
124
/*
33
* Translator Load Functions
125
* GCC is a house divided about supporting large atomic operations.
34
*
126
*
35
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
127
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
36
index XXXXXXX..XXXXXXX 100644
128
index XXXXXXX..XXXXXXX 100644
37
--- a/accel/tcg/translator.c
129
--- a/accel/tcg/ldst_atomicity.c.inc
38
+++ b/accel/tcg/translator.c
130
+++ b/accel/tcg/ldst_atomicity.c.inc
39
@@ -XXX,XX +XXX,XX @@ void translator_loop_temp_check(DisasContextBase *db)
131
@@ -XXX,XX +XXX,XX @@
40
}
132
#endif
41
}
133
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
42
134
43
+bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
135
-/*
44
+{
136
- * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
45
+ /* Suppress goto_tb in the case of single-stepping. */
137
- * that are supported by the host, e.g. s390x. We can force the pointer to
46
+ if (db->singlestep_enabled || singlestep) {
138
- * have our known alignment with __builtin_assume_aligned, however prior to
47
+ return false;
139
- * GCC 13 that was only reliable with optimization enabled. See
48
+ }
140
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
49
+
141
- */
50
+ /* Check for the dest on the same page as the start of the TB. */
142
-#if defined(CONFIG_ATOMIC128_OPT)
51
+ return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
143
-# if !defined(__OPTIMIZE__)
52
+}
144
-# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
53
+
145
-# endif
54
void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
146
-# define CONFIG_ATOMIC128
55
CPUState *cpu, TranslationBlock *tb, int max_insns)
147
-#endif
56
{
148
-#ifndef ATTRIBUTE_ATOMIC128_OPT
149
-# define ATTRIBUTE_ATOMIC128_OPT
150
-#endif
151
-
152
#if defined(CONFIG_ATOMIC128)
153
# define HAVE_al16_fast true
154
#else
57
--
155
--
58
2.25.1
156
2.34.1
59
157
60
158
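The markup added above can be sketched in isolation as follows. The function name load16 is illustrative only (the real routines are atomic16_read/atomic16_cmpxchg and go through the qatomic_* wrappers); the point is the combination of __builtin_assume_aligned with the conditional optimize attribute:

#if defined(CONFIG_ATOMIC128_OPT) && !defined(__OPTIMIZE__)
/* Prior to GCC 13 the alignment hint is only reliable when optimizing
 * (gcc bug 107389), so force at least -O1 for these functions. */
# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
#else
# define ATTRIBUTE_ATOMIC128_OPT
#endif

static inline __int128 ATTRIBUTE_ATOMIC128_OPT
load16(__int128 *ptr)
{
    /* __alignof(__int128) may be only 8 (e.g. s390x); promise 16 so the
     * compiler inlines the atomic instead of calling out to libatomic. */
    __int128 *ptr_align = __builtin_assume_aligned(ptr, 16);
    return __atomic_load_n(ptr_align, __ATOMIC_RELAXED);
}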
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
No need to roll our own, as this is now provided by tcg.
2
This was the last use of retxl, so remove that too.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
target/ppc/translate.c | 10 +---------
7
target/ppc/cpu.h | 1 -
5
1 file changed, 1 insertion(+), 9 deletions(-)
8
target/ppc/helper.h | 9 ----
6
9
target/ppc/mem_helper.c | 48 --------------------
10
target/ppc/translate.c | 34 ++-------------
11
target/ppc/translate/fixedpoint-impl.c.inc | 51 +++-------------------
12
5 files changed, 11 insertions(+), 132 deletions(-)
13
14
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/ppc/cpu.h
17
+++ b/target/ppc/cpu.h
18
@@ -XXX,XX +XXX,XX @@ struct CPUArchState {
19
/* used to speed-up TLB assist handlers */
20
21
target_ulong nip; /* next instruction pointer */
22
- uint64_t retxh; /* high part of 128-bit helper return */
23
24
/* when a memory exception occurs, the access type is stored here */
25
int access_type;
26
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/ppc/helper.h
29
+++ b/target/ppc/helper.h
30
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
31
32
DEF_HELPER_1(tbegin, void, env)
33
DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
34
-
35
-#ifdef TARGET_PPC64
36
-DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
37
-DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
38
-DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
39
- void, env, tl, i64, i64, i32)
40
-DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
41
- void, env, tl, i64, i64, i32)
42
-#endif
43
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/ppc/mem_helper.c
46
+++ b/target/ppc/mem_helper.c
47
@@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
48
return i;
49
}
50
51
-#ifdef TARGET_PPC64
52
-uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
53
- uint32_t opidx)
54
-{
55
- Int128 ret;
56
-
57
- /* We will have raised EXCP_ATOMIC from the translator. */
58
- assert(HAVE_ATOMIC128);
59
- ret = cpu_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
60
- env->retxh = int128_gethi(ret);
61
- return int128_getlo(ret);
62
-}
63
-
64
-uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
65
- uint32_t opidx)
66
-{
67
- Int128 ret;
68
-
69
- /* We will have raised EXCP_ATOMIC from the translator. */
70
- assert(HAVE_ATOMIC128);
71
- ret = cpu_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
72
- env->retxh = int128_gethi(ret);
73
- return int128_getlo(ret);
74
-}
75
-
76
-void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
77
- uint64_t lo, uint64_t hi, uint32_t opidx)
78
-{
79
- Int128 val;
80
-
81
- /* We will have raised EXCP_ATOMIC from the translator. */
82
- assert(HAVE_ATOMIC128);
83
- val = int128_make128(lo, hi);
84
- cpu_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
85
-}
86
-
87
-void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
88
- uint64_t lo, uint64_t hi, uint32_t opidx)
89
-{
90
- Int128 val;
91
-
92
- /* We will have raised EXCP_ATOMIC from the translator. */
93
- assert(HAVE_ATOMIC128);
94
- val = int128_make128(lo, hi);
95
- cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
96
-}
97
-#endif
98
-
99
/*****************************************************************************/
100
/* Altivec extension helpers */
101
#if HOST_BIG_ENDIAN
7
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
102
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
8
index XXXXXXX..XXXXXXX 100644
103
index XXXXXXX..XXXXXXX 100644
9
--- a/target/ppc/translate.c
104
--- a/target/ppc/translate.c
10
+++ b/target/ppc/translate.c
105
+++ b/target/ppc/translate.c
11
@@ -XXX,XX +XXX,XX @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
106
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
12
13
static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
14
{
107
{
15
- if (unlikely(ctx->singlestep_enabled)) {
108
int rd = rD(ctx->opcode);
16
- return false;
109
TCGv EA, hi, lo;
110
+ TCGv_i128 t16;
111
112
if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
113
(rd == rB(ctx->opcode)))) {
114
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
115
lo = cpu_gpr[rd + 1];
116
hi = cpu_gpr[rd];
117
118
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
119
- if (HAVE_ATOMIC128) {
120
- TCGv_i32 oi = tcg_temp_new_i32();
121
- if (ctx->le_mode) {
122
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LE | MO_128 | MO_ALIGN,
123
- ctx->mem_idx));
124
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
125
- } else {
126
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BE | MO_128 | MO_ALIGN,
127
- ctx->mem_idx));
128
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
129
- }
130
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
131
- } else {
132
- /* Restart with exclusive lock. */
133
- gen_helper_exit_atomic(cpu_env);
134
- ctx->base.is_jmp = DISAS_NORETURN;
135
- return;
136
- }
137
- } else if (ctx->le_mode) {
138
- tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEUQ | MO_ALIGN_16);
139
- tcg_gen_mov_tl(cpu_reserve, EA);
140
- gen_addr_add(ctx, EA, EA, 8);
141
- tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEUQ);
142
- } else {
143
- tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEUQ | MO_ALIGN_16);
144
- tcg_gen_mov_tl(cpu_reserve, EA);
145
- gen_addr_add(ctx, EA, EA, 8);
146
- tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEUQ);
17
- }
147
- }
18
-
148
+ t16 = tcg_temp_new_i128();
19
-#ifndef CONFIG_USER_ONLY
149
+ tcg_gen_qemu_ld_i128(t16, EA, ctx->mem_idx, DEF_MEMOP(MO_128 | MO_ALIGN));
20
- return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
150
+ tcg_gen_extr_i128_i64(lo, hi, t16);
21
-#else
151
22
- return true;
152
tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
23
-#endif
153
tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));
24
+ return translator_use_goto_tb(&ctx->base, dest);
154
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
25
}
155
index XXXXXXX..XXXXXXX 100644
26
156
--- a/target/ppc/translate/fixedpoint-impl.c.inc
27
static void gen_lookup_and_goto_ptr(DisasContext *ctx)
157
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
158
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
159
#if defined(TARGET_PPC64)
160
TCGv ea;
161
TCGv_i64 low_addr_gpr, high_addr_gpr;
162
- MemOp mop;
163
+ TCGv_i128 t16;
164
165
REQUIRE_INSNS_FLAGS(ctx, 64BX);
166
167
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
168
low_addr_gpr = cpu_gpr[a->rt + 1];
169
high_addr_gpr = cpu_gpr[a->rt];
170
}
171
+ t16 = tcg_temp_new_i128();
172
173
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
174
- if (HAVE_ATOMIC128) {
175
- mop = DEF_MEMOP(MO_128);
176
- TCGv_i32 oi = tcg_constant_i32(make_memop_idx(mop, ctx->mem_idx));
177
- if (store) {
178
- if (ctx->le_mode) {
179
- gen_helper_stq_le_parallel(cpu_env, ea, low_addr_gpr,
180
- high_addr_gpr, oi);
181
- } else {
182
- gen_helper_stq_be_parallel(cpu_env, ea, high_addr_gpr,
183
- low_addr_gpr, oi);
184
-
185
- }
186
- } else {
187
- if (ctx->le_mode) {
188
- gen_helper_lq_le_parallel(low_addr_gpr, cpu_env, ea, oi);
189
- tcg_gen_ld_i64(high_addr_gpr, cpu_env,
190
- offsetof(CPUPPCState, retxh));
191
- } else {
192
- gen_helper_lq_be_parallel(high_addr_gpr, cpu_env, ea, oi);
193
- tcg_gen_ld_i64(low_addr_gpr, cpu_env,
194
- offsetof(CPUPPCState, retxh));
195
- }
196
- }
197
- } else {
198
- /* Restart with exclusive lock. */
199
- gen_helper_exit_atomic(cpu_env);
200
- ctx->base.is_jmp = DISAS_NORETURN;
201
- }
202
+ if (store) {
203
+ tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr);
204
+ tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
205
} else {
206
- mop = DEF_MEMOP(MO_UQ);
207
- if (store) {
208
- tcg_gen_qemu_st_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
209
- } else {
210
- tcg_gen_qemu_ld_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
211
- }
212
-
213
- gen_addr_add(ctx, ea, ea, 8);
214
-
215
- if (store) {
216
- tcg_gen_qemu_st_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
217
- } else {
218
- tcg_gen_qemu_ld_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
219
- }
220
+ tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
221
+ tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16);
222
}
223
#else
224
qemu_build_not_reached();
28
--
225
--
29
2.25.1
226
2.34.1
30
227
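The key simplification in the LQARX/LQ/STQ conversion above is that one 128-bit TCG memory operation now supplies both the alignment check and, under CF_PARALLEL, the atomicity that previously required the *_parallel helpers and the retxh side channel. As a fragment (it assumes the DisasContext *ctx and the EA, lo, hi locals of gen_lqarx and is not standalone), the new load sequence is:

    TCGv_i128 t16 = tcg_temp_new_i128();
    /* One aligned, atomic 16-byte load in guest endianness... */
    tcg_gen_qemu_ld_i128(t16, EA, ctx->mem_idx, DEF_MEMOP(MO_128 | MO_ALIGN));
    /* ...then split into the low and high 64-bit halves of the GPR pair. */
    tcg_gen_extr_i128_i64(lo, hi, t16);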
31
228
diff view generated by jsdifflib
1
The number of links across (normal) pages using this is low,
1
No need to roll our own, as this is now provided by tcg.
2
and it will shortly violate the contract for breakpoints.
2
This was the last use of retxl, so remove that too.
3
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
target/alpha/translate.c | 24 ++----------------------
8
target/s390x/cpu.h | 3 --
8
1 file changed, 2 insertions(+), 22 deletions(-)
9
target/s390x/helper.h | 4 ---
10
target/s390x/tcg/mem_helper.c | 61 --------------------------------
11
target/s390x/tcg/translate.c | 30 +++++-----------
12
target/s390x/tcg/insn-data.h.inc | 2 +-
13
5 files changed, 9 insertions(+), 91 deletions(-)
9
14
10
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
15
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/target/alpha/translate.c
17
--- a/target/s390x/cpu.h
13
+++ b/target/alpha/translate.c
18
+++ b/target/s390x/cpu.h
14
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_store_conditional(DisasContext *ctx, int ra, int rb,
19
@@ -XXX,XX +XXX,XX @@ struct CPUArchState {
20
21
float_status fpu_status; /* passed to softfloat lib */
22
23
- /* The low part of a 128-bit return, or remainder of a divide. */
24
- uint64_t retxl;
25
-
26
PSW psw;
27
28
S390CrashReason crash_reason;
29
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/s390x/helper.h
32
+++ b/target/s390x/helper.h
33
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
34
DEF_HELPER_FLAGS_2(srnm, TCG_CALL_NO_WG, void, env, i64)
35
DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
36
DEF_HELPER_2(stfle, i32, env, i64)
37
-DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
38
-DEF_HELPER_FLAGS_2(lpq_parallel, TCG_CALL_NO_WG, i64, env, i64)
39
-DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
40
-DEF_HELPER_FLAGS_4(stpq_parallel, TCG_CALL_NO_WG, void, env, i64, i64, i64)
41
DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
42
DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
43
DEF_HELPER_4(cu14, i32, env, i32, i32, i32)
44
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/s390x/tcg/mem_helper.c
47
+++ b/target/s390x/tcg/mem_helper.c
48
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
49
}
50
#endif
51
52
-/* load pair from quadword */
53
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
54
-{
55
- uintptr_t ra = GETPC();
56
- uint64_t hi, lo;
57
-
58
- check_alignment(env, addr, 16, ra);
59
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
60
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
61
-
62
- env->retxl = lo;
63
- return hi;
64
-}
65
-
66
-uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
67
-{
68
- uintptr_t ra = GETPC();
69
- uint64_t hi, lo;
70
- int mem_idx;
71
- MemOpIdx oi;
72
- Int128 v;
73
-
74
- assert(HAVE_ATOMIC128);
75
-
76
- mem_idx = cpu_mmu_index(env, false);
77
- oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
78
- v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
79
- hi = int128_gethi(v);
80
- lo = int128_getlo(v);
81
-
82
- env->retxl = lo;
83
- return hi;
84
-}
85
-
86
-/* store pair to quadword */
87
-void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
88
- uint64_t low, uint64_t high)
89
-{
90
- uintptr_t ra = GETPC();
91
-
92
- check_alignment(env, addr, 16, ra);
93
- cpu_stq_data_ra(env, addr + 0, high, ra);
94
- cpu_stq_data_ra(env, addr + 8, low, ra);
95
-}
96
-
97
-void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
98
- uint64_t low, uint64_t high)
99
-{
100
- uintptr_t ra = GETPC();
101
- int mem_idx;
102
- MemOpIdx oi;
103
- Int128 v;
104
-
105
- assert(HAVE_ATOMIC128);
106
-
107
- mem_idx = cpu_mmu_index(env, false);
108
- oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
109
- v = int128_make128(low, high);
110
- cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
111
-}
112
-
113
/* Execute instruction. This instruction executes an insn modified with
114
the contents of r1. It does not change the executed instruction in memory;
115
it does not change the program counter.
116
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/s390x/tcg/translate.c
119
+++ b/target/s390x/tcg/translate.c
120
@@ -XXX,XX +XXX,XX @@ static void store_freg32_i64(int reg, TCGv_i64 v)
121
tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
122
}
123
124
-static void return_low128(TCGv_i64 dest)
125
-{
126
- tcg_gen_ld_i64(dest, cpu_env, offsetof(CPUS390XState, retxl));
127
-}
128
-
129
static void update_psw_addr(DisasContext *s)
130
{
131
/* psw.addr */
132
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
133
134
static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
135
{
136
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
137
- gen_helper_lpq(o->out, cpu_env, o->in2);
138
- } else if (HAVE_ATOMIC128) {
139
- gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
140
- } else {
141
- gen_helper_exit_atomic(cpu_env);
142
- return DISAS_NORETURN;
143
- }
144
- return_low128(o->out2);
145
+ o->out_128 = tcg_temp_new_i128();
146
+ tcg_gen_qemu_ld_i128(o->out_128, o->in2, get_mem_index(s),
147
+ MO_TE | MO_128 | MO_ALIGN);
15
return DISAS_NEXT;
148
return DISAS_NEXT;
16
}
149
}
17
150
18
-static bool in_superpage(DisasContext *ctx, int64_t addr)
151
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
19
-{
152
20
-#ifndef CONFIG_USER_ONLY
153
static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
21
- return ((ctx->tbflags & ENV_FLAG_PS_USER) == 0
22
- && addr >> TARGET_VIRT_ADDR_SPACE_BITS == -1
23
- && ((addr >> 41) & 3) == 2);
24
-#else
25
- return false;
26
-#endif
27
-}
28
-
29
static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
30
{
154
{
31
#ifndef CONFIG_USER_ONLY
155
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
32
- /* If the destination is in the superpage, the page perms can't change. */
156
- gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
33
- if (in_superpage(ctx, dest)) {
157
- } else if (HAVE_ATOMIC128) {
34
- return true;
158
- gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
159
- } else {
160
- gen_helper_exit_atomic(cpu_env);
161
- return DISAS_NORETURN;
35
- }
162
- }
36
/* Check for the dest on the same page as the start of the TB. */
163
+ TCGv_i128 t16 = tcg_temp_new_i128();
37
return ((ctx->base.tb->pc ^ dest) & TARGET_PAGE_MASK) == 0;
164
+
38
#else
165
+ tcg_gen_concat_i64_i128(t16, o->out2, o->out);
39
@@ -XXX,XX +XXX,XX @@ static void alpha_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
166
+ tcg_gen_qemu_st_i128(t16, o->in2, get_mem_index(s),
40
{
167
+ MO_TE | MO_128 | MO_ALIGN);
41
DisasContext *ctx = container_of(dcbase, DisasContext, base);
168
return DISAS_NEXT;
42
CPUAlphaState *env = cpu->env_ptr;
43
- int64_t bound, mask;
44
+ int64_t bound;
45
46
ctx->tbflags = ctx->base.tb->flags;
47
ctx->mem_idx = cpu_mmu_index(env, false);
48
@@ -XXX,XX +XXX,XX @@ static void alpha_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
49
ctx->lit = NULL;
50
51
/* Bound the number of insns to execute to those left on the page. */
52
- if (in_superpage(ctx, ctx->base.pc_first)) {
53
- mask = -1ULL << 41;
54
- } else {
55
- mask = TARGET_PAGE_MASK;
56
- }
57
- bound = -(ctx->base.pc_first | mask) / 4;
58
+ bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
59
ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
60
}
169
}
61
170
171
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
172
index XXXXXXX..XXXXXXX 100644
173
--- a/target/s390x/tcg/insn-data.h.inc
174
+++ b/target/s390x/tcg/insn-data.h.inc
175
@@ -XXX,XX +XXX,XX @@
176
D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
177
D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEUQ)
178
/* LOAD PAIR FROM QUADWORD */
179
- C(0xe38f, LPQ, RXY_a, Z, 0, a2, r1_P, 0, lpq, 0)
180
+ C(0xe38f, LPQ, RXY_a, Z, 0, a2, 0, r1_D64, lpq, 0)
181
/* LOAD POSITIVE */
182
C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32)
183
C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)
62
--
184
--
63
2.25.1
185
2.34.1
64
186
65
187
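The s390x conversion above takes the same shape in the store direction: the old helper's explicit check_alignment(env, addr, 16, ra) is subsumed by MO_ALIGN, and the CF_PARALLEL/exit_atomic branching disappears because the i128 store already provides the required atomicity. A fragment, assuming the DisasOps *o and DisasContext *s of op_stpq:

    TCGv_i128 t16 = tcg_temp_new_i128();
    /* Low half first, then high, matching the old stpq helper arguments. */
    tcg_gen_concat_i64_i128(t16, o->out2, o->out);
    tcg_gen_qemu_st_i128(t16, o->in2, get_mem_index(s),
                         MO_TE | MO_128 | MO_ALIGN);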
1
This will allow additional code sharing.
1
With the current structure of cputlb.c, there is no difference
2
No functional change.
2
between the little-endian and big-endian entry points, aside
3
from the assert. Unify the pairs of functions.
3
4
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
The only use of the functions with explicit endianness was in
6
target/sparc64, and that was only to satisfy the assert: the
7
correct endianness is already built into memop.
8
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
12
---
7
accel/tcg/cpu-exec.c | 30 ++++++++++++++++++++++++++++++
13
include/exec/cpu_ldst.h | 58 ++-----
8
accel/tcg/tcg-runtime.c | 22 ----------------------
14
accel/tcg/cputlb.c | 122 +++-----------
9
2 files changed, 30 insertions(+), 22 deletions(-)
15
accel/tcg/user-exec.c | 322 ++++++++++--------------------------
16
target/arm/tcg/m_helper.c | 4 +-
17
target/sparc/ldst_helper.c | 18 +-
18
accel/tcg/ldst_common.c.inc | 24 +--
19
6 files changed, 137 insertions(+), 411 deletions(-)
10
20
11
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
21
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
12
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cpu-exec.c
23
--- a/include/exec/cpu_ldst.h
14
+++ b/accel/tcg/cpu-exec.c
24
+++ b/include/exec/cpu_ldst.h
15
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val,
16
#include "exec/cpu-all.h"
26
int mmu_idx, uintptr_t ra);
17
#include "sysemu/cpu-timers.h"
27
18
#include "sysemu/replay.h"
28
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
19
+#include "exec/helper-proto.h"
29
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr ptr,
20
#include "tb-hash.h"
30
- MemOpIdx oi, uintptr_t ra);
21
#include "tb-lookup.h"
31
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr ptr,
22
#include "tb-context.h"
32
- MemOpIdx oi, uintptr_t ra);
23
@@ -XXX,XX +XXX,XX @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
33
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr ptr,
24
}
34
- MemOpIdx oi, uintptr_t ra);
25
#endif /* CONFIG USER ONLY */
35
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr ptr,
26
36
- MemOpIdx oi, uintptr_t ra);
27
+/**
37
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
28
+ * helper_lookup_tb_ptr: quick check for next tb
38
- MemOpIdx oi, uintptr_t ra);
29
+ * @env: current cpu state
39
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
30
+ *
40
- MemOpIdx oi, uintptr_t ra);
31
+ * Look for an existing TB matching the current cpu state.
41
-
32
+ * If found, return the code pointer. If not found, return
42
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
33
+ * the tcg epilogue so that we return into cpu_tb_exec.
43
- MemOpIdx oi, uintptr_t ra);
34
+ */
44
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
35
+const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
45
- MemOpIdx oi, uintptr_t ra);
46
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
47
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
48
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
49
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra);
50
51
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
52
MemOpIdx oi, uintptr_t ra);
53
-void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
54
- MemOpIdx oi, uintptr_t ra);
55
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
56
- MemOpIdx oi, uintptr_t ra);
57
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
58
- MemOpIdx oi, uintptr_t ra);
59
-void cpu_stw_le_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
60
- MemOpIdx oi, uintptr_t ra);
61
-void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
62
- MemOpIdx oi, uintptr_t ra);
63
-void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
64
- MemOpIdx oi, uintptr_t ra);
65
-
66
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
67
- MemOpIdx oi, uintptr_t ra);
68
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
69
- MemOpIdx oi, uintptr_t ra);
70
+void cpu_stw_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
71
+ MemOpIdx oi, uintptr_t ra);
72
+void cpu_stl_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
73
+ MemOpIdx oi, uintptr_t ra);
74
+void cpu_stq_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
75
+ MemOpIdx oi, uintptr_t ra);
76
+void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
77
+ MemOpIdx oi, uintptr_t ra);
78
79
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
80
uint32_t cmpv, uint32_t newv,
81
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
82
# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra
83
# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra
84
# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra
85
-# define cpu_ldw_mmu cpu_ldw_be_mmu
86
-# define cpu_ldl_mmu cpu_ldl_be_mmu
87
-# define cpu_ldq_mmu cpu_ldq_be_mmu
88
# define cpu_stw_data cpu_stw_be_data
89
# define cpu_stl_data cpu_stl_be_data
90
# define cpu_stq_data cpu_stq_be_data
91
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
92
# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra
93
# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra
94
# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra
95
-# define cpu_stw_mmu cpu_stw_be_mmu
96
-# define cpu_stl_mmu cpu_stl_be_mmu
97
-# define cpu_stq_mmu cpu_stq_be_mmu
98
#else
99
# define cpu_lduw_data cpu_lduw_le_data
100
# define cpu_ldsw_data cpu_ldsw_le_data
101
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
102
# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra
103
# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra
104
# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra
105
-# define cpu_ldw_mmu cpu_ldw_le_mmu
106
-# define cpu_ldl_mmu cpu_ldl_le_mmu
107
-# define cpu_ldq_mmu cpu_ldq_le_mmu
108
# define cpu_stw_data cpu_stw_le_data
109
# define cpu_stl_data cpu_stl_le_data
110
# define cpu_stq_data cpu_stq_le_data
111
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
112
# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra
113
# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra
114
# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra
115
-# define cpu_stw_mmu cpu_stw_le_mmu
116
-# define cpu_stl_mmu cpu_stl_le_mmu
117
-# define cpu_stq_mmu cpu_stq_le_mmu
118
#endif
119
120
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
121
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/accel/tcg/cputlb.c
124
+++ b/accel/tcg/cputlb.c
125
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
126
return ret;
127
}
128
129
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
130
- MemOpIdx oi, uintptr_t ra)
131
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
132
+ MemOpIdx oi, uintptr_t ra)
133
{
134
uint16_t ret;
135
136
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
137
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
138
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
139
plugin_load_cb(env, addr, oi);
140
return ret;
141
}
142
143
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
144
- MemOpIdx oi, uintptr_t ra)
145
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
146
+ MemOpIdx oi, uintptr_t ra)
147
{
148
uint32_t ret;
149
150
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
151
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
152
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
153
plugin_load_cb(env, addr, oi);
154
return ret;
155
}
156
157
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
158
- MemOpIdx oi, uintptr_t ra)
159
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
160
+ MemOpIdx oi, uintptr_t ra)
161
{
162
uint64_t ret;
163
164
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
165
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
166
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
167
plugin_load_cb(env, addr, oi);
168
return ret;
169
}
170
171
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
172
- MemOpIdx oi, uintptr_t ra)
173
-{
174
- uint16_t ret;
175
-
176
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
177
- ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
178
- plugin_load_cb(env, addr, oi);
179
- return ret;
180
-}
181
-
182
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
183
- MemOpIdx oi, uintptr_t ra)
184
-{
185
- uint32_t ret;
186
-
187
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
188
- ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
189
- plugin_load_cb(env, addr, oi);
190
- return ret;
191
-}
192
-
193
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
194
- MemOpIdx oi, uintptr_t ra)
195
-{
196
- uint64_t ret;
197
-
198
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
199
- ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
200
- plugin_load_cb(env, addr, oi);
201
- return ret;
202
-}
203
-
204
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
205
- MemOpIdx oi, uintptr_t ra)
206
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
207
+ MemOpIdx oi, uintptr_t ra)
208
{
209
Int128 ret;
210
211
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
212
- ret = do_ld16_mmu(env, addr, oi, ra);
213
- plugin_load_cb(env, addr, oi);
214
- return ret;
215
-}
216
-
217
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
218
- MemOpIdx oi, uintptr_t ra)
219
-{
220
- Int128 ret;
221
-
222
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
223
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
224
ret = do_ld16_mmu(env, addr, oi, ra);
225
plugin_load_cb(env, addr, oi);
226
return ret;
227
@@ -XXX,XX +XXX,XX @@ void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
228
plugin_store_cb(env, addr, oi);
229
}
230
231
-void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
232
- MemOpIdx oi, uintptr_t retaddr)
233
+void cpu_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
234
+ MemOpIdx oi, uintptr_t retaddr)
235
{
236
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
237
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
238
do_st2_mmu(env, addr, val, oi, retaddr);
239
plugin_store_cb(env, addr, oi);
240
}
241
242
-void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
243
+void cpu_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
244
MemOpIdx oi, uintptr_t retaddr)
245
{
246
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
247
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
248
do_st4_mmu(env, addr, val, oi, retaddr);
249
plugin_store_cb(env, addr, oi);
250
}
251
252
-void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
253
- MemOpIdx oi, uintptr_t retaddr)
254
+void cpu_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
255
+ MemOpIdx oi, uintptr_t retaddr)
256
{
257
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
258
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
259
do_st8_mmu(env, addr, val, oi, retaddr);
260
plugin_store_cb(env, addr, oi);
261
}
262
263
-void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
264
- MemOpIdx oi, uintptr_t retaddr)
265
+void cpu_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
266
+ MemOpIdx oi, uintptr_t retaddr)
267
{
268
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
269
- do_st2_mmu(env, addr, val, oi, retaddr);
270
- plugin_store_cb(env, addr, oi);
271
-}
272
-
273
-void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
- MemOpIdx oi, uintptr_t retaddr)
275
-{
276
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
277
- do_st4_mmu(env, addr, val, oi, retaddr);
278
- plugin_store_cb(env, addr, oi);
279
-}
280
-
281
-void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
282
- MemOpIdx oi, uintptr_t retaddr)
283
-{
284
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
285
- do_st8_mmu(env, addr, val, oi, retaddr);
286
- plugin_store_cb(env, addr, oi);
287
-}
288
-
289
-void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
290
- MemOpIdx oi, uintptr_t retaddr)
291
-{
292
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
293
- do_st16_mmu(env, addr, val, oi, retaddr);
294
- plugin_store_cb(env, addr, oi);
295
-}
296
-
297
-void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
298
- MemOpIdx oi, uintptr_t retaddr)
299
-{
300
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
302
do_st16_mmu(env, addr, val, oi, retaddr);
303
plugin_store_cb(env, addr, oi);
304
}
305
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/accel/tcg/user-exec.c
308
+++ b/accel/tcg/user-exec.c
309
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
310
return ret;
311
}
312
313
-static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
314
- MemOp mop, uintptr_t ra)
315
+static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
316
+ MemOp mop, uintptr_t ra)
317
{
318
void *haddr;
319
uint16_t ret;
320
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
321
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
322
ret = load_atom_2(env, ra, haddr, mop);
323
clear_helper_retaddr();
324
+
325
+ if (mop & MO_BSWAP) {
326
+ ret = bswap16(ret);
327
+ }
328
return ret;
329
}
330
331
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
332
MemOpIdx oi, uintptr_t ra)
333
{
334
- MemOp mop = get_memop(oi);
335
- uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
336
-
337
- if (mop & MO_BSWAP) {
338
- ret = bswap16(ret);
339
- }
340
- return ret;
341
+ return do_ld2_mmu(env, addr, get_memop(oi), ra);
342
}
343
344
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
345
MemOpIdx oi, uintptr_t ra)
346
{
347
- MemOp mop = get_memop(oi);
348
- int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
349
+ return (int16_t)do_ld2_mmu(env, addr, get_memop(oi), ra);
350
+}
351
352
- if (mop & MO_BSWAP) {
353
- ret = bswap16(ret);
354
- }
355
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
356
+ MemOpIdx oi, uintptr_t ra)
36
+{
357
+{
37
+ CPUState *cpu = env_cpu(env);
358
+ uint16_t ret = do_ld2_mmu(env, addr, get_memop(oi), ra);
38
+ TranslationBlock *tb;
359
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
39
+ target_ulong cs_base, pc;
360
return ret;
40
+ uint32_t flags;
361
}
362
363
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
364
- MemOpIdx oi, uintptr_t ra)
365
-{
366
- MemOp mop = get_memop(oi);
367
- uint16_t ret;
368
-
369
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
370
- ret = do_ld2_he_mmu(env, addr, mop, ra);
371
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
372
- return cpu_to_be16(ret);
373
-}
374
-
375
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
376
- MemOpIdx oi, uintptr_t ra)
377
-{
378
- MemOp mop = get_memop(oi);
379
- uint16_t ret;
380
-
381
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
382
- ret = do_ld2_he_mmu(env, addr, mop, ra);
383
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
384
- return cpu_to_le16(ret);
385
-}
386
-
387
-static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
388
- MemOp mop, uintptr_t ra)
389
+static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
390
+ MemOp mop, uintptr_t ra)
391
{
392
void *haddr;
393
uint32_t ret;
394
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
395
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
396
ret = load_atom_4(env, ra, haddr, mop);
397
clear_helper_retaddr();
41
+
398
+
42
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
399
+ if (mop & MO_BSWAP) {
43
+
400
+ ret = bswap32(ret);
44
+ tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
45
+ if (tb == NULL) {
46
+ return tcg_code_gen_epilogue;
47
+ }
401
+ }
48
+ qemu_log_mask_and_addr(CPU_LOG_EXEC, pc,
402
return ret;
49
+ "Chain %d: %p ["
403
}
50
+ TARGET_FMT_lx "/" TARGET_FMT_lx "/%#x] %s\n",
404
51
+ cpu->cpu_index, tb->tc.ptr, cs_base, pc, flags,
405
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
52
+ lookup_symbol(pc));
406
MemOpIdx oi, uintptr_t ra)
53
+ return tb->tc.ptr;
407
{
408
- MemOp mop = get_memop(oi);
409
- uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
410
-
411
- if (mop & MO_BSWAP) {
412
- ret = bswap32(ret);
413
- }
414
- return ret;
415
+ return do_ld4_mmu(env, addr, get_memop(oi), ra);
416
}
417
418
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
419
MemOpIdx oi, uintptr_t ra)
420
{
421
- MemOp mop = get_memop(oi);
422
- int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
423
+ return (int32_t)do_ld4_mmu(env, addr, get_memop(oi), ra);
424
+}
425
426
- if (mop & MO_BSWAP) {
427
- ret = bswap32(ret);
428
- }
429
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
430
+ MemOpIdx oi, uintptr_t ra)
431
+{
432
+ uint32_t ret = do_ld4_mmu(env, addr, get_memop(oi), ra);
433
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
434
return ret;
435
}
436
437
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
438
- MemOpIdx oi, uintptr_t ra)
439
-{
440
- MemOp mop = get_memop(oi);
441
- uint32_t ret;
442
-
443
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
- ret = do_ld4_he_mmu(env, addr, mop, ra);
445
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
446
- return cpu_to_be32(ret);
447
-}
448
-
449
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
450
- MemOpIdx oi, uintptr_t ra)
451
-{
452
- MemOp mop = get_memop(oi);
453
- uint32_t ret;
454
-
455
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
456
- ret = do_ld4_he_mmu(env, addr, mop, ra);
457
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
458
- return cpu_to_le32(ret);
459
-}
460
-
461
-static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
462
- MemOp mop, uintptr_t ra)
463
+static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
464
+ MemOp mop, uintptr_t ra)
465
{
466
void *haddr;
467
uint64_t ret;
468
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
469
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
470
ret = load_atom_8(env, ra, haddr, mop);
471
clear_helper_retaddr();
472
- return ret;
473
-}
474
-
475
-uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
476
- MemOpIdx oi, uintptr_t ra)
477
-{
478
- MemOp mop = get_memop(oi);
479
- uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
480
481
if (mop & MO_BSWAP) {
482
ret = bswap64(ret);
483
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
484
return ret;
485
}
486
487
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
488
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
489
MemOpIdx oi, uintptr_t ra)
490
{
491
- MemOp mop = get_memop(oi);
492
- uint64_t ret;
493
-
494
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
495
- ret = do_ld8_he_mmu(env, addr, mop, ra);
496
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
497
- return cpu_to_be64(ret);
498
+ return do_ld8_mmu(env, addr, get_memop(oi), ra);
499
}
500
501
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
502
- MemOpIdx oi, uintptr_t ra)
503
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
504
+ MemOpIdx oi, uintptr_t ra)
505
{
506
- MemOp mop = get_memop(oi);
507
- uint64_t ret;
508
-
509
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
510
- ret = do_ld8_he_mmu(env, addr, mop, ra);
511
+ uint64_t ret = do_ld8_mmu(env, addr, get_memop(oi), ra);
512
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
513
- return cpu_to_le64(ret);
514
+ return ret;
515
}
516
517
-static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
518
- MemOp mop, uintptr_t ra)
519
+static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
520
+ MemOp mop, uintptr_t ra)
521
{
522
void *haddr;
523
Int128 ret;
524
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
525
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
526
ret = load_atom_16(env, ra, haddr, mop);
527
clear_helper_retaddr();
528
- return ret;
529
-}
530
-
531
-Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
532
- MemOpIdx oi, uintptr_t ra)
533
-{
534
- MemOp mop = get_memop(oi);
535
- Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
536
537
if (mop & MO_BSWAP) {
538
ret = bswap128(ret);
539
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
540
return ret;
541
}
542
543
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
544
+ MemOpIdx oi, uintptr_t ra)
545
+{
546
+ return do_ld16_mmu(env, addr, get_memop(oi), ra);
54
+}
547
+}
55
+
548
+
56
/* Execute a TB, and fix up the CPU state afterwards if necessary */
549
Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
57
/*
550
{
58
* Disable CFI checks.
551
return helper_ld16_mmu(env, addr, oi, GETPC());
59
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
552
}
553
554
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
555
- MemOpIdx oi, uintptr_t ra)
556
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
557
+ MemOpIdx oi, uintptr_t ra)
558
{
559
- MemOp mop = get_memop(oi);
560
- Int128 ret;
561
-
562
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
563
- ret = do_ld16_he_mmu(env, addr, mop, ra);
564
+ Int128 ret = do_ld16_mmu(env, addr, get_memop(oi), ra);
565
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
566
- if (!HOST_BIG_ENDIAN) {
567
- ret = bswap128(ret);
568
- }
569
- return ret;
570
-}
571
-
572
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
573
- MemOpIdx oi, uintptr_t ra)
574
-{
575
- MemOp mop = get_memop(oi);
576
- Int128 ret;
577
-
578
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
579
- ret = do_ld16_he_mmu(env, addr, mop, ra);
580
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
581
- if (HOST_BIG_ENDIAN) {
582
- ret = bswap128(ret);
583
- }
584
return ret;
585
}
586
587
@@ -XXX,XX +XXX,XX @@ void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
589
}
590
591
-static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
592
- MemOp mop, uintptr_t ra)
593
+static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
594
+ MemOp mop, uintptr_t ra)
595
{
596
void *haddr;
597
598
tcg_debug_assert((mop & MO_SIZE) == MO_16);
599
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
600
+
601
+ if (mop & MO_BSWAP) {
602
+ val = bswap16(val);
603
+ }
604
store_atom_2(env, ra, haddr, mop, val);
605
clear_helper_retaddr();
606
}
607
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
608
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
609
MemOpIdx oi, uintptr_t ra)
610
{
611
- MemOp mop = get_memop(oi);
612
-
613
- if (mop & MO_BSWAP) {
614
- val = bswap16(val);
615
- }
616
- do_st2_he_mmu(env, addr, val, mop, ra);
617
+ do_st2_mmu(env, addr, val, get_memop(oi), ra);
618
}
619
620
-void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
621
+void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
622
MemOpIdx oi, uintptr_t ra)
623
{
624
- MemOp mop = get_memop(oi);
625
-
626
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
627
- do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
628
+ do_st2_mmu(env, addr, val, get_memop(oi), ra);
629
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
630
}
631
632
-void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
633
- MemOpIdx oi, uintptr_t ra)
634
-{
635
- MemOp mop = get_memop(oi);
636
-
637
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
638
- do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
639
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
640
-}
641
-
642
-static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
643
- MemOp mop, uintptr_t ra)
644
+static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
645
+ MemOp mop, uintptr_t ra)
646
{
647
void *haddr;
648
649
tcg_debug_assert((mop & MO_SIZE) == MO_32);
650
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
651
+
652
+ if (mop & MO_BSWAP) {
653
+ val = bswap32(val);
654
+ }
655
store_atom_4(env, ra, haddr, mop, val);
656
clear_helper_retaddr();
657
}
658
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
659
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
660
MemOpIdx oi, uintptr_t ra)
661
{
662
- MemOp mop = get_memop(oi);
663
-
664
- if (mop & MO_BSWAP) {
665
- val = bswap32(val);
666
- }
667
- do_st4_he_mmu(env, addr, val, mop, ra);
668
+ do_st4_mmu(env, addr, val, get_memop(oi), ra);
669
}
670
671
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
672
- MemOpIdx oi, uintptr_t ra)
673
+void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
674
+ MemOpIdx oi, uintptr_t ra)
675
{
676
- MemOp mop = get_memop(oi);
677
-
678
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
679
- do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
680
+ do_st4_mmu(env, addr, val, get_memop(oi), ra);
681
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
682
}
683
684
-void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
685
- MemOpIdx oi, uintptr_t ra)
686
-{
687
- MemOp mop = get_memop(oi);
688
-
689
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
690
- do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
691
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
692
-}
693
-
694
-static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
695
- MemOp mop, uintptr_t ra)
696
+static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
697
+ MemOp mop, uintptr_t ra)
698
{
699
void *haddr;
700
701
tcg_debug_assert((mop & MO_SIZE) == MO_64);
702
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
703
+
704
+ if (mop & MO_BSWAP) {
705
+ val = bswap64(val);
706
+ }
707
store_atom_8(env, ra, haddr, mop, val);
708
clear_helper_retaddr();
709
}
710
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
711
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
712
MemOpIdx oi, uintptr_t ra)
713
{
714
- MemOp mop = get_memop(oi);
715
-
716
- if (mop & MO_BSWAP) {
717
- val = bswap64(val);
718
- }
719
- do_st8_he_mmu(env, addr, val, mop, ra);
720
+ do_st8_mmu(env, addr, val, get_memop(oi), ra);
721
}
722
723
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
724
+void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
725
MemOpIdx oi, uintptr_t ra)
726
{
727
- MemOp mop = get_memop(oi);
728
-
729
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
730
- do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
731
+ do_st8_mmu(env, addr, val, get_memop(oi), ra);
732
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
733
}
734
735
-void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
736
- MemOpIdx oi, uintptr_t ra)
737
-{
738
- MemOp mop = get_memop(oi);
739
-
740
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
741
- do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
742
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
743
-}
744
-
745
-static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
746
- MemOp mop, uintptr_t ra)
747
+static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
748
+ MemOp mop, uintptr_t ra)
749
{
750
void *haddr;
751
752
tcg_debug_assert((mop & MO_SIZE) == MO_128);
753
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
754
+
755
+ if (mop & MO_BSWAP) {
756
+ val = bswap128(val);
757
+ }
758
store_atom_16(env, ra, haddr, mop, val);
759
clear_helper_retaddr();
760
}
761
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
762
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
763
MemOpIdx oi, uintptr_t ra)
764
{
765
- MemOp mop = get_memop(oi);
766
-
767
- if (mop & MO_BSWAP) {
768
- val = bswap128(val);
769
- }
770
- do_st16_he_mmu(env, addr, val, mop, ra);
771
+ do_st16_mmu(env, addr, val, get_memop(oi), ra);
772
}
773
774
void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
775
@@ -XXX,XX +XXX,XX @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
776
helper_st16_mmu(env, addr, val, oi, GETPC());
777
}
778
779
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
780
- Int128 val, MemOpIdx oi, uintptr_t ra)
781
+void cpu_st16_mmu(CPUArchState *env, abi_ptr addr,
782
+ Int128 val, MemOpIdx oi, uintptr_t ra)
783
{
784
- MemOp mop = get_memop(oi);
785
-
786
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
787
- if (!HOST_BIG_ENDIAN) {
788
- val = bswap128(val);
789
- }
790
- do_st16_he_mmu(env, addr, val, mop, ra);
791
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
792
-}
793
-
794
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
795
- Int128 val, MemOpIdx oi, uintptr_t ra)
796
-{
797
- MemOp mop = get_memop(oi);
798
-
799
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
800
- if (HOST_BIG_ENDIAN) {
801
- val = bswap128(val);
802
- }
803
- do_st16_he_mmu(env, addr, val, mop, ra);
804
+ do_st16_mmu(env, addr, val, get_memop(oi), ra);
805
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
806
}
807
808
diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
60
index XXXXXXX..XXXXXXX 100644
809
index XXXXXXX..XXXXXXX 100644
61
--- a/accel/tcg/tcg-runtime.c
810
--- a/target/arm/tcg/m_helper.c
62
+++ b/accel/tcg/tcg-runtime.c
811
+++ b/target/arm/tcg/m_helper.c
63
@@ -XXX,XX +XXX,XX @@
812
@@ -XXX,XX +XXX,XX @@ static bool do_v7m_function_return(ARMCPU *cpu)
64
#include "disas/disas.h"
813
*/
65
#include "exec/log.h"
814
mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
66
#include "tcg/tcg.h"
815
oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx));
67
-#include "tb-lookup.h"
816
- newpc = cpu_ldl_le_mmu(env, frameptr, oi, 0);
68
817
- newpsr = cpu_ldl_le_mmu(env, frameptr + 4, oi, 0);
69
/* 32-bit helpers */
818
+ newpc = cpu_ldl_mmu(env, frameptr, oi, 0);
70
819
+ newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0);
71
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
820
72
return ctpop64(arg);
821
/* Consistency checks on new IPSR */
73
}
822
newpsr_exc = newpsr & XPSR_EXCP;
74
823
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
75
-const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
824
index XXXXXXX..XXXXXXX 100644
76
-{
825
--- a/target/sparc/ldst_helper.c
77
- CPUState *cpu = env_cpu(env);
826
+++ b/target/sparc/ldst_helper.c
78
- TranslationBlock *tb;
827
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
79
- target_ulong cs_base, pc;
828
ret = cpu_ldb_mmu(env, addr, oi, GETPC());
80
- uint32_t flags;
829
break;
81
-
830
case 2:
82
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
831
- if (asi & 8) {
83
-
832
- ret = cpu_ldw_le_mmu(env, addr, oi, GETPC());
84
- tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
833
- } else {
85
- if (tb == NULL) {
834
- ret = cpu_ldw_be_mmu(env, addr, oi, GETPC());
86
- return tcg_code_gen_epilogue;
835
- }
87
- }
836
+ ret = cpu_ldw_mmu(env, addr, oi, GETPC());
88
- qemu_log_mask_and_addr(CPU_LOG_EXEC, pc,
837
break;
89
- "Chain %d: %p ["
838
case 4:
90
- TARGET_FMT_lx "/" TARGET_FMT_lx "/%#x] %s\n",
839
- if (asi & 8) {
91
- cpu->cpu_index, tb->tc.ptr, cs_base, pc, flags,
840
- ret = cpu_ldl_le_mmu(env, addr, oi, GETPC());
92
- lookup_symbol(pc));
841
- } else {
93
- return tb->tc.ptr;
842
- ret = cpu_ldl_be_mmu(env, addr, oi, GETPC());
94
-}
843
- }
95
-
844
+ ret = cpu_ldl_mmu(env, addr, oi, GETPC());
96
void HELPER(exit_atomic)(CPUArchState *env)
845
break;
97
{
846
case 8:
98
cpu_loop_exit_atomic(env_cpu(env), GETPC());
847
- if (asi & 8) {
848
- ret = cpu_ldq_le_mmu(env, addr, oi, GETPC());
849
- } else {
850
- ret = cpu_ldq_be_mmu(env, addr, oi, GETPC());
851
- }
852
+ ret = cpu_ldq_mmu(env, addr, oi, GETPC());
853
break;
854
default:
855
g_assert_not_reached();
856
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
857
index XXXXXXX..XXXXXXX 100644
858
--- a/accel/tcg/ldst_common.c.inc
859
+++ b/accel/tcg/ldst_common.c.inc
860
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
861
int mmu_idx, uintptr_t ra)
862
{
863
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
864
- return cpu_ldw_be_mmu(env, addr, oi, ra);
865
+ return cpu_ldw_mmu(env, addr, oi, ra);
866
}
867
868
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
869
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
870
int mmu_idx, uintptr_t ra)
871
{
872
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
873
- return cpu_ldl_be_mmu(env, addr, oi, ra);
874
+ return cpu_ldl_mmu(env, addr, oi, ra);
875
}
876
877
uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
878
int mmu_idx, uintptr_t ra)
879
{
880
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
881
- return cpu_ldq_be_mmu(env, addr, oi, ra);
882
+ return cpu_ldq_mmu(env, addr, oi, ra);
883
}
884
885
uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
886
int mmu_idx, uintptr_t ra)
887
{
888
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
889
- return cpu_ldw_le_mmu(env, addr, oi, ra);
890
+ return cpu_ldw_mmu(env, addr, oi, ra);
891
}
892
893
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
894
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
895
int mmu_idx, uintptr_t ra)
896
{
897
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
898
- return cpu_ldl_le_mmu(env, addr, oi, ra);
899
+ return cpu_ldl_mmu(env, addr, oi, ra);
900
}
901
902
uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
903
int mmu_idx, uintptr_t ra)
904
{
905
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
906
- return cpu_ldq_le_mmu(env, addr, oi, ra);
907
+ return cpu_ldq_mmu(env, addr, oi, ra);
908
}
909
910
void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
911
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
912
int mmu_idx, uintptr_t ra)
913
{
914
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
915
- cpu_stw_be_mmu(env, addr, val, oi, ra);
916
+ cpu_stw_mmu(env, addr, val, oi, ra);
917
}
918
919
void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
920
int mmu_idx, uintptr_t ra)
921
{
922
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
923
- cpu_stl_be_mmu(env, addr, val, oi, ra);
924
+ cpu_stl_mmu(env, addr, val, oi, ra);
925
}
926
927
void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
928
int mmu_idx, uintptr_t ra)
929
{
930
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
931
- cpu_stq_be_mmu(env, addr, val, oi, ra);
932
+ cpu_stq_mmu(env, addr, val, oi, ra);
933
}
934
935
void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
936
int mmu_idx, uintptr_t ra)
937
{
938
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
939
- cpu_stw_le_mmu(env, addr, val, oi, ra);
940
+ cpu_stw_mmu(env, addr, val, oi, ra);
941
}
942
943
void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
944
int mmu_idx, uintptr_t ra)
945
{
946
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
947
- cpu_stl_le_mmu(env, addr, val, oi, ra);
948
+ cpu_stl_mmu(env, addr, val, oi, ra);
949
}
950
951
void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
952
int mmu_idx, uintptr_t ra)
953
{
954
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
955
- cpu_stq_le_mmu(env, addr, val, oi, ra);
956
+ cpu_stq_mmu(env, addr, val, oi, ra);
957
}
958
959
/*--------------------------*/
99
--
960
--
100
2.25.1
961
2.34.1
101
962
102
963
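
To illustrate the shape of the cpu_ld*_mmu unification above: once the byte order travels
in the memory-operation descriptor rather than in the helper's name, one load routine can
serve both orders. A minimal standalone sketch (toy names only; TOY_LE/TOY_BE and toy_ldl
are made up for the example and are not QEMU's MemOp encoding or API):

    /* build: cc -O2 toy_ldl.c && ./a.out */
    #include <stdint.h>
    #include <stdio.h>

    typedef enum { TOY_LE, TOY_BE } ToyEndian;   /* stand-in for the byte-order bit */

    /* One entry point; the descriptor, not the function name, selects byte order. */
    static uint32_t toy_ldl(const uint8_t *p, ToyEndian e)
    {
        if (e == TOY_BE) {
            return (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
        }
        return (uint32_t)p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
    }

    int main(void)
    {
        uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };
        printf("BE 0x%08x, LE 0x%08x\n", toy_ldl(buf, TOY_BE), toy_ldl(buf, TOY_LE));
        return 0;
    }

This is the same collapse visible in the sparc helper_ld_asi hunk above, where the
asi & 8 selection between a _le and a _be helper disappears once the order is carried
by the oi descriptor.
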
1
We have not needed to end a TB for I/O since ba3e7926691
1
Use cpu_ld16_mmu and cpu_st16_mmu to eliminate the special case,
2
("icount: clean up cpu_can_io at the entry to the block").
2
and change all of the *_data_ra functions to match.
3
3
4
In use_goto_tb, the check for singlestep_enabled is in the
4
Note that we check the alignment of both compare and store
5
generic translator_use_goto_tb. In s390x_tr_tb_stop, the
5
pointers at the top of the function, so MO_ALIGN* may be
6
check for singlestep_enabled is in the preceding do_debug test.
6
safely removed from the individual memory operations.
7
7
8
Which leaves only FLAG_MASK_PER: fold that test alone into
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
the two callers of use_exit_tb.
10
11
Reviewed-by: David Hildenbrand <david@redhat.com>
9
Reviewed-by: David Hildenbrand <david@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
11
---
14
target/s390x/translate.c | 11 ++---------
12
target/s390x/tcg/mem_helper.c | 66 ++++++++++++++---------------------
15
1 file changed, 2 insertions(+), 9 deletions(-)
13
1 file changed, 27 insertions(+), 39 deletions(-)
16
14
17
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
15
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/target/s390x/translate.c
17
--- a/target/s390x/tcg/mem_helper.c
20
+++ b/target/s390x/translate.c
18
+++ b/target/s390x/tcg/mem_helper.c
21
@@ -XXX,XX +XXX,XX @@ static void gen_op_calc_cc(DisasContext *s)
19
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
22
set_cc_static(s);
20
uint64_t a2, bool parallel)
23
}
21
{
24
22
uint32_t mem_idx = cpu_mmu_index(env, false);
25
-static bool use_exit_tb(DisasContext *s)
23
+ MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
26
-{
24
+ MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
27
- return s->base.singlestep_enabled ||
25
+ MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
28
- (tb_cflags(s->base.tb) & CF_LAST_IO) ||
26
+ MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
29
- (s->base.tb->flags & FLAG_MASK_PER);
27
+ MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
30
-}
28
uintptr_t ra = GETPC();
29
uint32_t fc = extract32(env->regs[0], 0, 8);
30
uint32_t sc = extract32(env->regs[0], 8, 8);
31
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
32
}
33
}
34
35
- /* All loads happen before all stores. For simplicity, load the entire
36
- store value area from the parameter list. */
37
- svh = cpu_ldq_data_ra(env, pl + 16, ra);
38
- svl = cpu_ldq_data_ra(env, pl + 24, ra);
39
+ /*
40
+ * All loads happen before all stores. For simplicity, load the entire
41
+ * store value area from the parameter list.
42
+ */
43
+ svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
44
+ svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
45
46
switch (fc) {
47
case 0:
48
{
49
- uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
50
+ uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
51
uint32_t cv = env->regs[r3];
52
uint32_t ov;
53
54
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
55
ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
56
#endif
57
} else {
58
- ov = cpu_ldl_data_ra(env, a1, ra);
59
- cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
60
+ ov = cpu_ldl_mmu(env, a1, oi4, ra);
61
+ cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
62
}
63
cc = (ov != cv);
64
env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
65
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
66
67
case 1:
68
{
69
- uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
70
+ uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
71
uint64_t cv = env->regs[r3];
72
uint64_t ov;
73
74
if (parallel) {
75
#ifdef CONFIG_ATOMIC64
76
- MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
77
- ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
78
+ ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
79
#else
80
/* Note that we asserted !parallel above. */
81
g_assert_not_reached();
82
#endif
83
} else {
84
- ov = cpu_ldq_data_ra(env, a1, ra);
85
- cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
86
+ ov = cpu_ldq_mmu(env, a1, oi8, ra);
87
+ cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
88
}
89
cc = (ov != cv);
90
env->regs[r3] = ov;
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
92
93
case 2:
94
{
95
- uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
96
- uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
97
- Int128 nv = int128_make128(nvl, nvh);
98
+ Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
99
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
100
Int128 ov;
101
102
if (!parallel) {
103
- uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
104
- uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
31
-
105
-
32
static bool use_goto_tb(DisasContext *s, uint64_t dest)
106
- ov = int128_make128(ol, oh);
33
{
107
+ ov = cpu_ld16_mmu(env, a1, oi16, ra);
34
- if (unlikely(use_exit_tb(s))) {
108
cc = !int128_eq(ov, cv);
35
+ if (unlikely(s->base.tb->flags & FLAG_MASK_PER)) {
109
if (cc) {
36
return false;
110
nv = ov;
37
}
111
}
38
return translator_use_goto_tb(&s->base, dest);
112
-
39
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
113
- cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
40
/* Exit the TB, either by raising a debug exception or by return. */
114
- cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
41
if (dc->do_debug) {
115
+ cpu_st16_mmu(env, a1, nv, oi16, ra);
42
gen_exception(EXCP_DEBUG);
116
} else if (HAVE_CMPXCHG128) {
43
- } else if (use_exit_tb(dc) ||
117
- MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
44
+ } else if ((dc->base.tb->flags & FLAG_MASK_PER) ||
118
- ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
45
dc->base.is_jmp == DISAS_PC_STALE_NOCHAIN) {
119
+ ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
46
tcg_gen_exit_tb(NULL, 0);
120
cc = !int128_eq(ov, cv);
47
} else {
121
} else {
122
/* Note that we asserted !parallel above. */
123
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
124
if (cc == 0) {
125
switch (sc) {
126
case 0:
127
- cpu_stb_data_ra(env, a2, svh >> 56, ra);
128
+ cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
129
break;
130
case 1:
131
- cpu_stw_data_ra(env, a2, svh >> 48, ra);
132
+ cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
133
break;
134
case 2:
135
- cpu_stl_data_ra(env, a2, svh >> 32, ra);
136
+ cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
137
break;
138
case 3:
139
- cpu_stq_data_ra(env, a2, svh, ra);
140
+ cpu_stq_mmu(env, a2, svh, oi8, ra);
141
break;
142
case 4:
143
- if (!parallel) {
144
- cpu_stq_data_ra(env, a2 + 0, svh, ra);
145
- cpu_stq_data_ra(env, a2 + 8, svl, ra);
146
- } else if (HAVE_ATOMIC128) {
147
- MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
148
- Int128 sv = int128_make128(svl, svh);
149
- cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
150
- } else {
151
- /* Note that we asserted !parallel above. */
152
- g_assert_not_reached();
153
- }
154
+ cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
155
break;
156
default:
157
g_assert_not_reached();
48
--
158
--
49
2.25.1
159
2.34.1
50
160
51
161
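
On the point above that alignment is checked once at the top of the function so MO_ALIGN*
can be dropped from the individual accesses, a tiny standalone sketch of that pattern
(hypothetical helper, not the QEMU code; assumes every later access uses the same
power-of-two granularity):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* One up-front check; every access that follows may then assume alignment. */
    static bool aligned_to(uint64_t addr, unsigned size)
    {
        return (addr & (uint64_t)(size - 1)) == 0;   /* size must be a power of two */
    }

    int main(void)
    {
        uint64_t a1 = 0x1000, a2 = 0x1010;
        unsigned size = 16;

        if (!aligned_to(a1, size) || !aligned_to(a2, size)) {
            fprintf(stderr, "unaligned operand\n");  /* the guest would take an exception */
            return 1;
        }
        puts("operands aligned; later accesses need no per-access alignment flag");
        return 0;
    }
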
1
Eliminate the CONFIG_USER_ONLY specialization.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Reviewed-by: David Hildenbrand <david@redhat.com>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
target/s390x/translate.c | 7 +------
7
target/s390x/tcg/mem_helper.c | 8 +-------
5
1 file changed, 1 insertion(+), 6 deletions(-)
8
1 file changed, 1 insertion(+), 7 deletions(-)
6
9
7
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
10
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
8
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
9
--- a/target/s390x/translate.c
12
--- a/target/s390x/tcg/mem_helper.c
10
+++ b/target/s390x/translate.c
13
+++ b/target/s390x/tcg/mem_helper.c
11
@@ -XXX,XX +XXX,XX @@ static bool use_goto_tb(DisasContext *s, uint64_t dest)
14
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
12
if (unlikely(use_exit_tb(s))) {
15
uint32_t ov;
13
return false;
16
14
}
17
if (parallel) {
15
-#ifndef CONFIG_USER_ONLY
18
-#ifdef CONFIG_USER_ONLY
16
- return (dest & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
19
- uint32_t *haddr = g2h(env_cpu(env), a1);
17
- (dest & TARGET_PAGE_MASK) == (s->base.pc_next & TARGET_PAGE_MASK);
20
- ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
18
-#else
21
-#else
19
- return true;
22
- MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
23
- ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
20
-#endif
24
-#endif
21
+ return translator_use_goto_tb(&s->base, dest);
25
+ ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
22
}
26
} else {
23
27
ov = cpu_ldl_mmu(env, a1, oi4, ra);
24
static void account_noninline_branch(DisasContext *s, int cc_op)
28
cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
25
--
29
--
26
2.25.1
30
2.34.1
27
31
28
32
1
Just use translator_use_goto_tb directly at the one call site,
1
Atomic load/store of 128-bit quantities is now handled
2
rather than maintaining a local wrapper.
2
by cpu_{ld,st}16_mmu.
3
3
4
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
target/tricore/translate.c | 17 ++---------------
7
accel/tcg/atomic_template.h | 61 +++--------------------------------
8
1 file changed, 2 insertions(+), 15 deletions(-)
8
include/exec/cpu_ldst.h | 9 ------
9
accel/tcg/atomic_common.c.inc | 14 --------
10
3 files changed, 4 insertions(+), 80 deletions(-)
9
11
10
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
12
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/target/tricore/translate.c
14
--- a/accel/tcg/atomic_template.h
13
+++ b/target/tricore/translate.c
15
+++ b/accel/tcg/atomic_template.h
14
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_pc(target_ulong pc)
16
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
15
tcg_gen_movi_tl(cpu_PC, pc);
17
return ret;
16
}
18
}
17
19
18
-static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
20
-#if DATA_SIZE >= 16
21
-#if HAVE_ATOMIC128
22
-ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
23
- MemOpIdx oi, uintptr_t retaddr)
19
-{
24
-{
20
- if (unlikely(ctx->base.singlestep_enabled)) {
25
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
21
- return false;
26
- PAGE_READ, retaddr);
22
- }
27
- DATA_TYPE val;
23
-
28
-
24
-#ifndef CONFIG_USER_ONLY
29
- val = atomic16_read(haddr);
25
- return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
30
- ATOMIC_MMU_CLEANUP;
26
-#else
31
- atomic_trace_ld_post(env, addr, oi);
27
- return true;
32
- return val;
28
-#endif
29
-}
33
-}
30
-
34
-
31
static void generate_qemu_excp(DisasContext *ctx, int excp)
35
-void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
36
- MemOpIdx oi, uintptr_t retaddr)
37
-{
38
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
39
- PAGE_WRITE, retaddr);
40
-
41
- atomic16_set(haddr, val);
42
- ATOMIC_MMU_CLEANUP;
43
- atomic_trace_st_post(env, addr, oi);
44
-}
45
-#endif
46
-#else
47
+#if DATA_SIZE < 16
48
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
49
MemOpIdx oi, uintptr_t retaddr)
32
{
50
{
33
TCGv_i32 tmp = tcg_const_i32(excp);
51
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
34
@@ -XXX,XX +XXX,XX @@ static void generate_qemu_excp(DisasContext *ctx, int excp)
52
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
35
tcg_temp_free(tmp);
53
54
#undef GEN_ATOMIC_HELPER_FN
55
-#endif /* DATA SIZE >= 16 */
56
+#endif /* DATA SIZE < 16 */
57
58
#undef END
59
60
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
61
return BSWAP(ret);
36
}
62
}
37
63
38
-static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
64
-#if DATA_SIZE >= 16
39
+static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
65
-#if HAVE_ATOMIC128
66
-ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
67
- MemOpIdx oi, uintptr_t retaddr)
68
-{
69
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
70
- PAGE_READ, retaddr);
71
- DATA_TYPE val;
72
-
73
- val = atomic16_read(haddr);
74
- ATOMIC_MMU_CLEANUP;
75
- atomic_trace_ld_post(env, addr, oi);
76
- return BSWAP(val);
77
-}
78
-
79
-void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
80
- MemOpIdx oi, uintptr_t retaddr)
81
-{
82
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
83
- PAGE_WRITE, retaddr);
84
-
85
- val = BSWAP(val);
86
- atomic16_set(haddr, val);
87
- ATOMIC_MMU_CLEANUP;
88
- atomic_trace_st_post(env, addr, oi);
89
-}
90
-#endif
91
-#else
92
+#if DATA_SIZE < 16
93
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
94
MemOpIdx oi, uintptr_t retaddr)
40
{
95
{
41
- if (use_goto_tb(ctx, dest)) {
96
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
42
+ if (translator_use_goto_tb(&ctx->base, dest)) {
97
#undef ADD
43
tcg_gen_goto_tb(n);
98
44
gen_save_pc(dest);
99
#undef GEN_ATOMIC_HELPER_FN
45
tcg_gen_exit_tb(ctx->base.tb, n);
100
-#endif /* DATA_SIZE >= 16 */
101
+#endif /* DATA_SIZE < 16 */
102
103
#undef END
104
#endif /* DATA_SIZE > 1 */
105
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/exec/cpu_ldst.h
108
+++ b/include/exec/cpu_ldst.h
109
@@ -XXX,XX +XXX,XX @@ Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
110
Int128 cmpv, Int128 newv,
111
MemOpIdx oi, uintptr_t retaddr);
112
113
-Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
114
- MemOpIdx oi, uintptr_t retaddr);
115
-Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
116
- MemOpIdx oi, uintptr_t retaddr);
117
-void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
118
- MemOpIdx oi, uintptr_t retaddr);
119
-void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
120
- MemOpIdx oi, uintptr_t retaddr);
121
-
122
#if defined(CONFIG_USER_ONLY)
123
124
extern __thread uintptr_t helper_retaddr;
125
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
126
index XXXXXXX..XXXXXXX 100644
127
--- a/accel/tcg/atomic_common.c.inc
128
+++ b/accel/tcg/atomic_common.c.inc
129
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
130
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
131
}
132
133
-#if HAVE_ATOMIC128
134
-static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
135
- MemOpIdx oi)
136
-{
137
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
138
-}
139
-
140
-static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
141
- MemOpIdx oi)
142
-{
143
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
144
-}
145
-#endif
146
-
147
/*
148
* Atomic helpers callable from TCG.
149
* These have a common interface and all defer to cpu_atomic_*
46
--
150
--
47
2.25.1
151
2.34.1
48
152
49
153
1
From: Liren Wei <lrwei@bupt.edu.cn>
1
Now that load/store are gone, we're always passing
2
2
PAGE_READ | PAGE_WRITE for RMW atomic operations.
3
TranslationBlocks not inserted into the corresponding region
3
4
tree shall be regarded as partially initialized objects, and
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
need to be finalized first before being inserted into QHT.
6
7
Signed-off-by: Liren Wei <lrwei@bupt.edu.cn>
8
Message-Id: <f9fc263f71e11b6308d8c1fbc0dd366bf4aeb532.1625404483.git.lrwei@bupt.edu.cn>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
accel/tcg/translate-all.c | 9 ++++++++-
7
accel/tcg/atomic_template.h | 32 ++++++--------
12
1 file changed, 8 insertions(+), 1 deletion(-)
8
accel/tcg/cputlb.c | 85 ++++++++++++++-----------------------
13
9
accel/tcg/user-exec.c | 8 +---
14
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
10
3 files changed, 45 insertions(+), 80 deletions(-)
11
12
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/translate-all.c
14
--- a/accel/tcg/atomic_template.h
17
+++ b/accel/tcg/translate-all.c
15
+++ b/accel/tcg/atomic_template.h
18
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
16
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
19
return tb;
17
ABI_TYPE cmpv, ABI_TYPE newv,
20
}
18
MemOpIdx oi, uintptr_t retaddr)
19
{
20
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
21
- PAGE_READ | PAGE_WRITE, retaddr);
22
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
23
DATA_TYPE ret;
24
25
#if DATA_SIZE == 16
26
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
27
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
28
MemOpIdx oi, uintptr_t retaddr)
29
{
30
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
31
- PAGE_READ | PAGE_WRITE, retaddr);
32
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
33
DATA_TYPE ret;
34
35
ret = qatomic_xchg__nocheck(haddr, val);
36
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
37
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
38
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
39
{ \
40
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
41
- PAGE_READ | PAGE_WRITE, retaddr); \
42
- DATA_TYPE ret; \
43
+ DATA_TYPE *haddr, ret; \
44
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
45
ret = qatomic_##X(haddr, val); \
46
ATOMIC_MMU_CLEANUP; \
47
atomic_trace_rmw_post(env, addr, oi); \
48
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
49
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
50
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
51
{ \
52
- XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
53
- PAGE_READ | PAGE_WRITE, retaddr); \
54
- XDATA_TYPE cmp, old, new, val = xval; \
55
+ XDATA_TYPE *haddr, cmp, old, new, val = xval; \
56
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
57
smp_mb(); \
58
cmp = qatomic_read__nocheck(haddr); \
59
do { \
60
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
61
ABI_TYPE cmpv, ABI_TYPE newv,
62
MemOpIdx oi, uintptr_t retaddr)
63
{
64
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
65
- PAGE_READ | PAGE_WRITE, retaddr);
66
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
67
DATA_TYPE ret;
68
69
#if DATA_SIZE == 16
70
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
71
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
72
MemOpIdx oi, uintptr_t retaddr)
73
{
74
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
75
- PAGE_READ | PAGE_WRITE, retaddr);
76
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
77
ABI_TYPE ret;
78
79
ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
80
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
81
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
82
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
83
{ \
84
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
85
- PAGE_READ | PAGE_WRITE, retaddr); \
86
- DATA_TYPE ret; \
87
+ DATA_TYPE *haddr, ret; \
88
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
89
ret = qatomic_##X(haddr, BSWAP(val)); \
90
ATOMIC_MMU_CLEANUP; \
91
atomic_trace_rmw_post(env, addr, oi); \
92
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
93
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
94
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
95
{ \
96
- XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
97
- PAGE_READ | PAGE_WRITE, retaddr); \
98
- XDATA_TYPE ldo, ldn, old, new, val = xval; \
99
+ XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \
100
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
101
smp_mb(); \
102
ldn = qatomic_read__nocheck(haddr); \
103
do { \
104
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/accel/tcg/cputlb.c
107
+++ b/accel/tcg/cputlb.c
108
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
109
/*
110
* Probe for an atomic operation. Do not allow unaligned operations,
111
* or io operations to proceed. Return the host address.
112
- *
113
- * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
114
*/
115
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
116
- MemOpIdx oi, int size, int prot,
117
- uintptr_t retaddr)
118
+ MemOpIdx oi, int size, uintptr_t retaddr)
119
{
120
uintptr_t mmu_idx = get_mmuidx(oi);
121
MemOp mop = get_memop(oi);
122
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
123
tlbe = tlb_entry(env, mmu_idx, addr);
124
125
/* Check TLB entry and enforce page permissions. */
126
- if (prot & PAGE_WRITE) {
127
- tlb_addr = tlb_addr_write(tlbe);
128
- if (!tlb_hit(tlb_addr, addr)) {
129
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
130
- addr & TARGET_PAGE_MASK)) {
131
- tlb_fill(env_cpu(env), addr, size,
132
- MMU_DATA_STORE, mmu_idx, retaddr);
133
- index = tlb_index(env, mmu_idx, addr);
134
- tlbe = tlb_entry(env, mmu_idx, addr);
135
- }
136
- tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
137
- }
138
-
139
- if (prot & PAGE_READ) {
140
- /*
141
- * Let the guest notice RMW on a write-only page.
142
- * We have just verified that the page is writable.
143
- * Subpage lookups may have left TLB_INVALID_MASK set,
144
- * but addr_read will only be -1 if PAGE_READ was unset.
145
- */
146
- if (unlikely(tlbe->addr_read == -1)) {
147
- tlb_fill(env_cpu(env), addr, size,
148
- MMU_DATA_LOAD, mmu_idx, retaddr);
149
- /*
150
- * Since we don't support reads and writes to different
151
- * addresses, and we do have the proper page loaded for
152
- * write, this shouldn't ever return. But just in case,
153
- * handle via stop-the-world.
154
- */
155
- goto stop_the_world;
156
- }
157
- /* Collect TLB_WATCHPOINT for read. */
158
- tlb_addr |= tlbe->addr_read;
159
- }
160
- } else /* if (prot & PAGE_READ) */ {
161
- tlb_addr = tlbe->addr_read;
162
- if (!tlb_hit(tlb_addr, addr)) {
163
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
164
- addr & TARGET_PAGE_MASK)) {
165
- tlb_fill(env_cpu(env), addr, size,
166
- MMU_DATA_LOAD, mmu_idx, retaddr);
167
- index = tlb_index(env, mmu_idx, addr);
168
- tlbe = tlb_entry(env, mmu_idx, addr);
169
- }
170
- tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK;
171
+ tlb_addr = tlb_addr_write(tlbe);
172
+ if (!tlb_hit(tlb_addr, addr)) {
173
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
174
+ addr & TARGET_PAGE_MASK)) {
175
+ tlb_fill(env_cpu(env), addr, size,
176
+ MMU_DATA_STORE, mmu_idx, retaddr);
177
+ index = tlb_index(env, mmu_idx, addr);
178
+ tlbe = tlb_entry(env, mmu_idx, addr);
179
}
180
+ tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
181
}
21
182
22
+ /*
183
+ /*
23
+ * Insert TB into the corresponding region tree before publishing it
184
+ * Let the guest notice RMW on a write-only page.
24
+ * through QHT. Otherwise rewinding happened in the TB might fail to
185
+ * We have just verified that the page is writable.
25
+ * lookup itself using host PC.
186
+ * Subpage lookups may have left TLB_INVALID_MASK set,
187
+ * but addr_read will only be -1 if PAGE_READ was unset.
26
+ */
188
+ */
27
+ tcg_tb_insert(tb);
189
+ if (unlikely(tlbe->addr_read == -1)) {
190
+ tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
191
+ /*
192
+ * Since we don't support reads and writes to different
193
+ * addresses, and we do have the proper page loaded for
194
+ * write, this shouldn't ever return. But just in case,
195
+ * handle via stop-the-world.
196
+ */
197
+ goto stop_the_world;
198
+ }
199
+ /* Collect TLB_WATCHPOINT for read. */
200
+ tlb_addr |= tlbe->addr_read;
28
+
201
+
29
/* check next page if needed */
202
/* Notice an IO access or a needs-MMU-lookup access */
30
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
203
if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
31
phys_page2 = -1;
204
/* There's really nothing that can be done to
32
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
205
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
33
orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
206
}
34
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
207
35
tb_destroy(tb);
208
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
36
+ tcg_tb_remove(tb);
209
- QEMU_BUILD_BUG_ON(PAGE_READ != BP_MEM_READ);
37
return existing_tb;
210
- QEMU_BUILD_BUG_ON(PAGE_WRITE != BP_MEM_WRITE);
38
}
211
- /* therefore prot == watchpoint bits */
39
- tcg_tb_insert(tb);
212
- cpu_check_watchpoint(env_cpu(env), addr, size,
40
return tb;
213
- full->attrs, prot, retaddr);
41
}
214
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs,
42
215
+ BP_MEM_READ | BP_MEM_WRITE, retaddr);
216
}
217
218
return hostaddr;
219
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/user-exec.c
222
+++ b/accel/tcg/user-exec.c
223
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
224
225
/*
226
* Do not allow unaligned operations to proceed. Return the host address.
227
- *
228
- * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
229
*/
230
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
231
- MemOpIdx oi, int size, int prot,
232
- uintptr_t retaddr)
233
+ MemOpIdx oi, int size, uintptr_t retaddr)
234
{
235
MemOp mop = get_memop(oi);
236
int a_bits = get_alignment_bits(mop);
237
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
238
239
/* Enforce guest required alignment. */
240
if (unlikely(addr & ((1 << a_bits) - 1))) {
241
- MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE;
242
- cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr);
243
+ cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, retaddr);
244
}
245
246
/* Enforce qemu required alignment. */
43
--
247
--
44
2.25.1
248
2.34.1
45
249
46
250
1
Using gen_goto_tb directly misses the single-step check.
1
These symbols will shortly become dynamic runtime tests and
2
Let the branch or debug exception be emitted by arm_tr_tb_stop.
2
are therefore not appropriate for the preprocessor. Use the
3
matching CONFIG_* symbols for that purpose.
3
4
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
target/arm/translate.c | 4 ++--
8
host/include/aarch64/host/atomic128-cas.h | 2 ++
8
1 file changed, 2 insertions(+), 2 deletions(-)
9
host/include/generic/host/atomic128-ldst.h | 2 +-
10
accel/tcg/cputlb.c | 2 +-
11
accel/tcg/user-exec.c | 2 +-
12
4 files changed, 5 insertions(+), 3 deletions(-)
9
13
10
diff --git a/target/arm/translate.c b/target/arm/translate.c
14
diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
11
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate.c
16
--- a/host/include/aarch64/host/atomic128-cas.h
13
+++ b/target/arm/translate.c
17
+++ b/host/include/aarch64/host/atomic128-cas.h
14
@@ -XXX,XX +XXX,XX @@ static bool trans_ISB(DisasContext *s, arg_ISB *a)
18
@@ -XXX,XX +XXX,XX @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
15
* self-modifying code correctly and also to take
19
16
* any pending interrupts immediately.
20
return int128_make128(oldl, oldh);
17
*/
18
- gen_goto_tb(s, 0, s->base.pc_next);
19
+ s->base.is_jmp = DISAS_TOO_MANY;
20
return true;
21
}
21
}
22
22
+
23
@@ -XXX,XX +XXX,XX @@ static bool trans_SB(DisasContext *s, arg_SB *a)
23
+# define CONFIG_CMPXCHG128 1
24
* for TCG; MB and end the TB instead.
24
# define HAVE_CMPXCHG128 1
25
*/
25
#endif
26
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
26
27
- gen_goto_tb(s, 0, s->base.pc_next);
27
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
28
+ s->base.is_jmp = DISAS_TOO_MANY;
28
index XXXXXXX..XXXXXXX 100644
29
return true;
29
--- a/host/include/generic/host/atomic128-ldst.h
30
+++ b/host/include/generic/host/atomic128-ldst.h
31
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
30
}
32
}
31
33
34
# define HAVE_ATOMIC128 1
35
-#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
36
+#elif defined(CONFIG_CMPXCHG128) && !defined(CONFIG_USER_ONLY)
37
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
38
atomic16_read(Int128 *ptr)
39
{
40
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/accel/tcg/cputlb.c
43
+++ b/accel/tcg/cputlb.c
44
@@ -XXX,XX +XXX,XX @@ void cpu_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
45
#include "atomic_template.h"
46
#endif
47
48
-#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
49
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
50
#define DATA_SIZE 16
51
#include "atomic_template.h"
52
#endif
53
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/accel/tcg/user-exec.c
56
+++ b/accel/tcg/user-exec.c
57
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
58
#include "atomic_template.h"
59
#endif
60
61
-#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
62
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
63
#define DATA_SIZE 16
64
#include "atomic_template.h"
65
#endif
32
--
66
--
33
2.25.1
67
2.34.1
34
68
35
69
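
The compile-time/runtime split above follows a common pattern: a CONFIG_-style macro
answers "can the compiler emit it" and stays usable in #if, while the HAVE_-style test
becomes an ordinary runtime value. A standalone sketch with made-up names
(CONFIG_TOY_INT128 and have_toy_feature are illustrative, not QEMU symbols):

    #include <stdbool.h>
    #include <stdio.h>

    /* Compile-time capability: fine for the preprocessor. */
    #if defined(__SIZEOF_INT128__)
    # define CONFIG_TOY_INT128 1
    #else
    # define CONFIG_TOY_INT128 0
    #endif

    /* Runtime capability: only known after probing the host CPU, so a variable. */
    static bool have_toy_feature = true;   /* a startup probe would set this */

    int main(void)
    {
        if (CONFIG_TOY_INT128 && have_toy_feature) {
            puts("fast path: compiled in and present on this CPU");
        } else {
            puts("generic fallback");
        }
        return 0;
    }
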
1
We have not needed to end a TB for I/O since ba3e7926691
1
Create both atomic16_read_ro and atomic16_read_rw.
2
("icount: clean up cpu_can_io at the entry to the block"),
2
Previously we pretended that we had atomic16_read in system mode,
3
and gdbstub singlestep is handled by the generic function.
3
because we "know" that all ram is always writable to the host.
4
Now, expose read-only and read-write versions all of the time.
4
5
5
Drop the unused 'n' argument to use_goto_tb.
6
For aarch64, do not fall back to __atomic_load_16 even if
7
supported by the compiler, to work around a clang bug.
6
8
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
11
---
10
target/arm/translate-a64.c | 25 +++++--------------------
12
host/include/aarch64/host/atomic128-ldst.h | 21 ++++++++-------
11
1 file changed, 5 insertions(+), 20 deletions(-)
13
host/include/generic/host/atomic128-ldst.h | 31 ++++++++++++++++------
14
target/s390x/tcg/mem_helper.c | 2 +-
15
3 files changed, 36 insertions(+), 18 deletions(-)
12
16
13
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/translate-a64.c
19
--- a/host/include/aarch64/host/atomic128-ldst.h
16
+++ b/target/arm/translate-a64.c
20
+++ b/host/include/aarch64/host/atomic128-ldst.h
17
@@ -XXX,XX +XXX,XX @@ static void gen_step_complete_exception(DisasContext *s)
21
@@ -XXX,XX +XXX,XX @@
18
s->base.is_jmp = DISAS_NORETURN;
22
#ifndef AARCH64_ATOMIC128_LDST_H
23
#define AARCH64_ATOMIC128_LDST_H
24
25
-/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
26
-#if !defined(CONFIG_ATOMIC128) && !defined(CONFIG_USER_ONLY)
27
-/* We can do better than cmpxchg for AArch64. */
28
-static inline Int128 atomic16_read(Int128 *ptr)
29
+/*
30
+ * Through gcc 10, aarch64 has no support for 128-bit atomics.
31
+ * Through clang 16, without -march=armv8.4-a, __atomic_load_16
32
+ * is incorrectly expanded to a read-write operation.
33
+ */
34
+
35
+#define HAVE_ATOMIC128_RO 0
36
+#define HAVE_ATOMIC128_RW 1
37
+
38
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
39
+
40
+static inline Int128 atomic16_read_rw(Int128 *ptr)
41
{
42
uint64_t l, h;
43
uint32_t tmp;
44
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
45
: [l] "r"(l), [h] "r"(h));
19
}
46
}
20
47
21
-static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
48
-# define HAVE_ATOMIC128 1
22
+static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
49
-#else
23
{
50
-#include "host/include/generic/host/atomic128-ldst.h"
24
- /* No direct tb linking with singlestep (either QEMU's or the ARM
25
- * debug architecture kind) or deterministic io
26
- */
27
- if (s->base.singlestep_enabled || s->ss_active ||
28
- (tb_cflags(s->base.tb) & CF_LAST_IO)) {
29
+ if (s->ss_active) {
30
return false;
31
}
32
-
33
-#ifndef CONFIG_USER_ONLY
34
- /* Only link tbs from inside the same guest page */
35
- if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
36
- return false;
37
- }
38
-#endif
51
-#endif
39
-
52
-
40
- return true;
53
#endif /* AARCH64_ATOMIC128_LDST_H */
41
+ return translator_use_goto_tb(&s->base, dest);
54
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/host/include/generic/host/atomic128-ldst.h
57
+++ b/host/include/generic/host/atomic128-ldst.h
58
@@ -XXX,XX +XXX,XX @@
59
#define HOST_ATOMIC128_LDST_H
60
61
#if defined(CONFIG_ATOMIC128)
62
+# define HAVE_ATOMIC128_RO 1
63
+# define HAVE_ATOMIC128_RW 1
64
+
65
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
66
-atomic16_read(Int128 *ptr)
67
+atomic16_read_ro(const Int128 *ptr)
68
{
69
- __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
70
+ const __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
71
Int128Alias r;
72
73
r.i = qatomic_read__nocheck(ptr_align);
74
return r.s;
42
}
75
}
43
76
44
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
77
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
78
+atomic16_read_rw(Int128 *ptr)
79
+{
80
+ return atomic16_read_ro(ptr);
81
+}
82
+
83
static inline void ATTRIBUTE_ATOMIC128_OPT
84
atomic16_set(Int128 *ptr, Int128 val)
45
{
85
{
46
- const TranslationBlock *tb;
86
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
47
-
87
qatomic_set__nocheck(ptr_align, v.i);
48
- tb = s->base.tb;
88
}
49
- if (use_goto_tb(s, n, dest)) {
89
50
+ if (use_goto_tb(s, dest)) {
90
-# define HAVE_ATOMIC128 1
51
tcg_gen_goto_tb(n);
91
-#elif defined(CONFIG_CMPXCHG128) && !defined(CONFIG_USER_ONLY)
52
gen_a64_set_pc_im(dest);
92
+#elif defined(CONFIG_CMPXCHG128)
53
- tcg_gen_exit_tb(tb, n);
93
+# define HAVE_ATOMIC128_RO 0
54
+ tcg_gen_exit_tb(s->base.tb, n);
94
+# define HAVE_ATOMIC128_RW 1
55
s->base.is_jmp = DISAS_NORETURN;
95
+
56
} else {
96
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
57
gen_a64_set_pc_im(dest);
97
+
98
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
99
-atomic16_read(Int128 *ptr)
100
+atomic16_read_rw(Int128 *ptr)
101
{
102
/* Maybe replace 0 with 0, returning the old value. */
103
Int128 z = int128_make64(0);
104
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
105
} while (int128_ne(old, cmp));
106
}
107
108
-# define HAVE_ATOMIC128 1
109
#else
110
+# define HAVE_ATOMIC128_RO 0
111
+# define HAVE_ATOMIC128_RW 0
112
+
113
/* Fallback definitions that must be optimized away, or error. */
114
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
115
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
116
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_rw(Int128 *ptr);
117
void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
118
-# define HAVE_ATOMIC128 0
119
#endif
120
121
#endif /* HOST_ATOMIC128_LDST_H */
122
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/target/s390x/tcg/mem_helper.c
125
+++ b/target/s390x/tcg/mem_helper.c
126
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
127
max = 3;
128
#endif
129
if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
130
- (HAVE_ATOMIC128 ? 0 : sc > max)) {
131
+ (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
132
cpu_loop_exit_atomic(env_cpu(env), ra);
133
}
134
}
58
--
135
--
59
2.25.1
136
2.34.1
60
137
61
138
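
The read-only/read-write split above exists because the cmpxchg fallback implements a
16-byte "load" by swapping the old value with itself, which is a store as far as page
permissions are concerned. A standalone sketch of that fallback using only GCC/Clang
builtins (not the QEMU code; x86-64 hosts need -mcx16):

    /* build: cc -O2 -mcx16 read16.c && ./a.out */
    #include <stdio.h>

    typedef unsigned __int128 u128;

    /* "Load" by compare-and-swap of 0 with 0: returns the old value, but the
       CPU still performs a write, so the page must be mapped writable. */
    static u128 read16_rw(u128 *p)
    {
        return __sync_val_compare_and_swap_16(p, 0, 0);
    }

    int main(void)
    {
        static u128 slot __attribute__((aligned(16)));
        slot = ((u128)1 << 64) | 2;
        u128 v = read16_rw(&slot);
        printf("%llu %llu\n", (unsigned long long)(v >> 64), (unsigned long long)v);
        return 0;
    }

A genuinely read-only variant has to come from a real 128-bit load instruction, which is
what the separate atomic16_read_ro entry point is reserved for.
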
1
From: Liren Wei <lrwei@bupt.edu.cn>
1
Remove the locally defined load_atomic16 and store_atomic16,
2
2
along with HAVE_al16 and HAVE_al16_fast in favor of the
3
The function is called only at tcg_gen_code() when duplicated TBs
3
routines defined in atomic128.h.
4
are translated by different threads, and when the tcg_region_tree
4
5
is reset. Bake it into the underlying GTree as its value destroy
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
function to unite these situations.
7
Also remove tcg_region_tree_traverse() which now becomes useless.
8
9
Signed-off-by: Liren Wei <lrwei@bupt.edu.cn>
10
Message-Id: <8dc352f08d038c4e7a1f5f56962398cdc700c3aa.1625404483.git.lrwei@bupt.edu.cn>
11
[rth: Name the new tb_tc_cmp parameter correctly.]
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
7
---
14
include/tcg/tcg.h | 1 -
8
accel/tcg/cputlb.c | 2 +-
15
accel/tcg/translate-all.c | 6 ------
9
accel/tcg/ldst_atomicity.c.inc | 118 +++++++--------------------------
16
tcg/region.c | 19 ++++++++-----------
10
2 files changed, 24 insertions(+), 96 deletions(-)
17
3 files changed, 8 insertions(+), 18 deletions(-)
11
18
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
19
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/include/tcg/tcg.h
14
--- a/accel/tcg/cputlb.c
22
+++ b/include/tcg/tcg.h
15
+++ b/accel/tcg/cputlb.c
23
@@ -XXX,XX +XXX,XX @@ void *tcg_malloc_internal(TCGContext *s, int size);
16
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
24
void tcg_pool_reset(TCGContext *s);
17
25
TranslationBlock *tcg_tb_alloc(TCGContext *s);
18
case MO_ATOM_WITHIN16_PAIR:
26
19
/* Since size > 8, this is the half that must be atomic. */
27
-void tb_destroy(TranslationBlock *tb);
20
- if (!HAVE_al16) {
28
void tcg_region_reset_all(void);
21
+ if (!HAVE_ATOMIC128_RW) {
29
22
cpu_loop_exit_atomic(env_cpu(env), ra);
30
size_t tcg_code_size(void);
23
}
31
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
return store_whole_le16(p->haddr, p->size, val_le);
25
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
32
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
33
--- a/accel/tcg/translate-all.c
27
--- a/accel/tcg/ldst_atomicity.c.inc
34
+++ b/accel/tcg/translate-all.c
28
+++ b/accel/tcg/ldst_atomicity.c.inc
35
@@ -XXX,XX +XXX,XX @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
29
@@ -XXX,XX +XXX,XX @@
36
return 0;
30
#endif
31
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
32
33
-#if defined(CONFIG_ATOMIC128)
34
-# define HAVE_al16_fast true
35
-#else
36
-# define HAVE_al16_fast false
37
-#endif
38
-#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
39
-# define HAVE_al16 true
40
-#else
41
-# define HAVE_al16 false
42
-#endif
43
-
44
-
45
/**
46
* required_atomicity:
47
*
48
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
49
return qatomic_read__nocheck(p);
37
}
50
}
38
51
39
-void tb_destroy(TranslationBlock *tb)
52
-/**
53
- * load_atomic16:
54
- * @pv: host address
55
- *
56
- * Atomically load 16 aligned bytes from @pv.
57
- */
58
-static inline Int128 ATTRIBUTE_ATOMIC128_OPT
59
-load_atomic16(void *pv)
40
-{
60
-{
41
- qemu_spin_destroy(&tb->jmp_lock);
61
-#ifdef CONFIG_ATOMIC128
62
- __uint128_t *p = __builtin_assume_aligned(pv, 16);
63
- Int128Alias r;
64
-
65
- r.u = qatomic_read__nocheck(p);
66
- return r.s;
67
-#else
68
- qemu_build_not_reached();
69
-#endif
42
-}
70
-}
43
-
71
-
44
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
72
/**
73
* load_atomic8_or_exit:
74
* @env: cpu context
75
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
45
{
76
{
46
/*
77
Int128 *p = __builtin_assume_aligned(pv, 16);
47
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
78
48
79
- if (HAVE_al16_fast) {
49
orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
80
- return load_atomic16(p);
50
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
81
+ if (HAVE_ATOMIC128_RO) {
51
- tb_destroy(tb);
82
+ return atomic16_read_ro(p);
52
tcg_tb_remove(tb);
83
}
53
return existing_tb;
84
54
}
85
#ifdef CONFIG_USER_ONLY
55
diff --git a/tcg/region.c b/tcg/region.c
86
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
56
index XXXXXXX..XXXXXXX 100644
87
* In system mode all guest pages are writable, and for user-only
57
--- a/tcg/region.c
88
* we have just checked writability. Try cmpxchg.
58
+++ b/tcg/region.c
89
*/
59
@@ -XXX,XX +XXX,XX @@ static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
90
-#if defined(CONFIG_CMPXCHG128)
60
return 0;
91
- /* Swap 0 with 0, with the side-effect of returning the old value. */
92
- {
93
- Int128Alias r;
94
- r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
95
- return r.s;
96
+ if (HAVE_ATOMIC128_RW) {
97
+ return atomic16_read_rw(p);
98
}
99
-#endif
100
101
/* Ultimate fallback: re-execute in serial context. */
102
cpu_loop_exit_atomic(env_cpu(env), ra);
103
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
104
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
105
load_atom_extract_al16_or_al8(void *pv, int s)
106
{
107
-#if defined(CONFIG_ATOMIC128)
108
uintptr_t pi = (uintptr_t)pv;
109
int o = pi & 7;
110
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
111
- __uint128_t r;
112
+ Int128 r;
113
114
pv = (void *)(pi & ~7);
115
if (pi & 8) {
116
@@ -XXX,XX +XXX,XX @@ load_atom_extract_al16_or_al8(void *pv, int s)
117
uint64_t b = qatomic_read__nocheck(p8 + 1);
118
119
if (HOST_BIG_ENDIAN) {
120
- r = ((__uint128_t)a << 64) | b;
121
+ r = int128_make128(b, a);
122
} else {
123
- r = ((__uint128_t)b << 64) | a;
124
+ r = int128_make128(a, b);
125
}
126
} else {
127
- __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
128
- r = qatomic_read__nocheck(p16);
129
+ r = atomic16_read_ro(pv);
130
}
131
- return r >> shr;
132
-#else
133
- qemu_build_not_reached();
134
-#endif
135
+ return int128_getlo(int128_urshift(r, shr));
61
}
136
}
62
137
63
-static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
138
/**
64
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
139
@@ -XXX,XX +XXX,XX @@ static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
65
{
140
if (likely((pi & 1) == 0)) {
66
const struct tb_tc *a = ap;
141
return load_atomic2(pv);
67
const struct tb_tc *b = bp;
142
}
68
@@ -XXX,XX +XXX,XX @@ static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
143
- if (HAVE_al16_fast) {
69
return ptr_cmp_tb_tc(b->ptr, a);
144
+ if (HAVE_ATOMIC128_RO) {
145
return load_atom_extract_al16_or_al8(pv, 2);
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
149
if (likely((pi & 3) == 0)) {
150
return load_atomic4(pv);
151
}
152
- if (HAVE_al16_fast) {
153
+ if (HAVE_ATOMIC128_RO) {
154
return load_atom_extract_al16_or_al8(pv, 4);
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
158
if (HAVE_al8 && likely((pi & 7) == 0)) {
159
return load_atomic8(pv);
160
}
161
- if (HAVE_al16_fast) {
162
+ if (HAVE_ATOMIC128_RO) {
163
return load_atom_extract_al16_or_al8(pv, 8);
164
}
165
166
@@ -XXX,XX +XXX,XX @@ static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
167
* If the host does not support 16-byte atomics, wait until we have
168
* examined the atomicity parameters below.
169
*/
170
- if (HAVE_al16_fast && likely((pi & 15) == 0)) {
171
- return load_atomic16(pv);
172
+ if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
173
+ return atomic16_read_ro(pv);
174
}
175
176
atmax = required_atomicity(env, pi, memop);
177
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
178
qatomic_set__nocheck(p, val);
70
}
179
}
71
180
72
+static void tb_destroy(gpointer value)
181
-/**
73
+{
182
- * store_atomic16:
74
+ TranslationBlock *tb = value;
183
- * @pv: host address
75
+ qemu_spin_destroy(&tb->jmp_lock);
184
- * @val: value to store
76
+}
185
- *
77
+
186
- * Atomically store 16 aligned bytes to @pv.
78
static void tcg_region_trees_init(void)
187
- */
79
{
188
-static inline void ATTRIBUTE_ATOMIC128_OPT
80
size_t i;
189
-store_atomic16(void *pv, Int128Alias val)
81
@@ -XXX,XX +XXX,XX @@ static void tcg_region_trees_init(void)
82
struct tcg_region_tree *rt = region_trees + i * tree_size;
83
84
qemu_mutex_init(&rt->lock);
85
- rt->tree = g_tree_new(tb_tc_cmp);
86
+ rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
87
}
88
}
89
90
@@ -XXX,XX +XXX,XX @@ size_t tcg_nb_tbs(void)
91
return nb_tbs;
92
}
93
94
-static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
95
-{
190
-{
96
- TranslationBlock *tb = v;
191
-#if defined(CONFIG_ATOMIC128)
97
-
192
- __uint128_t *pu = __builtin_assume_aligned(pv, 16);
98
- tb_destroy(tb);
193
- qatomic_set__nocheck(pu, val.u);
99
- return FALSE;
194
-#elif defined(CONFIG_CMPXCHG128)
195
- __uint128_t *pu = __builtin_assume_aligned(pv, 16);
196
- __uint128_t o;
197
-
198
- /*
199
- * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
200
- * defer to libatomic, so we must use __sync_*_compare_and_swap_16
201
- * and accept the sequential consistency that comes with it.
202
- */
203
- do {
204
- o = *pu;
205
- } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
206
-#else
207
- qemu_build_not_reached();
208
-#endif
100
-}
209
-}
101
-
210
-
102
static void tcg_region_tree_reset_all(void)
211
/**
103
{
212
* store_atom_4x2
104
size_t i;
213
*/
105
@@ -XXX,XX +XXX,XX @@ static void tcg_region_tree_reset_all(void)
214
@@ -XXX,XX +XXX,XX @@ static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
106
for (i = 0; i < region.n; i++) {
215
int sh = o * 8;
107
struct tcg_region_tree *rt = region_trees + i * tree_size;
216
Int128 m, v;
108
217
109
- g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
218
- qemu_build_assert(HAVE_al16);
110
/* Increment the refcount first so that destroy acts as a reset */
219
+ qemu_build_assert(HAVE_ATOMIC128_RW);
111
g_tree_ref(rt->tree);
220
112
g_tree_destroy(rt->tree);
221
/* Like MAKE_64BIT_MASK(0, sz), but larger. */
222
if (sz <= 64) {
223
@@ -XXX,XX +XXX,XX @@ static void store_atom_2(CPUArchState *env, uintptr_t ra,
224
return;
225
}
226
} else if ((pi & 15) == 7) {
227
- if (HAVE_al16) {
228
+ if (HAVE_ATOMIC128_RW) {
229
Int128 v = int128_lshift(int128_make64(val), 56);
230
Int128 m = int128_lshift(int128_make64(0xffff), 56);
231
store_atom_insert_al16(pv - 7, v, m);
232
@@ -XXX,XX +XXX,XX @@ static void store_atom_4(CPUArchState *env, uintptr_t ra,
233
return;
234
}
235
} else {
236
- if (HAVE_al16) {
237
+ if (HAVE_ATOMIC128_RW) {
238
store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
239
return;
240
}
241
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
242
}
243
break;
244
case MO_64:
245
- if (HAVE_al16) {
246
+ if (HAVE_ATOMIC128_RW) {
247
store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
248
return;
249
}
250
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
251
uint64_t a, b;
252
int atmax;
253
254
- if (HAVE_al16_fast && likely((pi & 15) == 0)) {
255
- store_atomic16(pv, val);
256
+ if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
257
+ atomic16_set(pv, val);
258
return;
259
}
260
261
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
262
}
263
break;
264
case -MO_64:
265
- if (HAVE_al16) {
266
+ if (HAVE_ATOMIC128_RW) {
267
uint64_t val_le;
268
int s2 = pi & 15;
269
int s1 = 16 - s2;
270
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
271
}
272
break;
273
case MO_128:
274
- if (HAVE_al16) {
275
- store_atomic16(pv, val);
276
+ if (HAVE_ATOMIC128_RW) {
277
+ atomic16_set(pv, val);
278
return;
279
}
280
break;
113
--
281
--
114
2.25.1
282
2.34.1
115
283
116
284
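
The region-tree change above leans on GLib's value-destroy hook: once the destructor is
registered with g_tree_new_full(), it runs automatically whenever an entry is removed or
the tree is destroyed, so no separate traverse-and-free pass is needed. A self-contained
GLib example of the same mechanism (the names here are illustrative, not QEMU's):

    /* build: cc gtree_destroy.c $(pkg-config --cflags --libs glib-2.0) */
    #include <glib.h>
    #include <stdio.h>

    static gint cmp_int(gconstpointer a, gconstpointer b, gpointer data)
    {
        return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
    }

    /* Stand-in for tb_destroy(): fires for every value that leaves the tree. */
    static void value_destroy(gpointer value)
    {
        printf("destroying %s\n", (char *)value);
    }

    int main(void)
    {
        GTree *t = g_tree_new_full(cmp_int, NULL, NULL, value_destroy);

        g_tree_insert(t, GINT_TO_POINTER(1), "one");
        g_tree_insert(t, GINT_TO_POINTER(2), "two");

        g_tree_remove(t, GINT_TO_POINTER(1));   /* value_destroy("one") runs here */
        g_tree_destroy(t);                      /* ... and for the rest on teardown */
        return 0;
    }
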
1
We can call do_tb_phys_invalidate from an I/O context, which has
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
no per-thread tcg_ctx. Move this to tb_ctx, which is global.
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
The actual update still takes place with a lock held, so only
4
an atomic set is required, not an atomic increment.
5
6
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/457
7
Tested-by: Viktor Ashirov <vashirov@redhat.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
4
---
10
accel/tcg/tb-context.h | 1 +
5
include/tcg/debug-assert.h | 17 +++++++++++++++++
11
include/tcg/tcg.h | 3 ---
6
include/tcg/tcg.h | 9 +--------
12
accel/tcg/translate-all.c | 8 ++++----
7
MAINTAINERS | 1 +
13
tcg/region.c | 14 --------------
8
3 files changed, 19 insertions(+), 8 deletions(-)
14
4 files changed, 5 insertions(+), 21 deletions(-)
9
create mode 100644 include/tcg/debug-assert.h
15
10
16
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
11
diff --git a/include/tcg/debug-assert.h b/include/tcg/debug-assert.h
17
index XXXXXXX..XXXXXXX 100644
12
new file mode 100644
18
--- a/accel/tcg/tb-context.h
13
index XXXXXXX..XXXXXXX
19
+++ b/accel/tcg/tb-context.h
14
--- /dev/null
20
@@ -XXX,XX +XXX,XX @@ struct TBContext {
15
+++ b/include/tcg/debug-assert.h
21
16
@@ -XXX,XX +XXX,XX @@
22
/* statistics */
17
+/* SPDX-License-Identifier: MIT */
23
unsigned tb_flush_count;
18
+/*
24
+ unsigned tb_phys_invalidate_count;
19
+ * Define tcg_debug_assert
25
};
20
+ * Copyright (c) 2008 Fabrice Bellard
26
21
+ */
27
extern TBContext tb_ctx;
22
+
23
+#ifndef TCG_DEBUG_ASSERT_H
24
+#define TCG_DEBUG_ASSERT_H
25
+
26
+#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
27
+# define tcg_debug_assert(X) do { assert(X); } while (0)
28
+#else
29
+# define tcg_debug_assert(X) \
30
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
31
+#endif
32
+
33
+#endif
28
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
34
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
29
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
30
--- a/include/tcg/tcg.h
36
--- a/include/tcg/tcg.h
31
+++ b/include/tcg/tcg.h
37
+++ b/include/tcg/tcg.h
32
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
38
@@ -XXX,XX +XXX,XX @@
33
/* Threshold to flush the translated code buffer. */
39
#include "tcg/tcg-mo.h"
34
void *code_gen_highwater;
40
#include "tcg-target.h"
35
41
#include "tcg/tcg-cond.h"
36
- size_t tb_phys_invalidate_count;
42
+#include "tcg/debug-assert.h"
43
44
/* XXX: make safe guess about sizes */
45
#define MAX_OP_PER_INSTR 266
46
@@ -XXX,XX +XXX,XX @@ typedef uint64_t tcg_insn_unit;
47
/* The port better have done this. */
48
#endif
49
37
-
50
-
38
/* Track which vCPU triggers events */
51
-#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
39
CPUState *cpu; /* *_trans */
52
-# define tcg_debug_assert(X) do { assert(X); } while (0)
40
53
-#else
41
@@ -XXX,XX +XXX,XX @@ size_t tcg_code_capacity(void);
54
-# define tcg_debug_assert(X) \
42
55
- do { if (!(X)) { __builtin_unreachable(); } } while (0)
43
void tcg_tb_insert(TranslationBlock *tb);
56
-#endif
44
void tcg_tb_remove(TranslationBlock *tb);
57
-
45
-size_t tcg_tb_phys_invalidate_count(void);
58
typedef struct TCGRelocation TCGRelocation;
46
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
59
struct TCGRelocation {
47
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
60
QSIMPLEQ_ENTRY(TCGRelocation) next;
48
size_t tcg_nb_tbs(void);
61
diff --git a/MAINTAINERS b/MAINTAINERS
49
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
50
index XXXXXXX..XXXXXXX 100644
62
index XXXXXXX..XXXXXXX 100644
51
--- a/accel/tcg/translate-all.c
63
--- a/MAINTAINERS
52
+++ b/accel/tcg/translate-all.c
64
+++ b/MAINTAINERS
53
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
65
@@ -XXX,XX +XXX,XX @@ F: include/sysemu/tcg.h
54
/* suppress any remaining jumps to this TB */
66
F: include/hw/core/tcg-cpu-ops.h
55
tb_jmp_unlink(tb);
67
F: host/include/*/host/cpuinfo.h
56
68
F: util/cpuinfo-*.c
57
- qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
69
+F: include/tcg/
58
- tcg_ctx->tb_phys_invalidate_count + 1);
70
59
+ qatomic_set(&tb_ctx.tb_phys_invalidate_count,
71
FPU emulation
60
+ tb_ctx.tb_phys_invalidate_count + 1);
72
M: Aurelien Jarno <aurelien@aurel32.net>
61
}
62
63
static void tb_phys_invalidate__locked(TranslationBlock *tb)
64
@@ -XXX,XX +XXX,XX @@ void dump_exec_info(void)
65
qemu_printf("\nStatistics:\n");
66
qemu_printf("TB flush count %u\n",
67
qatomic_read(&tb_ctx.tb_flush_count));
68
- qemu_printf("TB invalidate count %zu\n",
69
- tcg_tb_phys_invalidate_count());
70
+ qemu_printf("TB invalidate count %u\n",
71
+ qatomic_read(&tb_ctx.tb_phys_invalidate_count));
72
73
tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
74
qemu_printf("TLB full flushes %zu\n", flush_full);
75
diff --git a/tcg/region.c b/tcg/region.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/tcg/region.c
78
+++ b/tcg/region.c
79
@@ -XXX,XX +XXX,XX @@ size_t tcg_code_capacity(void)
80
81
return capacity;
82
}
83
-
84
-size_t tcg_tb_phys_invalidate_count(void)
85
-{
86
- unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
87
- unsigned int i;
88
- size_t total = 0;
89
-
90
- for (i = 0; i < n_ctxs; i++) {
91
- const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
92
-
93
- total += qatomic_read(&s->tb_phys_invalidate_count);
94
- }
95
- return total;
96
-}
97
--
73
--
98
2.25.1
74
2.34.1
99
75
100
76
diff view generated by jsdifflib
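For context on the tcg_debug_assert() definition being split out above: in debug builds the condition is checked with assert(), while in release builds __builtin_unreachable() turns the macro into an optimization hint, letting the compiler assume the condition holds. A minimal standalone sketch of the same assert-or-assume pattern, assuming a GCC/Clang-compatible compiler; the MY_DEBUG switch and clamp_index() helper are made up for illustration and are not part of the patch:

#include <assert.h>

#ifdef MY_DEBUG
# define my_debug_assert(X) do { assert(X); } while (0)
#else
# define my_debug_assert(X) \
    do { if (!(X)) { __builtin_unreachable(); } } while (0)
#endif

/* In a release build (MY_DEBUG unset) the compiler is allowed to assume
 * i < n here, under which i % n is simply i. */
static unsigned clamp_index(unsigned i, unsigned n)
{
    my_debug_assert(i < n);
    return i % n;
}
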
1
The non-single-step case of gen_goto_tb may use
1
Use __sync_bool_compare_and_swap_16 to control the loop,
2
tcg_gen_lookup_and_goto_ptr to indirectly chain.
2
rather than a separate comparison.
3
3
4
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
target/tricore/translate.c | 3 ++-
7
host/include/generic/host/atomic128-ldst.h | 11 +++++++----
8
1 file changed, 2 insertions(+), 1 deletion(-)
8
1 file changed, 7 insertions(+), 4 deletions(-)
9
9
10
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
10
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/tricore/translate.c
12
--- a/host/include/generic/host/atomic128-ldst.h
13
+++ b/target/tricore/translate.c
13
+++ b/host/include/generic/host/atomic128-ldst.h
14
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
14
@@ -XXX,XX +XXX,XX @@ atomic16_read_rw(Int128 *ptr)
15
gen_save_pc(dest);
15
static inline void ATTRIBUTE_ATOMIC128_OPT
16
if (ctx->base.singlestep_enabled) {
16
atomic16_set(Int128 *ptr, Int128 val)
17
generate_qemu_excp(ctx, EXCP_DEBUG);
17
{
18
+ } else {
18
- Int128 old = *ptr, cmp;
19
+ tcg_gen_lookup_and_goto_ptr();
19
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
20
}
20
+ __int128_t old;
21
- tcg_gen_exit_tb(NULL, 0);
21
+ Int128Alias new;
22
}
22
+
23
+ new.s = val;
24
do {
25
- cmp = old;
26
- old = atomic16_cmpxchg(ptr, cmp, val);
27
- } while (int128_ne(old, cmp));
28
+ old = *ptr_align;
29
+ } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
23
}
30
}
24
31
32
#else
25
--
33
--
26
2.25.1
34
2.34.1
27
35
28
36
diff view generated by jsdifflib
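The new atomic16_set() loop above can be read in isolation as follows: instead of comparing the value returned by the cmpxchg against the expected one, the boolean result of __sync_bool_compare_and_swap_16 both validates the racy read and terminates the loop. A minimal sketch of the same idea; store16_via_cas() is a made-up name, and this assumes a host compiler and target that provide the 16-byte __sync builtin (e.g. x86-64 built with -mcx16):

/* Atomically store a 16-byte value: re-read and retry until the CAS
 * succeeds against the value last observed. */
static inline void store16_via_cas(__int128 *ptr, __int128 val)
{
    __int128 *p = __builtin_assume_aligned(ptr, 16);
    __int128 old;

    do {
        old = *p;   /* racy read; validated by the CAS below */
    } while (!__sync_bool_compare_and_swap_16(p, old, val));
}
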
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
With FEAT_LSE2, load and store of int128 is directly supported.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
5
---
4
target/sh4/translate.c | 11 +++--------
6
host/include/aarch64/host/atomic128-ldst.h | 53 ++++++++++++++++------
5
1 file changed, 3 insertions(+), 8 deletions(-)
7
1 file changed, 40 insertions(+), 13 deletions(-)
6
8
7
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
9
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
8
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
9
--- a/target/sh4/translate.c
11
--- a/host/include/aarch64/host/atomic128-ldst.h
10
+++ b/target/sh4/translate.c
12
+++ b/host/include/aarch64/host/atomic128-ldst.h
11
@@ -XXX,XX +XXX,XX @@ static inline bool use_exit_tb(DisasContext *ctx)
13
@@ -XXX,XX +XXX,XX @@
12
return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
14
#ifndef AARCH64_ATOMIC128_LDST_H
15
#define AARCH64_ATOMIC128_LDST_H
16
17
+#include "host/cpuinfo.h"
18
+#include "tcg/debug-assert.h"
19
+
20
/*
21
* Through gcc 10, aarch64 has no support for 128-bit atomics.
22
* Through clang 16, without -march=armv8.4-a, __atomic_load_16
23
* is incorrectly expanded to a read-write operation.
24
+ *
25
+ * Anyway, this method allows runtime detection of FEAT_LSE2.
26
*/
27
28
-#define HAVE_ATOMIC128_RO 0
29
+#define HAVE_ATOMIC128_RO (cpuinfo & CPUINFO_LSE2)
30
#define HAVE_ATOMIC128_RW 1
31
32
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
33
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
34
+{
35
+ uint64_t l, h;
36
+
37
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
38
+ /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
39
+ asm("ldp %[l], %[h], %[mem]"
40
+ : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
41
+
42
+ return int128_make128(l, h);
43
+}
44
45
static inline Int128 atomic16_read_rw(Int128 *ptr)
46
{
47
uint64_t l, h;
48
uint32_t tmp;
49
50
- /* The load must be paired with the store to guarantee not tearing. */
51
- asm("0: ldxp %[l], %[h], %[mem]\n\t"
52
- "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
53
- "cbnz %w[tmp], 0b"
54
- : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
55
+ if (cpuinfo & CPUINFO_LSE2) {
56
+ /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
57
+ asm("ldp %[l], %[h], %[mem]"
58
+ : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
59
+ } else {
60
+ /* The load must be paired with the store to guarantee not tearing. */
61
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
62
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
63
+ "cbnz %w[tmp], 0b"
64
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
65
+ }
66
67
return int128_make128(l, h);
13
}
68
}
14
69
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
15
-static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
70
uint64_t l = int128_getlo(val), h = int128_gethi(val);
16
+static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
71
uint64_t t1, t2;
17
{
72
18
- /* Use a direct jump if in same page and singlestep not enabled */
73
- /* Load into temporaries to acquire the exclusive access lock. */
19
- if (unlikely(ctx->base.singlestep_enabled || use_exit_tb(ctx))) {
74
- asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
20
+ if (use_exit_tb(ctx)) {
75
- "stxp %w[t1], %[l], %[h], %[mem]\n\t"
21
return false;
76
- "cbnz %w[t1], 0b"
22
}
77
- : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
23
-#ifndef CONFIG_USER_ONLY
78
- : [l] "r"(l), [h] "r"(h));
24
- return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
79
+ if (cpuinfo & CPUINFO_LSE2) {
25
-#else
80
+ /* With FEAT_LSE2, 16-byte aligned STP is atomic. */
26
- return true;
81
+ asm("stp %[l], %[h], %[mem]"
27
-#endif
82
+ : [mem] "=m"(*ptr) : [l] "r"(l), [h] "r"(h));
28
+ return translator_use_goto_tb(&ctx->base, dest);
83
+ } else {
84
+ /* Load into temporaries to acquire the exclusive access lock. */
85
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
86
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
87
+ "cbnz %w[t1], 0b"
88
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
89
+ : [l] "r"(l), [h] "r"(h));
90
+ }
29
}
91
}
30
92
31
static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
93
#endif /* AARCH64_ATOMIC128_LDST_H */
32
--
94
--
33
2.25.1
95
2.34.1
34
96
35
97
diff view generated by jsdifflib
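The CPUINFO_LSE2 test above is a runtime check; on Linux hosts the kernel advertises FEAT_LSE2 (single-copy atomicity for aligned 16-byte LDP/STP) through an AArch64 hwcap bit. A self-contained sketch of such a probe, roughly what a cpuinfo initializer can do; detect_lse2() is a made-up name, and HWCAP_USCAT is the Linux hwcap believed to correspond to FEAT_LSE2, with a fallback define in case the libc headers predate it:

#include <stdbool.h>
#include <sys/auxv.h>

#ifndef HWCAP_USCAT
#define HWCAP_USCAT (1 << 25)   /* value from the Linux uapi headers */
#endif

static bool detect_lse2(void)
{
    /* FEAT_LSE2 guarantees 16-byte aligned LDP/STP are single-copy atomic. */
    return (getauxval(AT_HWCAP) & HWCAP_USCAT) != 0;
}
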
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
This had been set since the beginning, is never undefined,
2
and undefining it would seem to be harmful to debugging.
2
3
3
The root trace-events only declares a single TCG event:
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
5
$ git grep -w tcg trace-events
6
trace-events:115:# tcg/tcg-op.c
7
trace-events:137:vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
8
9
and only tcg/tcg-op.c uses it:
10
11
$ git grep -l trace_guest_mem_before_tcg
12
tcg/tcg-op.c
13
14
therefore it is pointless to include "trace-tcg.h" in each target
15
(because it is not used). Remove it.
16
17
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
18
Message-Id: <20210629050935.2570721-1-f4bug@amsat.org>
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
---
6
---
21
target/alpha/translate.c | 1 -
7
include/exec/exec-all.h | 3 ---
22
target/arm/translate-a64.c | 1 -
8
accel/tcg/cpu-exec.c | 2 --
23
target/arm/translate-sve.c | 1 -
9
accel/tcg/translate-all.c | 2 --
24
target/arm/translate.c | 1 -
10
accel/tcg/translator.c | 2 --
25
target/cris/translate.c | 1 -
11
target/sh4/translate.c | 2 --
26
target/hppa/translate.c | 1 -
12
target/sparc/translate.c | 2 --
27
target/i386/tcg/translate.c | 1 -
13
tcg/tcg.c | 9 +--------
28
target/m68k/translate.c | 1 -
14
7 files changed, 1 insertion(+), 21 deletions(-)
29
target/microblaze/translate.c | 1 -
30
target/mips/tcg/translate.c | 1 -
31
target/openrisc/translate.c | 1 -
32
target/ppc/translate.c | 1 -
33
target/rx/translate.c | 1 -
34
target/s390x/translate.c | 1 -
35
target/sh4/translate.c | 1 -
36
target/sparc/translate.c | 1 -
37
target/xtensa/translate.c | 1 -
38
17 files changed, 17 deletions(-)
39
15
40
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
16
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
41
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
42
--- a/target/alpha/translate.c
18
--- a/include/exec/exec-all.h
43
+++ b/target/alpha/translate.c
19
+++ b/include/exec/exec-all.h
44
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@
45
#include "exec/cpu_ldst.h"
21
#include "qemu/interval-tree.h"
46
#include "exec/helper-proto.h"
22
#include "qemu/clang-tsa.h"
47
#include "exec/helper-gen.h"
23
48
-#include "trace-tcg.h"
24
-/* allow to see translation results - the slowdown should be negligible, so we leave it */
49
#include "exec/translator.h"
25
-#define DEBUG_DISAS
50
#include "exec/log.h"
26
-
51
27
/* Page tracking code uses ram addresses in system mode, and virtual
52
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
28
addresses in userspace mode. Define tb_page_addr_t to be an appropriate
29
type. */
30
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
53
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
54
--- a/target/arm/translate-a64.c
32
--- a/accel/tcg/cpu-exec.c
55
+++ b/target/arm/translate-a64.c
33
+++ b/accel/tcg/cpu-exec.c
56
@@ -XXX,XX +XXX,XX @@
34
@@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
57
#include "exec/helper-gen.h"
35
cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
58
#include "exec/log.h"
36
tb->flags, tb->cflags, lookup_symbol(pc));
59
37
60
-#include "trace-tcg.h"
38
-#if defined(DEBUG_DISAS)
61
#include "translate-a64.h"
39
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
62
#include "qemu/atomic128.h"
40
FILE *logfile = qemu_log_trylock();
63
41
if (logfile) {
64
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
42
@@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
43
qemu_log_unlock(logfile);
44
}
45
}
46
-#endif /* DEBUG_DISAS */
47
}
48
}
49
50
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
65
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
66
--- a/target/arm/translate-sve.c
52
--- a/accel/tcg/translate-all.c
67
+++ b/target/arm/translate-sve.c
53
+++ b/accel/tcg/translate-all.c
68
@@ -XXX,XX +XXX,XX @@
54
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
69
#include "exec/helper-proto.h"
55
qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
70
#include "exec/helper-gen.h"
56
#endif
71
#include "exec/log.h"
57
72
-#include "trace-tcg.h"
58
-#ifdef DEBUG_DISAS
73
#include "translate-a64.h"
59
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
74
#include "fpu/softfloat.h"
60
qemu_log_in_addr_range(pc)) {
75
61
FILE *logfile = qemu_log_trylock();
76
diff --git a/target/arm/translate.c b/target/arm/translate.c
62
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
63
qemu_log_unlock(logfile);
64
}
65
}
66
-#endif
67
68
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
69
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
70
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
77
index XXXXXXX..XXXXXXX 100644
71
index XXXXXXX..XXXXXXX 100644
78
--- a/target/arm/translate.c
72
--- a/accel/tcg/translator.c
79
+++ b/target/arm/translate.c
73
+++ b/accel/tcg/translator.c
80
@@ -XXX,XX +XXX,XX @@
74
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
81
#include "exec/helper-proto.h"
75
tb->size = db->pc_next - db->pc_first;
82
#include "exec/helper-gen.h"
76
tb->icount = db->num_insns;
83
77
84
-#include "trace-tcg.h"
78
-#ifdef DEBUG_DISAS
85
#include "exec/log.h"
79
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
86
80
&& qemu_log_in_addr_range(db->pc_first)) {
87
81
FILE *logfile = qemu_log_trylock();
88
diff --git a/target/cris/translate.c b/target/cris/translate.c
82
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
89
index XXXXXXX..XXXXXXX 100644
83
qemu_log_unlock(logfile);
90
--- a/target/cris/translate.c
84
}
91
+++ b/target/cris/translate.c
85
}
92
@@ -XXX,XX +XXX,XX @@
86
-#endif
93
87
}
94
#include "exec/helper-gen.h"
88
95
89
static void *translator_access(CPUArchState *env, DisasContextBase *db,
96
-#include "trace-tcg.h"
97
#include "exec/log.h"
98
99
100
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/target/hppa/translate.c
103
+++ b/target/hppa/translate.c
104
@@ -XXX,XX +XXX,XX @@
105
#include "exec/helper-proto.h"
106
#include "exec/helper-gen.h"
107
#include "exec/translator.h"
108
-#include "trace-tcg.h"
109
#include "exec/log.h"
110
111
/* Since we have a distinction between register size and address size,
112
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/target/i386/tcg/translate.c
115
+++ b/target/i386/tcg/translate.c
116
@@ -XXX,XX +XXX,XX @@
117
#include "exec/helper-gen.h"
118
#include "helper-tcg.h"
119
120
-#include "trace-tcg.h"
121
#include "exec/log.h"
122
123
#define PREFIX_REPZ 0x01
124
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/target/m68k/translate.c
127
+++ b/target/m68k/translate.c
128
@@ -XXX,XX +XXX,XX @@
129
#include "exec/helper-proto.h"
130
#include "exec/helper-gen.h"
131
132
-#include "trace-tcg.h"
133
#include "exec/log.h"
134
#include "fpu/softfloat.h"
135
136
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
137
index XXXXXXX..XXXXXXX 100644
138
--- a/target/microblaze/translate.c
139
+++ b/target/microblaze/translate.c
140
@@ -XXX,XX +XXX,XX @@
141
#include "exec/translator.h"
142
#include "qemu/qemu-print.h"
143
144
-#include "trace-tcg.h"
145
#include "exec/log.h"
146
147
#define EXTRACT_FIELD(src, start, end) \
148
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/mips/tcg/translate.c
151
+++ b/target/mips/tcg/translate.c
152
@@ -XXX,XX +XXX,XX @@
153
#include "semihosting/semihost.h"
154
155
#include "trace.h"
156
-#include "trace-tcg.h"
157
#include "exec/translator.h"
158
#include "exec/log.h"
159
#include "qemu/qemu-print.h"
160
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/openrisc/translate.c
163
+++ b/target/openrisc/translate.c
164
@@ -XXX,XX +XXX,XX @@
165
#include "exec/helper-gen.h"
166
#include "exec/gen-icount.h"
167
168
-#include "trace-tcg.h"
169
#include "exec/log.h"
170
171
/* is_jmp field values */
172
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/target/ppc/translate.c
175
+++ b/target/ppc/translate.c
176
@@ -XXX,XX +XXX,XX @@
177
#include "exec/helper-proto.h"
178
#include "exec/helper-gen.h"
179
180
-#include "trace-tcg.h"
181
#include "exec/translator.h"
182
#include "exec/log.h"
183
#include "qemu/atomic128.h"
184
diff --git a/target/rx/translate.c b/target/rx/translate.c
185
index XXXXXXX..XXXXXXX 100644
186
--- a/target/rx/translate.c
187
+++ b/target/rx/translate.c
188
@@ -XXX,XX +XXX,XX @@
189
#include "exec/helper-proto.h"
190
#include "exec/helper-gen.h"
191
#include "exec/translator.h"
192
-#include "trace-tcg.h"
193
#include "exec/log.h"
194
195
typedef struct DisasContext {
196
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
197
index XXXXXXX..XXXXXXX 100644
198
--- a/target/s390x/translate.c
199
+++ b/target/s390x/translate.c
200
@@ -XXX,XX +XXX,XX @@
201
#include "exec/helper-proto.h"
202
#include "exec/helper-gen.h"
203
204
-#include "trace-tcg.h"
205
#include "exec/translator.h"
206
#include "exec/log.h"
207
#include "qemu/atomic128.h"
208
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
90
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
209
index XXXXXXX..XXXXXXX 100644
91
index XXXXXXX..XXXXXXX 100644
210
--- a/target/sh4/translate.c
92
--- a/target/sh4/translate.c
211
+++ b/target/sh4/translate.c
93
+++ b/target/sh4/translate.c
212
@@ -XXX,XX +XXX,XX @@
94
@@ -XXX,XX +XXX,XX @@
213
#include "exec/helper-proto.h"
95
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
214
#include "exec/helper-gen.h"
96
*/
215
#include "exec/translator.h"
97
216
-#include "trace-tcg.h"
98
-#define DEBUG_DISAS
217
#include "exec/log.h"
99
-
218
#include "qemu/qemu-print.h"
100
#include "qemu/osdep.h"
219
101
#include "cpu.h"
102
#include "disas/disas.h"
220
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
103
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
221
index XXXXXXX..XXXXXXX 100644
104
index XXXXXXX..XXXXXXX 100644
222
--- a/target/sparc/translate.c
105
--- a/target/sparc/translate.c
223
+++ b/target/sparc/translate.c
106
+++ b/target/sparc/translate.c
224
@@ -XXX,XX +XXX,XX @@
107
@@ -XXX,XX +XXX,XX @@
225
226
#include "exec/helper-gen.h"
227
228
-#include "trace-tcg.h"
229
#include "exec/translator.h"
230
#include "exec/log.h"
231
#include "asi.h"
108
#include "asi.h"
232
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
109
110
111
-#define DEBUG_DISAS
112
-
113
#define DYNAMIC_PC 1 /* dynamic pc value */
114
#define JUMP_PC 2 /* dynamic pc value which takes only two values
115
according to jump_pc[T2] */
116
diff --git a/tcg/tcg.c b/tcg/tcg.c
233
index XXXXXXX..XXXXXXX 100644
117
index XXXXXXX..XXXXXXX 100644
234
--- a/target/xtensa/translate.c
118
--- a/tcg/tcg.c
235
+++ b/target/xtensa/translate.c
119
+++ b/tcg/tcg.c
236
@@ -XXX,XX +XXX,XX @@
120
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
237
#include "exec/helper-proto.h"
121
(uintptr_t)s->code_buf, prologue_size);
238
#include "exec/helper-gen.h"
122
#endif
239
123
240
-#include "trace-tcg.h"
124
-#ifdef DEBUG_DISAS
241
#include "exec/log.h"
125
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
242
126
FILE *logfile = qemu_log_trylock();
243
127
if (logfile) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
129
qemu_log_unlock(logfile);
130
}
131
}
132
-#endif
133
134
#ifndef CONFIG_TCG_INTERPRETER
135
/*
136
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
137
}
138
#endif
139
140
-#ifdef DEBUG_DISAS
141
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
142
&& qemu_log_in_addr_range(pc_start))) {
143
FILE *logfile = qemu_log_trylock();
144
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
145
qemu_log_unlock(logfile);
146
}
147
}
148
-#endif
149
150
#ifdef CONFIG_DEBUG_TCG
151
/* Ensure all labels referenced have been emitted. */
152
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
153
liveness_pass_1(s);
154
155
if (s->nb_indirects > 0) {
156
-#ifdef DEBUG_DISAS
157
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
158
&& qemu_log_in_addr_range(pc_start))) {
159
FILE *logfile = qemu_log_trylock();
160
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
161
qemu_log_unlock(logfile);
162
}
163
}
164
-#endif
165
+
166
/* Replace indirect temps with direct temps. */
167
if (liveness_pass_2(s)) {
168
/* If changes were made, re-run liveness. */
169
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
170
qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
171
#endif
172
173
-#ifdef DEBUG_DISAS
174
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
175
&& qemu_log_in_addr_range(pc_start))) {
176
FILE *logfile = qemu_log_trylock();
177
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
178
qemu_log_unlock(logfile);
179
}
180
}
181
-#endif
182
183
/* Initialize goto_tb jump offsets. */
184
tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
244
--
185
--
245
2.25.1
186
2.34.1
246
187
247
188
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/alpha/translate.c | 7 +------
5
1 file changed, 1 insertion(+), 6 deletions(-)
6
1
7
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/alpha/translate.c
10
+++ b/target/alpha/translate.c
11
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_store_conditional(DisasContext *ctx, int ra, int rb,
12
13
static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
14
{
15
-#ifndef CONFIG_USER_ONLY
16
- /* Check for the dest on the same page as the start of the TB. */
17
- return ((ctx->base.tb->pc ^ dest) & TARGET_PAGE_MASK) == 0;
18
-#else
19
- return true;
20
-#endif
21
+ return translator_use_goto_tb(&ctx->base, dest);
22
}
23
24
static DisasJumpType gen_bdirect(DisasContext *ctx, int ra, int32_t disp)
25
--
26
2.25.1
27
28
diff view generated by jsdifflib
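The wrapper removed above (and the similar ones removed from other targets below) is folded into the common translator_use_goto_tb() helper. Roughly, as suggested by the per-target code being deleted in this series, the common test refuses direct chaining while single-stepping and otherwise only allows it when the destination stays on the same guest page as the start of the TB. A sketch of that shape, assuming QEMU's translator headers; this is not the exact QEMU implementation:

static bool use_goto_tb_sketch(const DisasContextBase *db, target_ulong dest)
{
    /* No direct chaining while single-stepping... */
    if (db->singlestep_enabled) {
        return false;
    }
    /* ...and only within the guest page the TB started on. */
    return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
}
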
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/translate.c | 12 +-----------
8
1 file changed, 1 insertion(+), 11 deletions(-)
9
10
diff --git a/target/arm/translate.c b/target/arm/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate.c
13
+++ b/target/arm/translate.c
14
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
15
return 1;
16
}
17
18
-static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
19
-{
20
-#ifndef CONFIG_USER_ONLY
21
- return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
22
- ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
23
-#else
24
- return true;
25
-#endif
26
-}
27
-
28
static void gen_goto_ptr(void)
29
{
30
tcg_gen_lookup_and_goto_ptr();
31
@@ -XXX,XX +XXX,XX @@ static void gen_goto_ptr(void)
32
*/
33
static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
34
{
35
- if (use_goto_tb(s, dest)) {
36
+ if (translator_use_goto_tb(&s->base, dest)) {
37
tcg_gen_goto_tb(n);
38
gen_set_pc_im(s, dest);
39
tcg_gen_exit_tb(s->base.tb, n);
40
--
41
2.25.1
42
43
diff view generated by jsdifflib
Deleted patch
1
Single stepping is not the only reason not to use goto_tb.
2
If goto_tb is disallowed, and single-stepping is not enabled,
3
then use tcg_gen_lookup_and_goto_ptr to indirectly chain.
4
1
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/avr/translate.c | 9 ++++++---
9
1 file changed, 6 insertions(+), 3 deletions(-)
10
11
diff --git a/target/avr/translate.c b/target/avr/translate.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/avr/translate.c
14
+++ b/target/avr/translate.c
15
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
16
{
17
const TranslationBlock *tb = ctx->base.tb;
18
19
- if (!ctx->base.singlestep_enabled) {
20
+ if (translator_use_goto_tb(&ctx->base, dest)) {
21
tcg_gen_goto_tb(n);
22
tcg_gen_movi_i32(cpu_pc, dest);
23
tcg_gen_exit_tb(tb, n);
24
} else {
25
tcg_gen_movi_i32(cpu_pc, dest);
26
- gen_helper_debug(cpu_env);
27
- tcg_gen_exit_tb(NULL, 0);
28
+ if (ctx->base.singlestep_enabled) {
29
+ gen_helper_debug(cpu_env);
30
+ } else {
31
+ tcg_gen_lookup_and_goto_ptr();
32
+ }
33
}
34
ctx->base.is_jmp = DISAS_NORETURN;
35
}
36
--
37
2.25.1
38
39
diff view generated by jsdifflib
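Across the per-target patches in this group, gen_goto_tb() converges on the same shape: try direct chaining via translator_use_goto_tb(), otherwise write back the PC and either raise the debug exception (when single-stepping) or chain indirectly. A generic sketch of that shape; gen_set_pc() and gen_debug_exception() are hypothetical stand-ins for whatever per-target helpers do those jobs, and the exact ordering of the PC write-back differs per target:

static void gen_goto_tb_sketch(DisasContext *ctx, int n, target_ulong dest)
{
    if (translator_use_goto_tb(&ctx->base, dest)) {
        /* Direct chaining: patchable jump to the next TB. */
        tcg_gen_goto_tb(n);
        gen_set_pc(ctx, dest);              /* hypothetical per-target helper */
        tcg_gen_exit_tb(ctx->base.tb, n);
    } else {
        gen_set_pc(ctx, dest);
        if (ctx->base.singlestep_enabled) {
            gen_debug_exception(ctx);       /* hypothetical per-target helper */
        } else {
            /* Indirect chaining through the TB lookup helper. */
            tcg_gen_lookup_and_goto_ptr();
        }
    }
}
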
Deleted patch
1
The test for singlestepping is done in translator_use_goto_tb,
2
so we may elide it from cris_tr_tb_stop.
3
1
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/cris/translate.c | 5 ++---
8
1 file changed, 2 insertions(+), 3 deletions(-)
9
10
diff --git a/target/cris/translate.c b/target/cris/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/cris/translate.c
13
+++ b/target/cris/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void t_gen_swapr(TCGv d, TCGv s)
15
16
static bool use_goto_tb(DisasContext *dc, target_ulong dest)
17
{
18
- return ((dest ^ dc->base.pc_first) & TARGET_PAGE_MASK) == 0;
19
+ return translator_use_goto_tb(&dc->base, dest);
20
}
21
22
static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
23
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
24
* Use a conditional branch if either taken or not-taken path
25
* can use goto_tb. If neither can, then treat it as indirect.
26
*/
27
- if (likely(!dc->base.singlestep_enabled)
28
- && likely(!dc->cpustate_changed)
29
+ if (likely(!dc->cpustate_changed)
30
&& (use_goto_tb(dc, dc->jmp_pc) || use_goto_tb(dc, npc))) {
31
TCGLabel *not_taken = gen_new_label();
32
33
--
34
2.25.1
35
36
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/hppa/translate.c | 5 +----
5
1 file changed, 1 insertion(+), 4 deletions(-)
6
1
7
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/hppa/translate.c
10
+++ b/target/hppa/translate.c
11
@@ -XXX,XX +XXX,XX @@ static bool gen_illegal(DisasContext *ctx)
12
13
static bool use_goto_tb(DisasContext *ctx, target_ureg dest)
14
{
15
- /* Suppress goto_tb for page crossing, IO, or single-steping. */
16
- return !(((ctx->base.pc_first ^ dest) & TARGET_PAGE_MASK)
17
- || (tb_cflags(ctx->base.tb) & CF_LAST_IO)
18
- || ctx->base.singlestep_enabled);
19
+ return translator_use_goto_tb(&ctx->base, dest);
20
}
21
22
/* If the next insn is to be nullified, and it's on the same page,
23
--
24
2.25.1
25
26
diff view generated by jsdifflib
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/i386/tcg/translate.c | 14 ++------------
8
1 file changed, 2 insertions(+), 12 deletions(-)
9
10
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/i386/tcg/translate.c
13
+++ b/target/i386/tcg/translate.c
14
@@ -XXX,XX +XXX,XX @@ static inline int insn_const_size(MemOp ot)
15
}
16
}
17
18
-static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
19
-{
20
-#ifndef CONFIG_USER_ONLY
21
- return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
22
- (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
23
-#else
24
- return true;
25
-#endif
26
-}
27
-
28
-static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
29
+static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
30
{
31
target_ulong pc = s->cs_base + eip;
32
33
- if (use_goto_tb(s, pc)) {
34
+ if (translator_use_goto_tb(&s->base, pc)) {
35
/* jump to same page: we can use a direct jump */
36
tcg_gen_goto_tb(tb_num);
37
gen_jmp_im(s, eip);
38
--
39
2.25.1
40
41
diff view generated by jsdifflib
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Acked-by: Laurent Vivier <laurent@vivier.eu>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/m68k/translate.c | 12 +-----------
9
1 file changed, 1 insertion(+), 11 deletions(-)
10
11
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/m68k/translate.c
14
+++ b/target/m68k/translate.c
15
@@ -XXX,XX +XXX,XX @@ static void gen_exit_tb(DisasContext *s)
16
} \
17
} while (0)
18
19
-static inline bool use_goto_tb(DisasContext *s, uint32_t dest)
20
-{
21
-#ifndef CONFIG_USER_ONLY
22
- return (s->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)
23
- || (s->base.pc_next & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
24
-#else
25
- return true;
26
-#endif
27
-}
28
-
29
/* Generate a jump to an immediate address. */
30
static void gen_jmp_tb(DisasContext *s, int n, uint32_t dest)
31
{
32
@@ -XXX,XX +XXX,XX @@ static void gen_jmp_tb(DisasContext *s, int n, uint32_t dest)
33
update_cc_op(s);
34
tcg_gen_movi_i32(QREG_PC, dest);
35
gen_singlestep_exception(s);
36
- } else if (use_goto_tb(s, dest)) {
37
+ } else if (translator_use_goto_tb(&s->base, dest)) {
38
tcg_gen_goto_tb(n);
39
tcg_gen_movi_i32(QREG_PC, dest);
40
tcg_gen_exit_tb(s->base.tb, n);
41
--
42
2.25.1
43
44
diff view generated by jsdifflib
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/microblaze/translate.c | 11 +----------
8
1 file changed, 1 insertion(+), 10 deletions(-)
9
10
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/microblaze/translate.c
13
+++ b/target/microblaze/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void gen_raise_hw_excp(DisasContext *dc, uint32_t esr_ec)
15
gen_raise_exception_sync(dc, EXCP_HW_EXCP);
16
}
17
18
-static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
19
-{
20
-#ifndef CONFIG_USER_ONLY
21
- return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
22
-#else
23
- return true;
24
-#endif
25
-}
26
-
27
static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
28
{
29
if (dc->base.singlestep_enabled) {
30
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
31
tcg_gen_movi_i32(cpu_pc, dest);
32
gen_helper_raise_exception(cpu_env, tmp);
33
tcg_temp_free_i32(tmp);
34
- } else if (use_goto_tb(dc, dest)) {
35
+ } else if (translator_use_goto_tb(&dc->base, dest)) {
36
tcg_gen_goto_tb(n);
37
tcg_gen_movi_i32(cpu_pc, dest);
38
tcg_gen_exit_tb(dc->base.tb, n);
39
--
40
2.25.1
41
42
diff view generated by jsdifflib
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/mips/tcg/translate.c | 17 ++---------------
8
1 file changed, 2 insertions(+), 15 deletions(-)
9
10
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/mips/tcg/translate.c
13
+++ b/target/mips/tcg/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void gen_trap(DisasContext *ctx, uint32_t opc,
15
tcg_temp_free(t1);
16
}
17
18
-static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
19
+static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
20
{
21
- if (unlikely(ctx->base.singlestep_enabled)) {
22
- return false;
23
- }
24
-
25
-#ifndef CONFIG_USER_ONLY
26
- return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
27
-#else
28
- return true;
29
-#endif
30
-}
31
-
32
-static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
33
-{
34
- if (use_goto_tb(ctx, dest)) {
35
+ if (translator_use_goto_tb(&ctx->base, dest)) {
36
tcg_gen_goto_tb(n);
37
gen_save_pc(dest);
38
tcg_gen_exit_tb(ctx->base.tb, n);
39
--
40
2.25.1
41
42
diff view generated by jsdifflib
Deleted patch
1
Do not emit dead code for the singlestep_enabled case,
2
after having exited the TB with a debug exception.
3
1
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/mips/tcg/translate.c | 3 ++-
8
1 file changed, 2 insertions(+), 1 deletion(-)
9
10
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/mips/tcg/translate.c
13
+++ b/target/mips/tcg/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
15
if (ctx->base.singlestep_enabled) {
16
save_cpu_state(ctx, 0);
17
gen_helper_raise_exception_debug(cpu_env);
18
+ } else {
19
+ tcg_gen_lookup_and_goto_ptr();
20
}
21
- tcg_gen_lookup_and_goto_ptr();
22
}
23
}
24
25
--
26
2.25.1
27
28
diff view generated by jsdifflib
Deleted patch
1
Reorder the control statements to allow using the page boundary
2
check from translator_use_goto_tb().
3
1
4
Reviewed-by: Stafford Horne <shorne@gmail.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/openrisc/translate.c | 15 ++++++++-------
8
1 file changed, 8 insertions(+), 7 deletions(-)
9
10
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/openrisc/translate.c
13
+++ b/target/openrisc/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void openrisc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
15
/* fallthru */
16
17
case DISAS_TOO_MANY:
18
- if (unlikely(dc->base.singlestep_enabled)) {
19
- tcg_gen_movi_tl(cpu_pc, jmp_dest);
20
- gen_exception(dc, EXCP_DEBUG);
21
- } else if ((dc->base.pc_first ^ jmp_dest) & TARGET_PAGE_MASK) {
22
- tcg_gen_movi_tl(cpu_pc, jmp_dest);
23
- tcg_gen_lookup_and_goto_ptr();
24
- } else {
25
+ if (translator_use_goto_tb(&dc->base, jmp_dest)) {
26
tcg_gen_goto_tb(0);
27
tcg_gen_movi_tl(cpu_pc, jmp_dest);
28
tcg_gen_exit_tb(dc->base.tb, 0);
29
+ break;
30
+ }
31
+ tcg_gen_movi_tl(cpu_pc, jmp_dest);
32
+ if (unlikely(dc->base.singlestep_enabled)) {
33
+ gen_exception(dc, EXCP_DEBUG);
34
+ } else {
35
+ tcg_gen_lookup_and_goto_ptr();
36
}
37
break;
38
39
--
40
2.25.1
41
42
diff view generated by jsdifflib
Deleted patch
1
Just use translator_use_goto_tb directly at the one call site,
2
rather than maintaining a local wrapper.
3
1
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/riscv/translate.c | 20 +-------------------
8
1 file changed, 1 insertion(+), 19 deletions(-)
9
10
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/riscv/translate.c
13
+++ b/target/riscv/translate.c
14
@@ -XXX,XX +XXX,XX @@ static void gen_exception_inst_addr_mis(DisasContext *ctx)
15
generate_exception_mtval(ctx, RISCV_EXCP_INST_ADDR_MIS);
16
}
17
18
-static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
19
-{
20
- if (unlikely(ctx->base.singlestep_enabled)) {
21
- return false;
22
- }
23
-
24
-#ifndef CONFIG_USER_ONLY
25
- return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
26
-#else
27
- return true;
28
-#endif
29
-}
30
-
31
static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
32
{
33
- if (use_goto_tb(ctx, dest)) {
34
- /* chaining is only allowed when the jump is to the same page */
35
+ if (translator_use_goto_tb(&ctx->base, dest)) {
36
tcg_gen_goto_tb(n);
37
tcg_gen_movi_tl(cpu_pc, dest);
38
-
39
- /* No need to check for single stepping here as use_goto_tb() will
40
- * return false in case of single stepping.
41
- */
42
tcg_gen_exit_tb(ctx->base.tb, n);
43
} else {
44
tcg_gen_movi_tl(cpu_pc, dest);
45
--
46
2.25.1
47
48
diff view generated by jsdifflib
1
In tcg_region_prologue_set, we reset TCGContext.code_gen_ptr.
1
This is always defined, and the optimization pass is
2
So do that after we've used it to dump the prologue contents.
2
essential to producing reasonable code.
3
3
4
Fixes: b0a0794a0f16
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/tcg.c | 4 ++--
7
tcg/tcg.c | 5 -----
8
1 file changed, 2 insertions(+), 2 deletions(-)
8
1 file changed, 5 deletions(-)
9
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
14
@@ -XXX,XX +XXX,XX @@
15
(uintptr_t)s->code_buf, prologue_size);
15
* THE SOFTWARE.
16
*/
17
18
-/* define it to use liveness analysis (better code) */
19
-#define USE_TCG_OPTIMIZATIONS
20
-
21
#include "qemu/osdep.h"
22
23
/* Define to jump the ELF file used to communicate with GDB. */
24
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
25
qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
16
#endif
26
#endif
17
27
18
- tcg_region_prologue_set(s);
28
-#ifdef USE_TCG_OPTIMIZATIONS
19
-
29
tcg_optimize(s);
20
#ifdef DEBUG_DISAS
30
-#endif
21
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
31
22
FILE *logfile = qemu_log_lock();
32
#ifdef CONFIG_PROFILER
23
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
33
qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
24
tcg_debug_assert(tcg_code_gen_epilogue != NULL);
25
}
26
#endif
27
+
28
+ tcg_region_prologue_set(s);
29
}
30
31
void tcg_func_start(TCGContext *s)
32
--
34
--
33
2.25.1
35
2.34.1
34
36
35
37
diff view generated by jsdifflib
Deleted patch
1
The loop is performing a simple boolean test for the existence
2
of a BP_CPU breakpoint at EIP. Plus it gets the iteration wrong,
3
if we happen to have a BP_GDB breakpoint at the same address.
4
1
5
We have a function for this: cpu_breakpoint_test.
6
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
9
Message-Id: <20210620062317.1399034-1-richard.henderson@linaro.org>
10
---
11
target/i386/tcg/sysemu/bpt_helper.c | 12 +++---------
12
1 file changed, 3 insertions(+), 9 deletions(-)
13
14
diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/i386/tcg/sysemu/bpt_helper.c
17
+++ b/target/i386/tcg/sysemu/bpt_helper.c
18
@@ -XXX,XX +XXX,XX @@ void breakpoint_handler(CPUState *cs)
19
{
20
X86CPU *cpu = X86_CPU(cs);
21
CPUX86State *env = &cpu->env;
22
- CPUBreakpoint *bp;
23
24
if (cs->watchpoint_hit) {
25
if (cs->watchpoint_hit->flags & BP_CPU) {
26
@@ -XXX,XX +XXX,XX @@ void breakpoint_handler(CPUState *cs)
27
}
28
}
29
} else {
30
- QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
31
- if (bp->pc == env->eip) {
32
- if (bp->flags & BP_CPU) {
33
- check_hw_breakpoints(env, true);
34
- raise_exception(env, EXCP01_DB);
35
- }
36
- break;
37
- }
38
+ if (cpu_breakpoint_test(cs, env->eip, BP_CPU)) {
39
+ check_hw_breakpoints(env, true);
40
+ raise_exception(env, EXCP01_DB);
41
}
42
}
43
}
44
--
45
2.25.1
46
47
diff view generated by jsdifflib
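For reference, cpu_breakpoint_test() encapsulates essentially the scan the removed loop performed, minus the early break that caused the BP_GDB problem described above. A rough sketch of its behaviour, assuming QEMU's CPUState and CPUBreakpoint definitions; this is a sketch, not copied from the QEMU source:

static bool breakpoint_test_sketch(CPUState *cs, vaddr pc, int mask)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
        if (bp->pc == pc && (bp->flags & mask)) {
            return true;
        }
    }
    return false;
}
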