v3: One more try to fix macos issues.

r~

The following changes since commit e0209297cddd5e10a07e15fac5cca7aa1a8e0e59:

  Merge tag 'pull-ufs-20250217' of https://gitlab.com/jeuk20.kim/qemu into staging (2025-02-18 10:58:48 +0800)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250215-3

for you to fetch changes up to e726f65867087d86436de05e9f372a86ec1381a6:

  tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 (2025-02-18 08:29:03 -0800)

----------------------------------------------------------------
tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS
tcg: Cleanups after disallowing 64-on-32
tcg: Introduce constraint for zero register
tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64
tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2
linux-user: Move TARGET_SA_RESTORER out of generic/signal.h
linux-user: Fix alignment when unmapping excess reservation
target/sparc: Fix register selection for all F*TOx and FxTO* instructions
target/sparc: Fix gdbstub incorrectly handling registers f32-f62
target/sparc: fake UltraSPARC T1 PCR and PIC registers

----------------------------------------------------------------
Andreas Schwab (1):
      linux-user: Move TARGET_SA_RESTORER out of generic/signal.h

Artyom Tarasenko (1):
      target/sparc: fake UltraSPARC T1 PCR and PIC registers

Fabiano Rosas (1):
      elfload: Fix alignment when unmapping excess reservation

Mikael Szreder (2):
      target/sparc: Fix register selection for all F*TOx and FxTO* instructions
      target/sparc: Fix gdbstub incorrectly handling registers f32-f62

Richard Henderson (23):
      tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS
      tcg: Remove TCG_OVERSIZED_GUEST
      tcg: Drop support for two address registers in gen_ldst
      tcg: Merge INDEX_op_qemu_*_{a32,a64}_*
      tcg/arm: Drop addrhi from prepare_host_addr
      tcg/i386: Drop addrhi from prepare_host_addr
      tcg/mips: Drop addrhi from prepare_host_addr
      tcg/ppc: Drop addrhi from prepare_host_addr
      tcg: Replace addr{lo,hi}_reg with addr_reg in TCGLabelQemuLdst
      plugins: Fix qemu_plugin_read_memory_vaddr parameters
      accel/tcg: Fix tlb_set_page_with_attrs, tlb_set_page
      target/loongarch: Use VADDR_PRIx for logging pc_next
      target/mips: Use VADDR_PRIx for logging pc_next
      include/exec: Change vaddr to uintptr_t
      include/exec: Use uintptr_t in CPUTLBEntry
      tcg: Introduce the 'z' constraint for a hardware zero register
      tcg/aarch64: Use 'z' constraint
      tcg/loongarch64: Use 'z' constraint
      tcg/mips: Use 'z' constraint
      tcg/riscv: Use 'z' constraint
      tcg/sparc64: Use 'z' constraint
      tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2
      tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64

 include/exec/tlb-common.h | 10 +-
 include/exec/vaddr.h | 16 +-
 include/qemu/atomic.h | 18 +-
 include/tcg/oversized-guest.h | 23 ---
 include/tcg/tcg-opc.h | 28 +--
 include/tcg/tcg.h | 3 +-
 linux-user/aarch64/target_signal.h | 2 +
 linux-user/arm/target_signal.h | 2 +
 linux-user/generic/signal.h | 1 -
 linux-user/i386/target_signal.h | 2 +
 linux-user/m68k/target_signal.h | 1 +
 linux-user/microblaze/target_signal.h | 2 +
 linux-user/ppc/target_signal.h | 2 +
 linux-user/s390x/target_signal.h | 2 +
 linux-user/sh4/target_signal.h | 2 +
 linux-user/x86_64/target_signal.h | 2 +
 linux-user/xtensa/target_signal.h | 2 +
 tcg/aarch64/tcg-target-con-set.h | 12 +-
 tcg/aarch64/tcg-target.h | 2 +
 tcg/loongarch64/tcg-target-con-set.h | 15 +-
 tcg/loongarch64/tcg-target-con-str.h | 1 -
 tcg/loongarch64/tcg-target-has.h | 2 -
 tcg/loongarch64/tcg-target.h | 2 +
 tcg/mips/tcg-target-con-set.h | 26 +--
 tcg/mips/tcg-target-con-str.h | 1 -
 tcg/mips/tcg-target.h | 2 +
 tcg/riscv/tcg-target-con-set.h | 10 +-
 tcg/riscv/tcg-target-con-str.h | 1 -
 tcg/riscv/tcg-target-has.h | 2 -
 tcg/riscv/tcg-target.h | 2 +
 tcg/sparc64/tcg-target-con-set.h | 12 +-
 tcg/sparc64/tcg-target-con-str.h | 1 -
 tcg/sparc64/tcg-target.h | 3 +-
 tcg/tci/tcg-target.h | 1 -
 accel/tcg/cputlb.c | 32 +---
 accel/tcg/tcg-all.c | 9 +-
 linux-user/elfload.c | 4 +-
 plugins/api.c | 2 +-
 target/arm/ptw.c | 34 ----
 target/loongarch/tcg/translate.c | 2 +-
 target/mips/tcg/octeon_translate.c | 4 +-
 target/riscv/cpu_helper.c | 13 +-
 target/sparc/gdbstub.c | 18 +-
 target/sparc/translate.c | 19 +++
 tcg/optimize.c | 21 +--
 tcg/tcg-op-ldst.c | 103 +++--------
 tcg/tcg.c | 97 +++++------
 tcg/tci.c | 119 +++----------
 docs/devel/multi-thread-tcg.rst | 1 -
 docs/devel/tcg-ops.rst | 4 +-
 target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 2 +-
 target/sparc/insns.decode | 19 ++-
 tcg/aarch64/tcg-target.c.inc | 86 ++++------
 tcg/arm/tcg-target.c.inc | 114 ++++---------
 tcg/i386/tcg-target.c.inc | 190 +++++----------------
 tcg/loongarch64/tcg-target.c.inc | 72 +++-----
 tcg/mips/tcg-target.c.inc | 169 ++++++------------
 tcg/ppc/tcg-target.c.inc | 164 +++++-------------
 tcg/riscv/tcg-target.c.inc | 56 +++---
 tcg/s390x/tcg-target.c.inc | 40 ++---
 tcg/sparc64/tcg-target.c.inc | 45 ++---
 tcg/tci/tcg-target.c.inc | 60 ++-----
 62 files changed, 550 insertions(+), 1162 deletions(-)
 delete mode 100644 include/tcg/oversized-guest.h


The following changes since commit aa33508196f4e2da04625bee36e1f7be5b9267e7:

  Merge tag 'mem-2023-05-23' of https://github.com/davidhildenbrand/qemu into staging (2023-05-23 10:57:25 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230523

for you to fetch changes up to 30d56836f98c7ed2d309bff1dde8854f3d0b5634:

  tcg: Remove USE_TCG_OPTIMIZATIONS (2023-05-23 16:52:39 -0700)

----------------------------------------------------------------
util: Host cpu detection for x86 and aa64
util: Use cpu detection for bufferiszero
migration: Use cpu detection for xbzrle
tcg: Replace and remove cpu_atomic_{ld,st}o*
host/include: Split qemu/atomic128.h
tcg: Remove DEBUG_DISAS
tcg: Remove USE_TCG_OPTIMIZATIONS

----------------------------------------------------------------
Richard Henderson (28):
      util: Introduce host-specific cpuinfo.h
      util: Add cpuinfo-i386.c
      util: Add i386 CPUINFO_ATOMIC_VMOVDQU
      tcg/i386: Use host/cpuinfo.h
      util/bufferiszero: Use i386 host/cpuinfo.h
      migration/xbzrle: Shuffle function order
      migration/xbzrle: Use i386 host/cpuinfo.h
      migration: Build migration_files once
      util: Add cpuinfo-aarch64.c
      include/host: Split out atomic128-cas.h
      include/host: Split out atomic128-ldst.h
      meson: Fix detect atomic128 support with optimization
      include/qemu: Move CONFIG_ATOMIC128_OPT handling to atomic128.h
      target/ppc: Use tcg_gen_qemu_{ld,st}_i128 for LQARX, LQ, STQ
      target/s390x: Use tcg_gen_qemu_{ld,st}_i128 for LPQ, STPQ
      accel/tcg: Unify cpu_{ld,st}*_{be,le}_mmu
      target/s390x: Use cpu_{ld,st}*_mmu in do_csst
      target/s390x: Always use cpu_atomic_cmpxchgl_be_mmu in do_csst
      accel/tcg: Remove cpu_atomic_{ld,st}o_*_mmu
      accel/tcg: Remove prot argument to atomic_mmu_lookup
      accel/tcg: Eliminate #if on HAVE_ATOMIC128 and HAVE_CMPXCHG128
      qemu/atomic128: Split atomic16_read
      accel/tcg: Correctly use atomic128.h in ldst_atomicity.c.inc
      tcg: Split out tcg/debug-assert.h
      qemu/atomic128: Improve cmpxchg fallback for atomic16_set
      qemu/atomic128: Add runtime test for FEAT_LSE2
      tcg: Remove DEBUG_DISAS
      tcg: Remove USE_TCG_OPTIMIZATIONS

 accel/tcg/atomic_template.h | 93 +-----
 host/include/aarch64/host/atomic128-cas.h | 45 +++
 host/include/aarch64/host/atomic128-ldst.h | 79 +++++
 host/include/aarch64/host/cpuinfo.h | 22 ++
 host/include/generic/host/atomic128-cas.h | 47 +++
 host/include/generic/host/atomic128-ldst.h | 81 +++++
 host/include/generic/host/cpuinfo.h | 4 +
 host/include/i386/host/cpuinfo.h | 39 +++
 host/include/x86_64/host/cpuinfo.h | 1 +
 include/exec/cpu_ldst.h | 67 +----
 include/exec/exec-all.h | 3 -
 include/qemu/atomic128.h | 146 ++-------
 include/tcg/debug-assert.h | 17 ++
 include/tcg/tcg.h | 9 +-
 migration/xbzrle.h | 5 +-
 target/ppc/cpu.h | 1 -
 target/ppc/helper.h | 9 -
 target/s390x/cpu.h | 3 -
 target/s390x/helper.h | 4 -
 tcg/aarch64/tcg-target.h | 6 +-
 tcg/i386/tcg-target.h | 28 +-
 accel/tcg/cpu-exec.c | 2 -
 accel/tcg/cputlb.c | 211 ++++---------
 accel/tcg/translate-all.c | 2 -
 accel/tcg/translator.c | 2 -
 accel/tcg/user-exec.c | 332 ++++++--------
 migration/ram.c | 34 +--
 migration/xbzrle.c | 268 +++++++++--------
 target/arm/tcg/m_helper.c | 4 +-
 target/ppc/mem_helper.c | 48 ---
 target/ppc/translate.c | 34 +--
 target/s390x/tcg/mem_helper.c | 137 ++-------
 target/s390x/tcg/translate.c | 30 +-
 target/sh4/translate.c | 2 -
 target/sparc/ldst_helper.c | 18 +-
 target/sparc/translate.c | 2 -
 tcg/tcg.c | 14 +-
 tests/bench/xbzrle-bench.c | 469 -----------------------------
 tests/unit/test-xbzrle.c | 49 +--
 util/bufferiszero.c | 127 +++-----
 util/cpuinfo-aarch64.c | 67 +++++
 util/cpuinfo-i386.c | 99 ++++++
 MAINTAINERS | 3 +
 accel/tcg/atomic_common.c.inc | 14 -
 accel/tcg/ldst_atomicity.c.inc | 135 ++-------
 accel/tcg/ldst_common.c.inc | 24 +-
 meson.build | 12 +-
 migration/meson.build | 1 -
 target/ppc/translate/fixedpoint-impl.c.inc | 51 +---
 target/s390x/tcg/insn-data.h.inc | 2 +-
 tcg/aarch64/tcg-target.c.inc | 40 ---
 tcg/i386/tcg-target.c.inc | 123 +-------
 tests/bench/meson.build | 6 -
 util/meson.build | 6 +
 54 files changed, 1035 insertions(+), 2042 deletions(-)
 create mode 100644 host/include/aarch64/host/atomic128-cas.h
 create mode 100644 host/include/aarch64/host/atomic128-ldst.h
 create mode 100644 host/include/aarch64/host/cpuinfo.h
 create mode 100644 host/include/generic/host/atomic128-cas.h
 create mode 100644 host/include/generic/host/atomic128-ldst.h
 create mode 100644 host/include/generic/host/cpuinfo.h
 create mode 100644 host/include/i386/host/cpuinfo.h
 create mode 100644 host/include/x86_64/host/cpuinfo.h
 create mode 100644 include/tcg/debug-assert.h
 delete mode 100644 tests/bench/xbzrle-bench.c
 create mode 100644 util/cpuinfo-aarch64.c
 create mode 100644 util/cpuinfo-i386.c
Deleted patch
The entire contents of the header are host-specific, but the
existence of such a header is not, which avoids host-specific
ifdefs at the top of the files that include it.

Add host/include/{arch,generic} to the project arguments.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/generic/host/cpuinfo.h |  4 ++++
 meson.build                         | 10 ++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 host/include/generic/host/cpuinfo.h
diff --git a/host/include/generic/host/cpuinfo.h b/host/include/generic/host/cpuinfo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/host/include/generic/host/cpuinfo.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * No host specific cpu identification.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ add_project_arguments('-iquote', '.',
                      '-iquote', meson.current_source_dir() / 'include',
                      language: all_languages)
 
+# If a host-specific include directory exists, list that first...
+host_include = meson.current_source_dir() / 'host/include/'
+if fs.is_dir(host_include / host_arch)
+  add_project_arguments('-iquote', host_include / host_arch,
+                        language: all_languages)
+endif
+# ... followed by the generic fallback.
+add_project_arguments('-iquote', host_include / 'generic',
+                      language: all_languages)
+
 sparse = find_program('cgcc', required: get_option('sparse'))
 if sparse.found()
   run_target('sparse',
--
2.34.1
Deleted patch
Add cpuinfo.h for i386 and x86_64, and the initialization
for that in util/. Populate that with a slightly altered
copy of the tcg host probing code. Other uses of cpuid.h
will be adjusted one patch at a time.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/i386/host/cpuinfo.h   | 38 ++++++++++++
 host/include/x86_64/host/cpuinfo.h |  1 +
 util/cpuinfo-i386.c                | 97 ++++++++++++++++++++++++++++++
 MAINTAINERS                        |  2 +
 util/meson.build                   |  4 ++
 5 files changed, 142 insertions(+)
 create mode 100644 host/include/i386/host/cpuinfo.h
 create mode 100644 host/include/x86_64/host/cpuinfo.h
 create mode 100644 util/cpuinfo-i386.c
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
21
new file mode 100644
22
index XXXXXXX..XXXXXXX
23
--- /dev/null
24
+++ b/host/include/i386/host/cpuinfo.h
25
@@ -XXX,XX +XXX,XX @@
26
+/*
27
+ * SPDX-License-Identifier: GPL-2.0-or-later
28
+ * Host specific cpu indentification for x86.
29
+ */
30
+
31
+#ifndef HOST_CPUINFO_H
32
+#define HOST_CPUINFO_H
33
+
34
+/* Digested version of <cpuid.h> */
35
+
36
+#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
37
+#define CPUINFO_CMOV (1u << 1)
38
+#define CPUINFO_MOVBE (1u << 2)
39
+#define CPUINFO_LZCNT (1u << 3)
40
+#define CPUINFO_POPCNT (1u << 4)
41
+#define CPUINFO_BMI1 (1u << 5)
42
+#define CPUINFO_BMI2 (1u << 6)
43
+#define CPUINFO_SSE2 (1u << 7)
44
+#define CPUINFO_SSE4 (1u << 8)
45
+#define CPUINFO_AVX1 (1u << 9)
46
+#define CPUINFO_AVX2 (1u << 10)
47
+#define CPUINFO_AVX512F (1u << 11)
48
+#define CPUINFO_AVX512VL (1u << 12)
49
+#define CPUINFO_AVX512BW (1u << 13)
50
+#define CPUINFO_AVX512DQ (1u << 14)
51
+#define CPUINFO_AVX512VBMI2 (1u << 15)
52
+#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
53
+
54
+/* Initialized with a constructor. */
55
+extern unsigned cpuinfo;
56
+
57
+/*
58
+ * We cannot rely on constructor ordering, so other constructors must
59
+ * use the function interface rather than the variable above.
60
+ */
61
+unsigned cpuinfo_init(void);
62
+
63
+#endif /* HOST_CPUINFO_H */
64
diff --git a/host/include/x86_64/host/cpuinfo.h b/host/include/x86_64/host/cpuinfo.h
65
new file mode 100644
66
index XXXXXXX..XXXXXXX
67
--- /dev/null
68
+++ b/host/include/x86_64/host/cpuinfo.h
69
@@ -0,0 +1 @@
70
+#include "host/include/i386/host/cpuinfo.h"
71
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
72
new file mode 100644
73
index XXXXXXX..XXXXXXX
74
--- /dev/null
75
+++ b/util/cpuinfo-i386.c
76
@@ -XXX,XX +XXX,XX @@
77
+/*
78
+ * SPDX-License-Identifier: GPL-2.0-or-later
79
+ * Host specific cpu indentification for x86.
80
+ */
81
+
82
+#include "qemu/osdep.h"
83
+#include "host/cpuinfo.h"
84
+#ifdef CONFIG_CPUID_H
85
+# include "qemu/cpuid.h"
86
+#endif
87
+
88
+unsigned cpuinfo;
89
+
90
+/* Called both as constructor and (possibly) via other constructors. */
91
+unsigned __attribute__((constructor)) cpuinfo_init(void)
92
+{
93
+ unsigned info = cpuinfo;
94
+
95
+ if (info) {
96
+ return info;
97
+ }
98
+
99
+#ifdef CONFIG_CPUID_H
100
+ unsigned max, a, b, c, d, b7 = 0, c7 = 0;
101
+
102
+ max = __get_cpuid_max(0, 0);
103
+
104
+ if (max >= 7) {
105
+ __cpuid_count(7, 0, a, b7, c7, d);
106
+ info |= (b7 & bit_BMI ? CPUINFO_BMI1 : 0);
107
+ info |= (b7 & bit_BMI2 ? CPUINFO_BMI2 : 0);
108
+ }
109
+
110
+ if (max >= 1) {
111
+ __cpuid(1, a, b, c, d);
112
+
113
+ info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
114
+ info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
115
+ info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
116
+ info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
117
+ info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
118
+
119
+ /* For AVX features, we must check available and usable. */
120
+ if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
121
+ unsigned bv = xgetbv_low(0);
122
+
123
+ if ((bv & 6) == 6) {
124
+ info |= CPUINFO_AVX1;
125
+ info |= (b7 & bit_AVX2 ? CPUINFO_AVX2 : 0);
126
+
127
+ if ((bv & 0xe0) == 0xe0) {
128
+ info |= (b7 & bit_AVX512F ? CPUINFO_AVX512F : 0);
129
+ info |= (b7 & bit_AVX512VL ? CPUINFO_AVX512VL : 0);
130
+ info |= (b7 & bit_AVX512BW ? CPUINFO_AVX512BW : 0);
131
+ info |= (b7 & bit_AVX512DQ ? CPUINFO_AVX512DQ : 0);
132
+ info |= (c7 & bit_AVX512VBMI2 ? CPUINFO_AVX512VBMI2 : 0);
133
+ }
134
+
135
+ /*
136
+ * The Intel SDM has added:
137
+ * Processors that enumerate support for Intel® AVX
138
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
139
+ * guarantee that the 16-byte memory operations performed
140
+ * by the following instructions will always be carried
141
+ * out atomically:
142
+ * - MOVAPD, MOVAPS, and MOVDQA.
143
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
144
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
145
+ * with EVEX.128 and k0 (masking disabled).
146
+ * Note that these instructions require the linear addresses
147
+ * of their memory operands to be 16-byte aligned.
148
+ *
149
+ * AMD has provided an even stronger guarantee that processors
150
+ * with AVX provide 16-byte atomicity for all cachable,
151
+ * naturally aligned single loads and stores, e.g. MOVDQU.
152
+ *
153
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
154
+ */
155
+ __cpuid(0, a, b, c, d);
156
+ if (c == signature_INTEL_ecx || c == signature_AMD_ecx) {
157
+ info |= CPUINFO_ATOMIC_VMOVDQA;
158
+ }
159
+ }
160
+ }
161
+ }
162
+
163
+ max = __get_cpuid_max(0x8000000, 0);
164
+ if (max >= 1) {
165
+ __cpuid(0x80000001, a, b, c, d);
166
+ info |= (c & bit_LZCNT ? CPUINFO_LZCNT : 0);
167
+ }
168
+#endif
169
+
170
+ info |= CPUINFO_ALWAYS;
171
+ cpuinfo = info;
172
+ return info;
173
+}
174
diff --git a/MAINTAINERS b/MAINTAINERS
175
index XXXXXXX..XXXXXXX 100644
176
--- a/MAINTAINERS
177
+++ b/MAINTAINERS
178
@@ -XXX,XX +XXX,XX @@ F: include/exec/helper*.h
179
F: include/sysemu/cpus.h
180
F: include/sysemu/tcg.h
181
F: include/hw/core/tcg-cpu-ops.h
182
+F: host/include/*/host/cpuinfo.h
183
+F: util/cpuinfo-*.c
184
185
FPU emulation
186
M: Aurelien Jarno <aurelien@aurel32.net>
187
diff --git a/util/meson.build b/util/meson.build
188
index XXXXXXX..XXXXXXX 100644
189
--- a/util/meson.build
190
+++ b/util/meson.build
191
@@ -XXX,XX +XXX,XX @@ if have_block
192
endif
193
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
194
endif
195
+
196
+if cpu in ['x86', 'x86_64']
197
+ util_ss.add(files('cpuinfo-i386.c'))
198
+endif
199
--
200
2.34.1
201
202
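As an aside, a minimal consumer sketch for the interface added above (not part
of the series; it assumes an x86 or x86_64 host build, since CPUINFO_AVX2 only
exists in the i386/x86_64 header):

    #include "qemu/osdep.h"
    #include "host/cpuinfo.h"

    /*
     * Test a feature bit at runtime.  Calling cpuinfo_init() instead of
     * reading the bare 'cpuinfo' variable keeps this correct even when
     * invoked from another constructor, whose ordering with respect to
     * the cpuinfo constructor is not guaranteed.
     */
    static bool host_has_avx2(void)
    {
        return (cpuinfo_init() & CPUINFO_AVX2) != 0;
    }

Because host/include/<arch> is searched before host/include/generic, the same
include line works on every host; only the set of CPUINFO_* bits differs.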
Deleted patch
Add a bit to indicate when VMOVDQU is also atomic if aligned.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/i386/host/cpuinfo.h | 1 +
 util/cpuinfo-i386.c              | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/host/include/i386/host/cpuinfo.h
14
+++ b/host/include/i386/host/cpuinfo.h
15
@@ -XXX,XX +XXX,XX @@
16
#define CPUINFO_AVX512DQ (1u << 14)
17
#define CPUINFO_AVX512VBMI2 (1u << 15)
18
#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
19
+#define CPUINFO_ATOMIC_VMOVDQU (1u << 17)
20
21
/* Initialized with a constructor. */
22
extern unsigned cpuinfo;
23
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/util/cpuinfo-i386.c
26
+++ b/util/cpuinfo-i386.c
27
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
28
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
29
*/
30
__cpuid(0, a, b, c, d);
31
- if (c == signature_INTEL_ecx || c == signature_AMD_ecx) {
32
+ if (c == signature_INTEL_ecx) {
33
info |= CPUINFO_ATOMIC_VMOVDQA;
34
+ } else if (c == signature_AMD_ecx) {
35
+ info |= CPUINFO_ATOMIC_VMOVDQA | CPUINFO_ATOMIC_VMOVDQU;
36
}
37
}
38
}
39
--
40
2.34.1
41
42
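For illustration only, a hypothetical helper (not code from this series)
showing how the two bits differ: both promise atomicity only for
16-byte-aligned operands, but the VMOVDQU bit additionally allows the
unaligned-encoding instruction to be used for such an access, while the
VMOVDQA bit requires the aligned encoding. Which encoding the compiler emits
also depends on the AVX compile flags.

    #include <immintrin.h>
    #include "host/cpuinfo.h"

    /*
     * Atomic 16-byte read of a 16-byte-aligned location; the caller
     * has already checked that at least one of the two bits is set.
     */
    static inline __m128i atomic16_read_aligned(const __m128i *p)
    {
        if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
            return _mm_loadu_si128(p);  /* (V)MOVDQU is atomic here */
        }
        return _mm_load_si128(p);       /* fall back to (V)MOVDQA */
    }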
Deleted patch
Use the CPUINFO_* bits instead of the individual boolean
variables that we had been using. Remove all of the init
code that was moved over to cpuinfo-i386.c.

Note that have_avx512* check both AVX512{F,VL}, as we had
previously done during tcg_target_init.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h     |  28 +++++----
 tcg/i386/tcg-target.c.inc | 123 ++------------------------------
 2 files changed, 22 insertions(+), 129 deletions(-)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/i386/tcg-target.h
19
+++ b/tcg/i386/tcg-target.h
20
@@ -XXX,XX +XXX,XX @@
21
#ifndef I386_TCG_TARGET_H
22
#define I386_TCG_TARGET_H
23
24
+#include "host/cpuinfo.h"
25
+
26
#define TCG_TARGET_INSN_UNIT_SIZE 1
27
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
28
29
@@ -XXX,XX +XXX,XX @@ typedef enum {
30
# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
31
#endif
32
33
-extern bool have_bmi1;
34
-extern bool have_popcnt;
35
-extern bool have_avx1;
36
-extern bool have_avx2;
37
-extern bool have_avx512bw;
38
-extern bool have_avx512dq;
39
-extern bool have_avx512vbmi2;
40
-extern bool have_avx512vl;
41
-extern bool have_movbe;
42
-extern bool have_atomic16;
43
+#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
44
+#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
45
+#define have_avx1 (cpuinfo & CPUINFO_AVX1)
46
+#define have_avx2 (cpuinfo & CPUINFO_AVX2)
47
+#define have_movbe (cpuinfo & CPUINFO_MOVBE)
48
+#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
49
+
50
+/*
51
+ * There are interesting instructions in AVX512, so long as we have AVX512VL,
52
+ * which indicates support for EVEX on sizes smaller than 512 bits.
53
+ */
54
+#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
55
+ (cpuinfo & CPUINFO_AVX512F))
56
+#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
57
+#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
58
+#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
59
60
/* optional instructions */
61
#define TCG_TARGET_HAS_div2_i32 1
62
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
63
index XXXXXXX..XXXXXXX 100644
64
--- a/tcg/i386/tcg-target.c.inc
65
+++ b/tcg/i386/tcg-target.c.inc
66
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
67
# define SOFTMMU_RESERVE_REGS 0
68
#endif
69
70
-/* The host compiler should supply <cpuid.h> to enable runtime features
71
- detection, as we're not going to go so far as our own inline assembly.
72
- If not available, default values will be assumed. */
73
-#if defined(CONFIG_CPUID_H)
74
-#include "qemu/cpuid.h"
75
-#endif
76
-
77
/* For 64-bit, we always know that CMOV is available. */
78
#if TCG_TARGET_REG_BITS == 64
79
-# define have_cmov 1
80
-#elif defined(CONFIG_CPUID_H)
81
-static bool have_cmov;
82
+# define have_cmov true
83
#else
84
-# define have_cmov 0
85
-#endif
86
-
87
-/* We need these symbols in tcg-target.h, and we can't properly conditionalize
88
- it there. Therefore we always define the variable. */
89
-bool have_bmi1;
90
-bool have_popcnt;
91
-bool have_avx1;
92
-bool have_avx2;
93
-bool have_avx512bw;
94
-bool have_avx512dq;
95
-bool have_avx512vbmi2;
96
-bool have_avx512vl;
97
-bool have_movbe;
98
-bool have_atomic16;
99
-
100
-#ifdef CONFIG_CPUID_H
101
-static bool have_bmi2;
102
-static bool have_lzcnt;
103
-#else
104
-# define have_bmi2 0
105
-# define have_lzcnt 0
106
+# define have_cmov (cpuinfo & CPUINFO_CMOV)
107
#endif
108
+#define have_bmi2 (cpuinfo & CPUINFO_BMI2)
109
+#define have_lzcnt (cpuinfo & CPUINFO_LZCNT)
110
111
static const tcg_insn_unit *tb_ret_addr;
112
113
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
114
115
static void tcg_target_init(TCGContext *s)
116
{
117
-#ifdef CONFIG_CPUID_H
118
- unsigned a, b, c, d, b7 = 0, c7 = 0;
119
- unsigned max = __get_cpuid_max(0, 0);
120
-
121
- if (max >= 7) {
122
- /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
123
- __cpuid_count(7, 0, a, b7, c7, d);
124
- have_bmi1 = (b7 & bit_BMI) != 0;
125
- have_bmi2 = (b7 & bit_BMI2) != 0;
126
- }
127
-
128
- if (max >= 1) {
129
- __cpuid(1, a, b, c, d);
130
-#ifndef have_cmov
131
- /* For 32-bit, 99% certainty that we're running on hardware that
132
- supports cmov, but we still need to check. In case cmov is not
133
- available, we'll use a small forward branch. */
134
- have_cmov = (d & bit_CMOV) != 0;
135
-#endif
136
-
137
- /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
138
- need to probe for it. */
139
- have_movbe = (c & bit_MOVBE) != 0;
140
- have_popcnt = (c & bit_POPCNT) != 0;
141
-
142
- /* There are a number of things we must check before we can be
143
- sure of not hitting invalid opcode. */
144
- if (c & bit_OSXSAVE) {
145
- unsigned bv = xgetbv_low(0);
146
-
147
- if ((bv & 6) == 6) {
148
- have_avx1 = (c & bit_AVX) != 0;
149
- have_avx2 = (b7 & bit_AVX2) != 0;
150
-
151
- /*
152
- * There are interesting instructions in AVX512, so long
153
- * as we have AVX512VL, which indicates support for EVEX
154
- * on sizes smaller than 512 bits. We are required to
155
- * check that OPMASK and all extended ZMM state are enabled
156
- * even if we're not using them -- the insns will fault.
157
- */
158
- if ((bv & 0xe0) == 0xe0
159
- && (b7 & bit_AVX512F)
160
- && (b7 & bit_AVX512VL)) {
161
- have_avx512vl = true;
162
- have_avx512bw = (b7 & bit_AVX512BW) != 0;
163
- have_avx512dq = (b7 & bit_AVX512DQ) != 0;
164
- have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
165
- }
166
-
167
- /*
168
- * The Intel SDM has added:
169
- * Processors that enumerate support for Intel® AVX
170
- * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
171
- * guarantee that the 16-byte memory operations performed
172
- * by the following instructions will always be carried
173
- * out atomically:
174
- * - MOVAPD, MOVAPS, and MOVDQA.
175
- * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
176
- * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
177
- * with EVEX.128 and k0 (masking disabled).
178
- * Note that these instructions require the linear addresses
179
- * of their memory operands to be 16-byte aligned.
180
- *
181
- * AMD has provided an even stronger guarantee that processors
182
- * with AVX provide 16-byte atomicity for all cachable,
183
- * naturally aligned single loads and stores, e.g. MOVDQU.
184
- *
185
- * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
186
- */
187
- if (have_avx1) {
188
- __cpuid(0, a, b, c, d);
189
- have_atomic16 = (c == signature_INTEL_ecx ||
190
- c == signature_AMD_ecx);
191
- }
192
- }
193
- }
194
- }
195
-
196
- max = __get_cpuid_max(0x8000000, 0);
197
- if (max >= 1) {
198
- __cpuid(0x80000001, a, b, c, d);
199
- /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
200
- have_lzcnt = (c & bit_LZCNT) != 0;
201
- }
202
-#endif /* CONFIG_CPUID_H */
203
-
204
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
205
if (TCG_TARGET_REG_BITS == 64) {
206
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
207
--
208
2.34.1
209
210
Deleted patch
Use cpuinfo_init() during init_accel(), and the variable cpuinfo
during test_buffer_is_zero_next_accel(). Adjust the logic that
cycles through the set of accelerators for testing.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/bufferiszero.c | 127 ++++++++++++++++----------------
 1 file changed, 46 insertions(+), 81 deletions(-)
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/util/bufferiszero.c
14
+++ b/util/bufferiszero.c
15
@@ -XXX,XX +XXX,XX @@
16
#include "qemu/osdep.h"
17
#include "qemu/cutils.h"
18
#include "qemu/bswap.h"
19
+#include "host/cpuinfo.h"
20
21
static bool
22
buffer_zero_int(const void *buf, size_t len)
23
@@ -XXX,XX +XXX,XX @@ buffer_zero_avx512(const void *buf, size_t len)
24
}
25
#endif /* CONFIG_AVX512F_OPT */
26
27
-
28
-/* Note that for test_buffer_is_zero_next_accel, the most preferred
29
- * ISA must have the least significant bit.
30
- */
31
-#define CACHE_AVX512F 1
32
-#define CACHE_AVX2 2
33
-#define CACHE_SSE4 4
34
-#define CACHE_SSE2 8
35
-
36
-/* Make sure that these variables are appropriately initialized when
37
+/*
38
+ * Make sure that these variables are appropriately initialized when
39
* SSE2 is enabled on the compiler command-line, but the compiler is
40
* too old to support CONFIG_AVX2_OPT.
41
*/
42
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
43
-# define INIT_CACHE 0
44
-# define INIT_ACCEL buffer_zero_int
45
+# define INIT_USED 0
46
+# define INIT_LENGTH 0
47
+# define INIT_ACCEL buffer_zero_int
48
#else
49
# ifndef __SSE2__
50
# error "ISA selection confusion"
51
# endif
52
-# define INIT_CACHE CACHE_SSE2
53
-# define INIT_ACCEL buffer_zero_sse2
54
+# define INIT_USED CPUINFO_SSE2
55
+# define INIT_LENGTH 64
56
+# define INIT_ACCEL buffer_zero_sse2
57
#endif
58
59
-static unsigned cpuid_cache = INIT_CACHE;
60
+static unsigned used_accel = INIT_USED;
61
+static unsigned length_to_accel = INIT_LENGTH;
62
static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
63
-static int length_to_accel = 64;
64
65
-static void init_accel(unsigned cache)
66
+static unsigned __attribute__((noinline))
67
+select_accel_cpuinfo(unsigned info)
68
{
69
- bool (*fn)(const void *, size_t) = buffer_zero_int;
70
- if (cache & CACHE_SSE2) {
71
- fn = buffer_zero_sse2;
72
- length_to_accel = 64;
73
- }
74
-#ifdef CONFIG_AVX2_OPT
75
- if (cache & CACHE_SSE4) {
76
- fn = buffer_zero_sse4;
77
- length_to_accel = 64;
78
- }
79
- if (cache & CACHE_AVX2) {
80
- fn = buffer_zero_avx2;
81
- length_to_accel = 128;
82
- }
83
-#endif
84
+ /* Array is sorted in order of algorithm preference. */
85
+ static const struct {
86
+ unsigned bit;
87
+ unsigned len;
88
+ bool (*fn)(const void *, size_t);
89
+ } all[] = {
90
#ifdef CONFIG_AVX512F_OPT
91
- if (cache & CACHE_AVX512F) {
92
- fn = buffer_zero_avx512;
93
- length_to_accel = 256;
94
- }
95
+ { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
96
#endif
97
- buffer_accel = fn;
98
+#ifdef CONFIG_AVX2_OPT
99
+ { CPUINFO_AVX2, 128, buffer_zero_avx2 },
100
+ { CPUINFO_SSE4, 64, buffer_zero_sse4 },
101
+#endif
102
+ { CPUINFO_SSE2, 64, buffer_zero_sse2 },
103
+ { CPUINFO_ALWAYS, 0, buffer_zero_int },
104
+ };
105
+
106
+ for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
107
+ if (info & all[i].bit) {
108
+ length_to_accel = all[i].len;
109
+ buffer_accel = all[i].fn;
110
+ return all[i].bit;
111
+ }
112
+ }
113
+ return 0;
114
}
115
116
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
117
-#include "qemu/cpuid.h"
118
-
119
-static void __attribute__((constructor)) init_cpuid_cache(void)
120
+static void __attribute__((constructor)) init_accel(void)
121
{
122
- unsigned max = __get_cpuid_max(0, NULL);
123
- int a, b, c, d;
124
- unsigned cache = 0;
125
-
126
- if (max >= 1) {
127
- __cpuid(1, a, b, c, d);
128
- if (d & bit_SSE2) {
129
- cache |= CACHE_SSE2;
130
- }
131
- if (c & bit_SSE4_1) {
132
- cache |= CACHE_SSE4;
133
- }
134
-
135
- /* We must check that AVX is not just available, but usable. */
136
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
137
- unsigned bv = xgetbv_low(0);
138
- __cpuid_count(7, 0, a, b, c, d);
139
- if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
140
- cache |= CACHE_AVX2;
141
- }
142
- /* 0xe6:
143
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
144
- * and ZMM16-ZMM31 state are enabled by OS)
145
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
146
- */
147
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
148
- cache |= CACHE_AVX512F;
149
- }
150
- }
151
- }
152
- cpuid_cache = cache;
153
- init_accel(cache);
154
+ used_accel = select_accel_cpuinfo(cpuinfo_init());
155
}
156
#endif /* CONFIG_AVX2_OPT */
157
158
bool test_buffer_is_zero_next_accel(void)
159
{
160
- /* If no bits set, we just tested buffer_zero_int, and there
161
- are no more acceleration options to test. */
162
- if (cpuid_cache == 0) {
163
- return false;
164
- }
165
- /* Disable the accelerator we used before and select a new one. */
166
- cpuid_cache &= cpuid_cache - 1;
167
- init_accel(cpuid_cache);
168
- return true;
169
+ /*
170
+ * Accumulate the accelerators that we've already tested, and
171
+ * remove them from the set to test this round. We'll get back
172
+ * a zero from select_accel_cpuinfo when there are no more.
173
+ */
174
+ unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
175
+ used_accel |= used;
176
+ return used;
177
}
178
179
static bool select_accel_fn(const void *buf, size_t len)
180
--
181
2.34.1
182
183
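A short usage sketch (loosely modelled on the bufferiszero unit test; the loop
shape here is an assumption, not quoted code): buffer_is_zero() goes through
the currently selected accelerator, and each call to
test_buffer_is_zero_next_accel() switches to the next implementation that has
not been tried yet, returning false once all of them have been exercised.

    #include "qemu/osdep.h"
    #include "qemu/cutils.h"

    static void check_with_every_accel(const void *buf, size_t len,
                                       bool expect_zero)
    {
        do {
            g_assert(buffer_is_zero(buf, len) == expect_zero);
        } while (test_buffer_is_zero_next_accel());
    }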
Deleted patch
Place the CONFIG_AVX512BW_OPT block at the top,
which will aid function selection in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 migration/xbzrle.c | 244 ++++++++++++++++++++++-----------------------
 1 file changed, 122 insertions(+), 122 deletions(-)
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/migration/xbzrle.c
14
+++ b/migration/xbzrle.c
15
@@ -XXX,XX +XXX,XX @@
16
#include "qemu/host-utils.h"
17
#include "xbzrle.h"
18
19
+#if defined(CONFIG_AVX512BW_OPT)
20
+#include <immintrin.h>
21
+
22
+int __attribute__((target("avx512bw")))
23
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
24
+ uint8_t *dst, int dlen)
25
+{
26
+ uint32_t zrun_len = 0, nzrun_len = 0;
27
+ int d = 0, i = 0, num = 0;
28
+ uint8_t *nzrun_start = NULL;
29
+ /* add 1 to include residual part in main loop */
30
+ uint32_t count512s = (slen >> 6) + 1;
31
+ /* countResidual is tail of data, i.e., countResidual = slen % 64 */
32
+ uint32_t count_residual = slen & 0b111111;
33
+ bool never_same = true;
34
+ uint64_t mask_residual = 1;
35
+ mask_residual <<= count_residual;
36
+ mask_residual -= 1;
37
+ __m512i r = _mm512_set1_epi32(0);
38
+
39
+ while (count512s) {
40
+ int bytes_to_check = 64;
41
+ uint64_t mask = 0xffffffffffffffff;
42
+ if (count512s == 1) {
43
+ bytes_to_check = count_residual;
44
+ mask = mask_residual;
45
+ }
46
+ __m512i old_data = _mm512_mask_loadu_epi8(r,
47
+ mask, old_buf + i);
48
+ __m512i new_data = _mm512_mask_loadu_epi8(r,
49
+ mask, new_buf + i);
50
+ uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
51
+ count512s--;
52
+
53
+ bool is_same = (comp & 0x1);
54
+ while (bytes_to_check) {
55
+ if (d + 2 > dlen) {
56
+ return -1;
57
+ }
58
+ if (is_same) {
59
+ if (nzrun_len) {
60
+ d += uleb128_encode_small(dst + d, nzrun_len);
61
+ if (d + nzrun_len > dlen) {
62
+ return -1;
63
+ }
64
+ nzrun_start = new_buf + i - nzrun_len;
65
+ memcpy(dst + d, nzrun_start, nzrun_len);
66
+ d += nzrun_len;
67
+ nzrun_len = 0;
68
+ }
69
+ /* 64 data at a time for speed */
70
+ if (count512s && (comp == 0xffffffffffffffff)) {
71
+ i += 64;
72
+ zrun_len += 64;
73
+ break;
74
+ }
75
+ never_same = false;
76
+ num = ctz64(~comp);
77
+ num = (num < bytes_to_check) ? num : bytes_to_check;
78
+ zrun_len += num;
79
+ bytes_to_check -= num;
80
+ comp >>= num;
81
+ i += num;
82
+ if (bytes_to_check) {
83
+ /* still has different data after same data */
84
+ d += uleb128_encode_small(dst + d, zrun_len);
85
+ zrun_len = 0;
86
+ } else {
87
+ break;
88
+ }
89
+ }
90
+ if (never_same || zrun_len) {
91
+ /*
92
+ * never_same only acts if
93
+ * data begins with diff in first count512s
94
+ */
95
+ d += uleb128_encode_small(dst + d, zrun_len);
96
+ zrun_len = 0;
97
+ never_same = false;
98
+ }
99
+ /* has diff, 64 data at a time for speed */
100
+ if ((bytes_to_check == 64) && (comp == 0x0)) {
101
+ i += 64;
102
+ nzrun_len += 64;
103
+ break;
104
+ }
105
+ num = ctz64(comp);
106
+ num = (num < bytes_to_check) ? num : bytes_to_check;
107
+ nzrun_len += num;
108
+ bytes_to_check -= num;
109
+ comp >>= num;
110
+ i += num;
111
+ if (bytes_to_check) {
112
+ /* mask like 111000 */
113
+ d += uleb128_encode_small(dst + d, nzrun_len);
114
+ /* overflow */
115
+ if (d + nzrun_len > dlen) {
116
+ return -1;
117
+ }
118
+ nzrun_start = new_buf + i - nzrun_len;
119
+ memcpy(dst + d, nzrun_start, nzrun_len);
120
+ d += nzrun_len;
121
+ nzrun_len = 0;
122
+ is_same = true;
123
+ }
124
+ }
125
+ }
126
+
127
+ if (nzrun_len != 0) {
128
+ d += uleb128_encode_small(dst + d, nzrun_len);
129
+ /* overflow */
130
+ if (d + nzrun_len > dlen) {
131
+ return -1;
132
+ }
133
+ nzrun_start = new_buf + i - nzrun_len;
134
+ memcpy(dst + d, nzrun_start, nzrun_len);
135
+ d += nzrun_len;
136
+ }
137
+ return d;
138
+}
139
+#endif
140
+
141
/*
142
page = zrun nzrun
143
| zrun nzrun page
144
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
145
146
return d;
147
}
148
-
149
-#if defined(CONFIG_AVX512BW_OPT)
150
-#include <immintrin.h>
151
-
152
-int __attribute__((target("avx512bw")))
153
-xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
154
- uint8_t *dst, int dlen)
155
-{
156
- uint32_t zrun_len = 0, nzrun_len = 0;
157
- int d = 0, i = 0, num = 0;
158
- uint8_t *nzrun_start = NULL;
159
- /* add 1 to include residual part in main loop */
160
- uint32_t count512s = (slen >> 6) + 1;
161
- /* countResidual is tail of data, i.e., countResidual = slen % 64 */
162
- uint32_t count_residual = slen & 0b111111;
163
- bool never_same = true;
164
- uint64_t mask_residual = 1;
165
- mask_residual <<= count_residual;
166
- mask_residual -= 1;
167
- __m512i r = _mm512_set1_epi32(0);
168
-
169
- while (count512s) {
170
- int bytes_to_check = 64;
171
- uint64_t mask = 0xffffffffffffffff;
172
- if (count512s == 1) {
173
- bytes_to_check = count_residual;
174
- mask = mask_residual;
175
- }
176
- __m512i old_data = _mm512_mask_loadu_epi8(r,
177
- mask, old_buf + i);
178
- __m512i new_data = _mm512_mask_loadu_epi8(r,
179
- mask, new_buf + i);
180
- uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
181
- count512s--;
182
-
183
- bool is_same = (comp & 0x1);
184
- while (bytes_to_check) {
185
- if (d + 2 > dlen) {
186
- return -1;
187
- }
188
- if (is_same) {
189
- if (nzrun_len) {
190
- d += uleb128_encode_small(dst + d, nzrun_len);
191
- if (d + nzrun_len > dlen) {
192
- return -1;
193
- }
194
- nzrun_start = new_buf + i - nzrun_len;
195
- memcpy(dst + d, nzrun_start, nzrun_len);
196
- d += nzrun_len;
197
- nzrun_len = 0;
198
- }
199
- /* 64 data at a time for speed */
200
- if (count512s && (comp == 0xffffffffffffffff)) {
201
- i += 64;
202
- zrun_len += 64;
203
- break;
204
- }
205
- never_same = false;
206
- num = ctz64(~comp);
207
- num = (num < bytes_to_check) ? num : bytes_to_check;
208
- zrun_len += num;
209
- bytes_to_check -= num;
210
- comp >>= num;
211
- i += num;
212
- if (bytes_to_check) {
213
- /* still has different data after same data */
214
- d += uleb128_encode_small(dst + d, zrun_len);
215
- zrun_len = 0;
216
- } else {
217
- break;
218
- }
219
- }
220
- if (never_same || zrun_len) {
221
- /*
222
- * never_same only acts if
223
- * data begins with diff in first count512s
224
- */
225
- d += uleb128_encode_small(dst + d, zrun_len);
226
- zrun_len = 0;
227
- never_same = false;
228
- }
229
- /* has diff, 64 data at a time for speed */
230
- if ((bytes_to_check == 64) && (comp == 0x0)) {
231
- i += 64;
232
- nzrun_len += 64;
233
- break;
234
- }
235
- num = ctz64(comp);
236
- num = (num < bytes_to_check) ? num : bytes_to_check;
237
- nzrun_len += num;
238
- bytes_to_check -= num;
239
- comp >>= num;
240
- i += num;
241
- if (bytes_to_check) {
242
- /* mask like 111000 */
243
- d += uleb128_encode_small(dst + d, nzrun_len);
244
- /* overflow */
245
- if (d + nzrun_len > dlen) {
246
- return -1;
247
- }
248
- nzrun_start = new_buf + i - nzrun_len;
249
- memcpy(dst + d, nzrun_start, nzrun_len);
250
- d += nzrun_len;
251
- nzrun_len = 0;
252
- is_same = true;
253
- }
254
- }
255
- }
256
-
257
- if (nzrun_len != 0) {
258
- d += uleb128_encode_small(dst + d, nzrun_len);
259
- /* overflow */
260
- if (d + nzrun_len > dlen) {
261
- return -1;
262
- }
263
- nzrun_start = new_buf + i - nzrun_len;
264
- memcpy(dst + d, nzrun_start, nzrun_len);
265
- d += nzrun_len;
266
- }
267
- return d;
268
-}
269
-#endif
270
--
271
2.34.1
272
273
Deleted patch
Perform the function selection once, and only if CONFIG_AVX512_OPT
is enabled. Centralize the selection to xbzrle.c, instead of
spreading the init across 3 files.

Remove xbzrle-bench.c. The benefit of being able to benchmark
the different implementations is less important than not peeking
into the internals of the implementation.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 migration/xbzrle.h         |   5 +-
 migration/ram.c            |  34 +--
 migration/xbzrle.c         |  26 +-
 tests/bench/xbzrle-bench.c | 469 -------------------------------------
 tests/unit/test-xbzrle.c   |  49 +---
 tests/bench/meson.build    |   6 -
 6 files changed, 39 insertions(+), 550 deletions(-)
 delete mode 100644 tests/bench/xbzrle-bench.c
diff --git a/migration/xbzrle.h b/migration/xbzrle.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/migration/xbzrle.h
25
+++ b/migration/xbzrle.h
26
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
27
uint8_t *dst, int dlen);
28
29
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
30
-#if defined(CONFIG_AVX512BW_OPT)
31
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
32
- uint8_t *dst, int dlen);
33
-#endif
34
+
35
#endif
36
diff --git a/migration/ram.c b/migration/ram.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/migration/ram.c
39
+++ b/migration/ram.c
40
@@ -XXX,XX +XXX,XX @@
41
#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200
42
/* We can't use any flag that is bigger than 0x200 */
43
44
-int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
45
- uint8_t *, int) = xbzrle_encode_buffer;
46
-#if defined(CONFIG_AVX512BW_OPT)
47
-#include "qemu/cpuid.h"
48
-static void __attribute__((constructor)) init_cpu_flag(void)
49
-{
50
- unsigned max = __get_cpuid_max(0, NULL);
51
- int a, b, c, d;
52
- if (max >= 1) {
53
- __cpuid(1, a, b, c, d);
54
- /* We must check that AVX is not just available, but usable. */
55
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
56
- int bv;
57
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
58
- __cpuid_count(7, 0, a, b, c, d);
59
- /* 0xe6:
60
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
61
- * and ZMM16-ZMM31 state are enabled by OS)
62
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
63
- */
64
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
65
- xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
66
- }
67
- }
68
- }
69
-}
70
-#endif
71
-
72
XBZRLECacheStats xbzrle_counters;
73
74
/* used by the search for pages to send */
75
@@ -XXX,XX +XXX,XX @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
76
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
77
78
/* XBZRLE encoding (if there is no overflow) */
79
- encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
80
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
81
- TARGET_PAGE_SIZE);
82
+ encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
83
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
84
+ TARGET_PAGE_SIZE);
85
86
/*
87
* Update the cache contents, so that it corresponds to the data
88
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/migration/xbzrle.c
91
+++ b/migration/xbzrle.c
92
@@ -XXX,XX +XXX,XX @@
93
94
#if defined(CONFIG_AVX512BW_OPT)
95
#include <immintrin.h>
96
+#include "host/cpuinfo.h"
97
98
-int __attribute__((target("avx512bw")))
99
+static int __attribute__((target("avx512bw")))
100
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
101
uint8_t *dst, int dlen)
102
{
103
@@ -XXX,XX +XXX,XX @@ xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
104
}
105
return d;
106
}
107
+
108
+static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
109
+ int slen, uint8_t *dst, int dlen);
110
+
111
+static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
112
+
113
+static void __attribute__((constructor)) init_accel(void)
114
+{
115
+ unsigned info = cpuinfo_init();
116
+ if (info & CPUINFO_AVX512BW) {
117
+ accel_func = xbzrle_encode_buffer_avx512;
118
+ } else {
119
+ accel_func = xbzrle_encode_buffer_int;
120
+ }
121
+}
122
+
123
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
124
+ uint8_t *dst, int dlen)
125
+{
126
+ return accel_func(old_buf, new_buf, slen, dst, dlen);
127
+}
128
+
129
+#define xbzrle_encode_buffer xbzrle_encode_buffer_int
130
#endif
131
132
/*
133
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
134
deleted file mode 100644
135
index XXXXXXX..XXXXXXX
136
--- a/tests/bench/xbzrle-bench.c
137
+++ /dev/null
138
@@ -XXX,XX +XXX,XX @@
139
-/*
140
- * Xor Based Zero Run Length Encoding unit tests.
141
- *
142
- * Copyright 2013 Red Hat, Inc. and/or its affiliates
143
- *
144
- * Authors:
145
- * Orit Wasserman <owasserm@redhat.com>
146
- *
147
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
148
- * See the COPYING file in the top-level directory.
149
- *
150
- */
151
-#include "qemu/osdep.h"
152
-#include "qemu/cutils.h"
153
-#include "../migration/xbzrle.h"
154
-
155
-#if defined(CONFIG_AVX512BW_OPT)
156
-#define XBZRLE_PAGE_SIZE 4096
157
-static bool is_cpu_support_avx512bw;
158
-#include "qemu/cpuid.h"
159
-static void __attribute__((constructor)) init_cpu_flag(void)
160
-{
161
- unsigned max = __get_cpuid_max(0, NULL);
162
- int a, b, c, d;
163
- is_cpu_support_avx512bw = false;
164
- if (max >= 1) {
165
- __cpuid(1, a, b, c, d);
166
- /* We must check that AVX is not just available, but usable. */
167
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
168
- int bv;
169
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
170
- __cpuid_count(7, 0, a, b, c, d);
171
- /* 0xe6:
172
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
173
- * and ZMM16-ZMM31 state are enabled by OS)
174
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
175
- */
176
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
177
- is_cpu_support_avx512bw = true;
178
- }
179
- }
180
- }
181
- return ;
182
-}
183
-
184
-struct ResTime {
185
- float t_raw;
186
- float t_512;
187
-};
188
-
189
-
190
-/* Function prototypes
191
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
192
- uint8_t *dst, int dlen);
193
-*/
194
-static void encode_decode_zero(struct ResTime *res)
195
-{
196
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
197
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
198
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
199
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
200
- int i = 0;
201
- int dlen = 0, dlen512 = 0;
202
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
203
-
204
- for (i = diff_len; i > 0; i--) {
205
- buffer[1000 + i] = i;
206
- buffer512[1000 + i] = i;
207
- }
208
-
209
- buffer[1000 + diff_len + 3] = 103;
210
- buffer[1000 + diff_len + 5] = 105;
211
-
212
- buffer512[1000 + diff_len + 3] = 103;
213
- buffer512[1000 + diff_len + 5] = 105;
214
-
215
- /* encode zero page */
216
- time_t t_start, t_end, t_start512, t_end512;
217
- t_start = clock();
218
- dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
219
- XBZRLE_PAGE_SIZE);
220
- t_end = clock();
221
- float time_val = difftime(t_end, t_start);
222
- g_assert(dlen == 0);
223
-
224
- t_start512 = clock();
225
- dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
226
- compressed512, XBZRLE_PAGE_SIZE);
227
- t_end512 = clock();
228
- float time_val512 = difftime(t_end512, t_start512);
229
- g_assert(dlen512 == 0);
230
-
231
- res->t_raw = time_val;
232
- res->t_512 = time_val512;
233
-
234
- g_free(buffer);
235
- g_free(compressed);
236
- g_free(buffer512);
237
- g_free(compressed512);
238
-
239
-}
240
-
241
-static void test_encode_decode_zero_avx512(void)
242
-{
243
- int i;
244
- float time_raw = 0.0, time_512 = 0.0;
245
- struct ResTime res;
246
- for (i = 0; i < 10000; i++) {
247
- encode_decode_zero(&res);
248
- time_raw += res.t_raw;
249
- time_512 += res.t_512;
250
- }
251
- printf("Zero test:\n");
252
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
253
- printf("512 xbzrle_encode time is %f ms\n", time_512);
254
-}
255
-
256
-static void encode_decode_unchanged(struct ResTime *res)
257
-{
258
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
259
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
260
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
261
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
262
- int i = 0;
263
- int dlen = 0, dlen512 = 0;
264
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
265
-
266
- for (i = diff_len; i > 0; i--) {
267
- test[1000 + i] = i + 4;
268
- test512[1000 + i] = i + 4;
269
- }
270
-
271
- test[1000 + diff_len + 3] = 107;
272
- test[1000 + diff_len + 5] = 109;
273
-
274
- test512[1000 + diff_len + 3] = 107;
275
- test512[1000 + diff_len + 5] = 109;
276
-
277
- /* test unchanged buffer */
278
- time_t t_start, t_end, t_start512, t_end512;
279
- t_start = clock();
280
- dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
281
- XBZRLE_PAGE_SIZE);
282
- t_end = clock();
283
- float time_val = difftime(t_end, t_start);
284
- g_assert(dlen == 0);
285
-
286
- t_start512 = clock();
287
- dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
288
- compressed512, XBZRLE_PAGE_SIZE);
289
- t_end512 = clock();
290
- float time_val512 = difftime(t_end512, t_start512);
291
- g_assert(dlen512 == 0);
292
-
293
- res->t_raw = time_val;
294
- res->t_512 = time_val512;
295
-
296
- g_free(test);
297
- g_free(compressed);
298
- g_free(test512);
299
- g_free(compressed512);
300
-
301
-}
302
-
303
-static void test_encode_decode_unchanged_avx512(void)
304
-{
305
- int i;
306
- float time_raw = 0.0, time_512 = 0.0;
307
- struct ResTime res;
308
- for (i = 0; i < 10000; i++) {
309
- encode_decode_unchanged(&res);
310
- time_raw += res.t_raw;
311
- time_512 += res.t_512;
312
- }
313
- printf("Unchanged test:\n");
314
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
315
- printf("512 xbzrle_encode time is %f ms\n", time_512);
316
-}
317
-
318
-static void encode_decode_1_byte(struct ResTime *res)
319
-{
320
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
321
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
322
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
323
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
324
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
325
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
326
- int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
327
- uint8_t buf[2];
328
- uint8_t buf512[2];
329
-
330
- test[XBZRLE_PAGE_SIZE - 1] = 1;
331
- test512[XBZRLE_PAGE_SIZE - 1] = 1;
332
-
333
- time_t t_start, t_end, t_start512, t_end512;
334
- t_start = clock();
335
- dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
336
- XBZRLE_PAGE_SIZE);
337
- t_end = clock();
338
- float time_val = difftime(t_end, t_start);
339
- g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
340
-
341
- rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
342
- g_assert(rc == XBZRLE_PAGE_SIZE);
343
- g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
344
-
345
- t_start512 = clock();
346
- dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
347
- compressed512, XBZRLE_PAGE_SIZE);
348
- t_end512 = clock();
349
- float time_val512 = difftime(t_end512, t_start512);
350
- g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
351
-
352
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
353
- XBZRLE_PAGE_SIZE);
354
- g_assert(rc512 == XBZRLE_PAGE_SIZE);
355
- g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
356
-
357
- res->t_raw = time_val;
358
- res->t_512 = time_val512;
359
-
360
- g_free(buffer);
361
- g_free(compressed);
362
- g_free(test);
363
- g_free(buffer512);
364
- g_free(compressed512);
365
- g_free(test512);
366
-
367
-}
368
-
369
-static void test_encode_decode_1_byte_avx512(void)
370
-{
371
- int i;
372
- float time_raw = 0.0, time_512 = 0.0;
373
- struct ResTime res;
374
- for (i = 0; i < 10000; i++) {
375
- encode_decode_1_byte(&res);
376
- time_raw += res.t_raw;
377
- time_512 += res.t_512;
378
- }
379
- printf("1 byte test:\n");
380
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
381
- printf("512 xbzrle_encode time is %f ms\n", time_512);
382
-}
383
-
384
-static void encode_decode_overflow(struct ResTime *res)
385
-{
386
- uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
387
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
388
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
389
- uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
390
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
391
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
392
- int i = 0, rc = 0, rc512 = 0;
393
-
394
- for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
395
- test[i * 2] = 1;
396
- test512[i * 2] = 1;
397
- }
398
-
399
- /* encode overflow */
400
- time_t t_start, t_end, t_start512, t_end512;
401
- t_start = clock();
402
- rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
403
- XBZRLE_PAGE_SIZE);
404
- t_end = clock();
405
- float time_val = difftime(t_end, t_start);
406
- g_assert(rc == -1);
407
-
408
- t_start512 = clock();
409
- rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
410
- compressed512, XBZRLE_PAGE_SIZE);
411
- t_end512 = clock();
412
- float time_val512 = difftime(t_end512, t_start512);
413
- g_assert(rc512 == -1);
414
-
415
- res->t_raw = time_val;
416
- res->t_512 = time_val512;
417
-
418
- g_free(buffer);
419
- g_free(compressed);
420
- g_free(test);
421
- g_free(buffer512);
422
- g_free(compressed512);
423
- g_free(test512);
424
-
425
-}
426
-
427
-static void test_encode_decode_overflow_avx512(void)
428
-{
429
- int i;
430
- float time_raw = 0.0, time_512 = 0.0;
431
- struct ResTime res;
432
- for (i = 0; i < 10000; i++) {
433
- encode_decode_overflow(&res);
434
- time_raw += res.t_raw;
435
- time_512 += res.t_512;
436
- }
437
- printf("Overflow test:\n");
438
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
439
- printf("512 xbzrle_encode time is %f ms\n", time_512);
440
-}
441
-
442
-static void encode_decode_range_avx512(struct ResTime *res)
443
-{
444
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
445
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
446
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
447
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
448
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
449
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
450
- int i = 0, rc = 0, rc512 = 0;
451
- int dlen = 0, dlen512 = 0;
452
-
453
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
454
-
455
- for (i = diff_len; i > 0; i--) {
456
- buffer[1000 + i] = i;
457
- test[1000 + i] = i + 4;
458
- buffer512[1000 + i] = i;
459
- test512[1000 + i] = i + 4;
460
- }
461
-
462
- buffer[1000 + diff_len + 3] = 103;
463
- test[1000 + diff_len + 3] = 107;
464
-
465
- buffer[1000 + diff_len + 5] = 105;
466
- test[1000 + diff_len + 5] = 109;
467
-
468
- buffer512[1000 + diff_len + 3] = 103;
469
- test512[1000 + diff_len + 3] = 107;
470
-
471
- buffer512[1000 + diff_len + 5] = 105;
472
- test512[1000 + diff_len + 5] = 109;
473
-
474
- /* test encode/decode */
475
- time_t t_start, t_end, t_start512, t_end512;
476
- t_start = clock();
477
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
478
- XBZRLE_PAGE_SIZE);
479
- t_end = clock();
480
- float time_val = difftime(t_end, t_start);
481
- rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
482
- g_assert(rc < XBZRLE_PAGE_SIZE);
483
- g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
484
-
485
- t_start512 = clock();
486
- dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
487
- compressed512, XBZRLE_PAGE_SIZE);
488
- t_end512 = clock();
489
- float time_val512 = difftime(t_end512, t_start512);
490
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
491
- g_assert(rc512 < XBZRLE_PAGE_SIZE);
492
- g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
493
-
494
- res->t_raw = time_val;
495
- res->t_512 = time_val512;
496
-
497
- g_free(buffer);
498
- g_free(compressed);
499
- g_free(test);
500
- g_free(buffer512);
501
- g_free(compressed512);
502
- g_free(test512);
503
-
504
-}
505
-
506
-static void test_encode_decode_avx512(void)
507
-{
508
- int i;
509
- float time_raw = 0.0, time_512 = 0.0;
510
- struct ResTime res;
511
- for (i = 0; i < 10000; i++) {
512
- encode_decode_range_avx512(&res);
513
- time_raw += res.t_raw;
514
- time_512 += res.t_512;
515
- }
516
- printf("Encode decode test:\n");
517
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
518
- printf("512 xbzrle_encode time is %f ms\n", time_512);
519
-}
520
-
521
-static void encode_decode_random(struct ResTime *res)
522
-{
523
- uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
524
- uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
525
- uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
526
- uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
527
- uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
528
- uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
529
- int i = 0, rc = 0, rc512 = 0;
530
- int dlen = 0, dlen512 = 0;
531
-
532
- int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
533
- /* store the index of diff */
534
- int dirty_index[diff_len];
535
- for (int j = 0; j < diff_len; j++) {
536
- dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
537
- }
538
- for (i = diff_len - 1; i >= 0; i--) {
539
- buffer[dirty_index[i]] = i;
540
- test[dirty_index[i]] = i + 4;
541
- buffer512[dirty_index[i]] = i;
542
- test512[dirty_index[i]] = i + 4;
543
- }
544
-
545
- time_t t_start, t_end, t_start512, t_end512;
546
- t_start = clock();
547
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
548
- XBZRLE_PAGE_SIZE);
549
- t_end = clock();
550
- float time_val = difftime(t_end, t_start);
551
- rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
552
- g_assert(rc < XBZRLE_PAGE_SIZE);
553
-
554
- t_start512 = clock();
555
- dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
556
- compressed512, XBZRLE_PAGE_SIZE);
557
- t_end512 = clock();
558
- float time_val512 = difftime(t_end512, t_start512);
559
- rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
560
- g_assert(rc512 < XBZRLE_PAGE_SIZE);
561
-
562
- res->t_raw = time_val;
563
- res->t_512 = time_val512;
564
-
565
- g_free(buffer);
566
- g_free(compressed);
567
- g_free(test);
568
- g_free(buffer512);
569
- g_free(compressed512);
570
- g_free(test512);
571
-
572
-}
573
-
574
-static void test_encode_decode_random_avx512(void)
575
-{
576
- int i;
577
- float time_raw = 0.0, time_512 = 0.0;
578
- struct ResTime res;
579
- for (i = 0; i < 10000; i++) {
580
- encode_decode_random(&res);
581
- time_raw += res.t_raw;
582
- time_512 += res.t_512;
583
- }
584
- printf("Random test:\n");
585
- printf("Raw xbzrle_encode time is %f ms\n", time_raw);
586
- printf("512 xbzrle_encode time is %f ms\n", time_512);
587
-}
588
-#endif
589
-
590
-int main(int argc, char **argv)
591
-{
592
- g_test_init(&argc, &argv, NULL);
593
- g_test_rand_int();
594
- #if defined(CONFIG_AVX512BW_OPT)
595
- if (likely(is_cpu_support_avx512bw)) {
596
- g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
597
- g_test_add_func("/xbzrle/encode_decode_unchanged",
598
- test_encode_decode_unchanged_avx512);
599
- g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
600
- g_test_add_func("/xbzrle/encode_decode_overflow",
601
- test_encode_decode_overflow_avx512);
602
- g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
603
- g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
604
- }
605
- #endif
606
- return g_test_run();
607
-}
608
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
609
index XXXXXXX..XXXXXXX 100644
610
--- a/tests/unit/test-xbzrle.c
611
+++ b/tests/unit/test-xbzrle.c
612
@@ -XXX,XX +XXX,XX @@
613
614
#define XBZRLE_PAGE_SIZE 4096
615
616
-int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
617
- uint8_t *, int) = xbzrle_encode_buffer;
618
-#if defined(CONFIG_AVX512BW_OPT)
619
-#include "qemu/cpuid.h"
620
-static void __attribute__((constructor)) init_cpu_flag(void)
621
-{
622
- unsigned max = __get_cpuid_max(0, NULL);
623
- int a, b, c, d;
624
- if (max >= 1) {
625
- __cpuid(1, a, b, c, d);
626
- /* We must check that AVX is not just available, but usable. */
627
- if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
628
- int bv;
629
- __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
630
- __cpuid_count(7, 0, a, b, c, d);
631
- /* 0xe6:
632
- * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
633
- * and ZMM16-ZMM31 state are enabled by OS)
634
- * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
635
- */
636
- if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
637
- xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
638
- }
639
- }
640
- }
641
- return ;
642
-}
643
-#endif
644
-
645
static void test_uleb(void)
646
{
647
uint32_t i, val;
648
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_zero(void)
649
buffer[1000 + diff_len + 5] = 105;
650
651
/* encode zero page */
652
- dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
653
- XBZRLE_PAGE_SIZE);
654
+ dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE,
655
+ compressed, XBZRLE_PAGE_SIZE);
656
g_assert(dlen == 0);
657
658
g_free(buffer);
659
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_unchanged(void)
660
test[1000 + diff_len + 5] = 109;
661
662
/* test unchanged buffer */
663
- dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
664
- XBZRLE_PAGE_SIZE);
665
+ dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE,
666
+ compressed, XBZRLE_PAGE_SIZE);
667
g_assert(dlen == 0);
668
669
g_free(test);
670
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_1_byte(void)
671
672
test[XBZRLE_PAGE_SIZE - 1] = 1;
673
674
- dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
675
- XBZRLE_PAGE_SIZE);
676
+ dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
677
+ compressed, XBZRLE_PAGE_SIZE);
678
g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
679
680
rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
681
@@ -XXX,XX +XXX,XX @@ static void test_encode_decode_overflow(void)
682
}
683
684
/* encode overflow */
685
- rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
686
- XBZRLE_PAGE_SIZE);
687
+ rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
688
+ compressed, XBZRLE_PAGE_SIZE);
689
g_assert(rc == -1);
690
691
g_free(buffer);
692
@@ -XXX,XX +XXX,XX @@ static void encode_decode_range(void)
693
test[1000 + diff_len + 5] = 109;
694
695
/* test encode/decode */
696
- dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
697
- XBZRLE_PAGE_SIZE);
698
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE,
699
+ compressed, XBZRLE_PAGE_SIZE);
700
701
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
702
g_assert(rc < XBZRLE_PAGE_SIZE);
703
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
704
index XXXXXXX..XXXXXXX 100644
705
--- a/tests/bench/meson.build
706
+++ b/tests/bench/meson.build
707
@@ -XXX,XX +XXX,XX @@ qht_bench = executable('qht-bench',
708
sources: 'qht-bench.c',
709
dependencies: [qemuutil])
710
711
-if have_system
712
-xbzrle_bench = executable('xbzrle-bench',
713
- sources: 'xbzrle-bench.c',
714
- dependencies: [qemuutil,migration])
715
-endif
716
-
717
qtree_bench = executable('qtree-bench',
718
sources: 'qtree-bench.c',
719
dependencies: [qemuutil])
720
--
2.34.1

Deleted patch
The items in migration_files are built for libmigration and included
into softmmu_ss from there; no need to also include them directly.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 migration/meson.build | 1 -
 1 file changed, 1 deletion(-)

diff --git a/migration/meson.build b/migration/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/migration/meson.build
14
+++ b/migration/meson.build
15
@@ -XXX,XX +XXX,XX @@ migration_files = files(
16
'qemu-file.c',
17
'yank_functions.c',
18
)
19
-softmmu_ss.add(migration_files)
20
21
softmmu_ss.add(files(
22
'block-dirty-bitmap.c',
23
--
2.34.1

Deleted patch
Move the code from tcg/. The only use of these bits so far
is with respect to the atomicity of tcg operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/aarch64/host/cpuinfo.h | 22 ++++++++++
 tcg/aarch64/tcg-target.h | 6 ++-
 util/cpuinfo-aarch64.c | 67 +++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc | 40 -----------------
 util/meson.build | 4 +-
 5 files changed, 96 insertions(+), 43 deletions(-)
 create mode 100644 host/include/aarch64/host/cpuinfo.h
 create mode 100644 util/cpuinfo-aarch64.c

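For illustration only (not part of this patch): because constructor ordering
across translation units is unspecified, any other constructor that wants
these bits calls cpuinfo_init() rather than reading the variable directly.
The consumer below is hypothetical:

    #include "host/cpuinfo.h"

    static bool use_lse2;

    static void __attribute__((constructor)) init_fast_path(void)
    {
        /* Safe even if cpuinfo's own constructor has not run yet;
           cpuinfo_init() initializes on first call and is idempotent. */
        unsigned info = cpuinfo_init();
        use_lse2 = (info & CPUINFO_LSE2) != 0;
    }
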
diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
18
new file mode 100644
19
index XXXXXXX..XXXXXXX
20
--- /dev/null
21
+++ b/host/include/aarch64/host/cpuinfo.h
22
@@ -XXX,XX +XXX,XX @@
23
+/*
24
+ * SPDX-License-Identifier: GPL-2.0-or-later
25
+ * Host specific cpu indentification for AArch64.
26
+ */
27
+
28
+#ifndef HOST_CPUINFO_H
29
+#define HOST_CPUINFO_H
30
+
31
+#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
32
+#define CPUINFO_LSE (1u << 1)
33
+#define CPUINFO_LSE2 (1u << 2)
34
+
35
+/* Initialized with a constructor. */
36
+extern unsigned cpuinfo;
37
+
38
+/*
39
+ * We cannot rely on constructor ordering, so other constructors must
40
+ * use the function interface rather than the variable above.
41
+ */
42
+unsigned cpuinfo_init(void);
43
+
44
+#endif /* HOST_CPUINFO_H */
45
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/aarch64/tcg-target.h
48
+++ b/tcg/aarch64/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@
50
#ifndef AARCH64_TCG_TARGET_H
51
#define AARCH64_TCG_TARGET_H
52
53
+#include "host/cpuinfo.h"
54
+
55
#define TCG_TARGET_INSN_UNIT_SIZE 4
56
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
57
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
58
@@ -XXX,XX +XXX,XX @@ typedef enum {
59
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
60
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
61
62
-extern bool have_lse;
63
-extern bool have_lse2;
64
+#define have_lse (cpuinfo & CPUINFO_LSE)
65
+#define have_lse2 (cpuinfo & CPUINFO_LSE2)
66
67
/* optional instructions */
68
#define TCG_TARGET_HAS_div_i32 1
69
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
70
new file mode 100644
71
index XXXXXXX..XXXXXXX
72
--- /dev/null
73
+++ b/util/cpuinfo-aarch64.c
74
@@ -XXX,XX +XXX,XX @@
75
+/*
76
+ * SPDX-License-Identifier: GPL-2.0-or-later
77
+ * Host specific cpu indentification for AArch64.
78
+ */
79
+
80
+#include "qemu/osdep.h"
81
+#include "host/cpuinfo.h"
82
+
83
+#ifdef CONFIG_LINUX
84
+# ifdef CONFIG_GETAUXVAL
85
+# include <sys/auxv.h>
86
+# else
87
+# include <asm/hwcap.h>
88
+# include "elf.h"
89
+# endif
90
+#endif
91
+#ifdef CONFIG_DARWIN
92
+# include <sys/sysctl.h>
93
+#endif
94
+
95
+unsigned cpuinfo;
96
+
97
+#ifdef CONFIG_DARWIN
98
+static bool sysctl_for_bool(const char *name)
99
+{
100
+ int val = 0;
101
+ size_t len = sizeof(val);
102
+
103
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
104
+ return val != 0;
105
+ }
106
+
107
+ /*
108
+ * We might in the future ask for properties not present in older kernels,
109
+ * but we're only asking about static properties, all of which should be
110
+ * 'int'. So we shouln't see ENOMEM (val too small), or any of the other
111
+ * more exotic errors.
112
+ */
113
+ assert(errno == ENOENT);
114
+ return false;
115
+}
116
+#endif
117
+
118
+/* Called both as constructor and (possibly) via other constructors. */
119
+unsigned __attribute__((constructor)) cpuinfo_init(void)
120
+{
121
+ unsigned info = cpuinfo;
122
+
123
+ if (info) {
124
+ return info;
125
+ }
126
+
127
+ info = CPUINFO_ALWAYS;
128
+
129
+#ifdef CONFIG_LINUX
130
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
131
+ info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
132
+ info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
133
+#endif
134
+#ifdef CONFIG_DARWIN
135
+ info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
136
+ info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
137
+#endif
138
+
139
+ cpuinfo = info;
140
+ return info;
141
+}
142
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
143
index XXXXXXX..XXXXXXX 100644
144
--- a/tcg/aarch64/tcg-target.c.inc
145
+++ b/tcg/aarch64/tcg-target.c.inc
146
@@ -XXX,XX +XXX,XX @@
147
#include "../tcg-ldst.c.inc"
148
#include "../tcg-pool.c.inc"
149
#include "qemu/bitops.h"
150
-#ifdef __linux__
151
-#include <asm/hwcap.h>
152
-#endif
153
-#ifdef CONFIG_DARWIN
154
-#include <sys/sysctl.h>
155
-#endif
156
157
/* We're going to re-use TCGType in setting of the SF bit, which controls
158
the size of the operation performed. If we know the values match, it
159
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
160
return TCG_REG_X0 + slot;
161
}
162
163
-bool have_lse;
164
-bool have_lse2;
165
-
166
#define TCG_REG_TMP TCG_REG_X30
167
#define TCG_VEC_TMP TCG_REG_V31
168
169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
170
}
171
}
172
173
-#ifdef CONFIG_DARWIN
174
-static bool sysctl_for_bool(const char *name)
175
-{
176
- int val = 0;
177
- size_t len = sizeof(val);
178
-
179
- if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
180
- return val != 0;
181
- }
182
-
183
- /*
184
- * We might in the future ask for properties not present in older kernels,
185
- * but we're only asking about static properties, all of which should be
186
- * 'int'. So we shouln't see ENOMEM (val too small), or any of the other
187
- * more exotic errors.
188
- */
189
- assert(errno == ENOENT);
190
- return false;
191
-}
192
-#endif
193
-
194
static void tcg_target_init(TCGContext *s)
195
{
196
-#ifdef __linux__
197
- unsigned long hwcap = qemu_getauxval(AT_HWCAP);
198
- have_lse = hwcap & HWCAP_ATOMICS;
199
- have_lse2 = hwcap & HWCAP_USCAT;
200
-#endif
201
-#ifdef CONFIG_DARWIN
202
- have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
203
- have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
204
-#endif
205
-
206
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
207
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
208
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
209
diff --git a/util/meson.build b/util/meson.build
210
index XXXXXXX..XXXXXXX 100644
211
--- a/util/meson.build
212
+++ b/util/meson.build
213
@@ -XXX,XX +XXX,XX @@ if have_block
214
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
215
endif
216
217
-if cpu in ['x86', 'x86_64']
218
+if cpu == 'aarch64'
219
+ util_ss.add(files('cpuinfo-aarch64.c'))
220
+elif cpu in ['x86', 'x86_64']
221
util_ss.add(files('cpuinfo-i386.c'))
222
endif
223
--
2.34.1

Deleted patch
Separates the aarch64-specific portion into its own file.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/aarch64/host/atomic128-cas.h | 43 ++++++++++++++++++
 host/include/generic/host/atomic128-cas.h | 43 ++++++++++++++++++
 include/qemu/atomic128.h | 55 +----------------------
 3 files changed, 87 insertions(+), 54 deletions(-)
 create mode 100644 host/include/aarch64/host/atomic128-cas.h
 create mode 100644 host/include/generic/host/atomic128-cas.h

diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
17
+++ b/host/include/aarch64/host/atomic128-cas.h
18
@@ -XXX,XX +XXX,XX @@
19
+/*
20
+ * SPDX-License-Identifier: GPL-2.0-or-later
21
+ * Compare-and-swap for 128-bit atomic operations, AArch64 version.
22
+ *
23
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
24
+ *
25
+ * See docs/devel/atomics.rst for discussion about the guarantees each
26
+ * atomic primitive is meant to provide.
27
+ */
28
+
29
+#ifndef AARCH64_ATOMIC128_CAS_H
30
+#define AARCH64_ATOMIC128_CAS_H
31
+
32
+/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
33
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
34
+#include "host/include/generic/host/atomic128-cas.h"
35
+#else
36
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
37
+{
38
+ uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
39
+ uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
40
+ uint64_t oldl, oldh;
41
+ uint32_t tmp;
42
+
43
+ asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
44
+ "cmp %[oldl], %[cmpl]\n\t"
45
+ "ccmp %[oldh], %[cmph], #0, eq\n\t"
46
+ "b.ne 1f\n\t"
47
+ "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
48
+ "cbnz %w[tmp], 0b\n"
49
+ "1:"
50
+ : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
51
+ [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
52
+ : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
53
+ [newl] "r"(newl), [newh] "r"(newh)
54
+ : "memory", "cc");
55
+
56
+ return int128_make128(oldl, oldh);
57
+}
58
+# define HAVE_CMPXCHG128 1
59
+#endif
60
+
61
+#endif /* AARCH64_ATOMIC128_CAS_H */
62
diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h
63
new file mode 100644
64
index XXXXXXX..XXXXXXX
65
--- /dev/null
66
+++ b/host/include/generic/host/atomic128-cas.h
67
@@ -XXX,XX +XXX,XX @@
68
+/*
69
+ * SPDX-License-Identifier: GPL-2.0-or-later
70
+ * Compare-and-swap for 128-bit atomic operations, generic version.
71
+ *
72
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
73
+ *
74
+ * See docs/devel/atomics.rst for discussion about the guarantees each
75
+ * atomic primitive is meant to provide.
76
+ */
77
+
78
+#ifndef HOST_ATOMIC128_CAS_H
79
+#define HOST_ATOMIC128_CAS_H
80
+
81
+#if defined(CONFIG_ATOMIC128)
82
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
83
+{
84
+ Int128Alias r, c, n;
85
+
86
+ c.s = cmp;
87
+ n.s = new;
88
+ r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
89
+ return r.s;
90
+}
91
+# define HAVE_CMPXCHG128 1
92
+#elif defined(CONFIG_CMPXCHG128)
93
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
94
+{
95
+ Int128Alias r, c, n;
96
+
97
+ c.s = cmp;
98
+ n.s = new;
99
+ r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
100
+ return r.s;
101
+}
102
+# define HAVE_CMPXCHG128 1
103
+#else
104
+/* Fallback definition that must be optimized away, or error. */
105
+Int128 QEMU_ERROR("unsupported atomic")
106
+ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
107
+# define HAVE_CMPXCHG128 0
108
+#endif
109
+
110
+#endif /* HOST_ATOMIC128_CAS_H */
111
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
112
index XXXXXXX..XXXXXXX 100644
113
--- a/include/qemu/atomic128.h
114
+++ b/include/qemu/atomic128.h
115
@@ -XXX,XX +XXX,XX @@
116
* Therefore, special case each platform.
117
*/
118
119
-#if defined(CONFIG_ATOMIC128)
120
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
121
-{
122
- Int128Alias r, c, n;
123
-
124
- c.s = cmp;
125
- n.s = new;
126
- r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
127
- return r.s;
128
-}
129
-# define HAVE_CMPXCHG128 1
130
-#elif defined(CONFIG_CMPXCHG128)
131
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
132
-{
133
- Int128Alias r, c, n;
134
-
135
- c.s = cmp;
136
- n.s = new;
137
- r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
138
- return r.s;
139
-}
140
-# define HAVE_CMPXCHG128 1
141
-#elif defined(__aarch64__)
142
-/* Through gcc 8, aarch64 has no support for 128-bit at all. */
143
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
144
-{
145
- uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
146
- uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
147
- uint64_t oldl, oldh;
148
- uint32_t tmp;
149
-
150
- asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
151
- "cmp %[oldl], %[cmpl]\n\t"
152
- "ccmp %[oldh], %[cmph], #0, eq\n\t"
153
- "b.ne 1f\n\t"
154
- "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
155
- "cbnz %w[tmp], 0b\n"
156
- "1:"
157
- : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
158
- [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
159
- : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
160
- [newl] "r"(newl), [newh] "r"(newh)
161
- : "memory", "cc");
162
-
163
- return int128_make128(oldl, oldh);
164
-}
165
-# define HAVE_CMPXCHG128 1
166
-#else
167
-/* Fallback definition that must be optimized away, or error. */
168
-Int128 QEMU_ERROR("unsupported atomic")
169
- atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
170
-# define HAVE_CMPXCHG128 0
171
-#endif /* Some definition for HAVE_CMPXCHG128 */
172
-
173
+#include "host/atomic128-cas.h"
174
175
#if defined(CONFIG_ATOMIC128)
176
static inline Int128 atomic16_read(Int128 *ptr)
177
--
2.34.1

Deleted patch
Separates the aarch64-specific portion into its own file.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/aarch64/host/atomic128-ldst.h | 49 ++++++++++++++
 host/include/generic/host/atomic128-ldst.h | 57 +++++++++++++++++
 include/qemu/atomic128.h | 74 +---------------------
 3 files changed, 107 insertions(+), 73 deletions(-)
 create mode 100644 host/include/aarch64/host/atomic128-ldst.h
 create mode 100644 host/include/generic/host/atomic128-ldst.h

diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
17
+++ b/host/include/aarch64/host/atomic128-ldst.h
18
@@ -XXX,XX +XXX,XX @@
19
+/*
20
+ * SPDX-License-Identifier: GPL-2.0-or-later
21
+ * Load/store for 128-bit atomic operations, AArch64 version.
22
+ *
23
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
24
+ *
25
+ * See docs/devel/atomics.rst for discussion about the guarantees each
26
+ * atomic primitive is meant to provide.
27
+ */
28
+
29
+#ifndef AARCH64_ATOMIC128_LDST_H
30
+#define AARCH64_ATOMIC128_LDST_H
31
+
32
+/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
33
+#if !defined(CONFIG_ATOMIC128) && !defined(CONFIG_USER_ONLY)
34
+/* We can do better than cmpxchg for AArch64. */
35
+static inline Int128 atomic16_read(Int128 *ptr)
36
+{
37
+ uint64_t l, h;
38
+ uint32_t tmp;
39
+
40
+ /* The load must be paired with the store to guarantee not tearing. */
41
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
42
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
43
+ "cbnz %w[tmp], 0b"
44
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
45
+
46
+ return int128_make128(l, h);
47
+}
48
+
49
+static inline void atomic16_set(Int128 *ptr, Int128 val)
50
+{
51
+ uint64_t l = int128_getlo(val), h = int128_gethi(val);
52
+ uint64_t t1, t2;
53
+
54
+ /* Load into temporaries to acquire the exclusive access lock. */
55
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
56
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
57
+ "cbnz %w[t1], 0b"
58
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
59
+ : [l] "r"(l), [h] "r"(h));
60
+}
61
+
62
+# define HAVE_ATOMIC128 1
63
+#else
64
+#include "host/include/generic/host/atomic128-ldst.h"
65
+#endif
66
+
67
+#endif /* AARCH64_ATOMIC128_LDST_H */
68
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
69
new file mode 100644
70
index XXXXXXX..XXXXXXX
71
--- /dev/null
72
+++ b/host/include/generic/host/atomic128-ldst.h
73
@@ -XXX,XX +XXX,XX @@
74
+/*
75
+ * SPDX-License-Identifier: GPL-2.0-or-later
76
+ * Load/store for 128-bit atomic operations, generic version.
77
+ *
78
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
79
+ *
80
+ * See docs/devel/atomics.rst for discussion about the guarantees each
81
+ * atomic primitive is meant to provide.
82
+ */
83
+
84
+#ifndef HOST_ATOMIC128_LDST_H
85
+#define HOST_ATOMIC128_LDST_H
86
+
87
+#if defined(CONFIG_ATOMIC128)
88
+static inline Int128 atomic16_read(Int128 *ptr)
89
+{
90
+ Int128Alias r;
91
+
92
+ r.i = qatomic_read__nocheck((__int128_t *)ptr);
93
+ return r.s;
94
+}
95
+
96
+static inline void atomic16_set(Int128 *ptr, Int128 val)
97
+{
98
+ Int128Alias v;
99
+
100
+ v.s = val;
101
+ qatomic_set__nocheck((__int128_t *)ptr, v.i);
102
+}
103
+
104
+# define HAVE_ATOMIC128 1
105
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
106
+static inline Int128 atomic16_read(Int128 *ptr)
107
+{
108
+ /* Maybe replace 0 with 0, returning the old value. */
109
+ Int128 z = int128_make64(0);
110
+ return atomic16_cmpxchg(ptr, z, z);
111
+}
112
+
113
+static inline void atomic16_set(Int128 *ptr, Int128 val)
114
+{
115
+ Int128 old = *ptr, cmp;
116
+ do {
117
+ cmp = old;
118
+ old = atomic16_cmpxchg(ptr, cmp, val);
119
+ } while (int128_ne(old, cmp));
120
+}
121
+
122
+# define HAVE_ATOMIC128 1
123
+#else
124
+/* Fallback definitions that must be optimized away, or error. */
125
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
126
+void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
127
+# define HAVE_ATOMIC128 0
128
+#endif
129
+
130
+#endif /* HOST_ATOMIC128_LDST_H */
131
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/include/qemu/atomic128.h
134
+++ b/include/qemu/atomic128.h
135
@@ -XXX,XX +XXX,XX @@
136
*/
137
138
#include "host/atomic128-cas.h"
139
-
140
-#if defined(CONFIG_ATOMIC128)
141
-static inline Int128 atomic16_read(Int128 *ptr)
142
-{
143
- Int128Alias r;
144
-
145
- r.i = qatomic_read__nocheck((__int128_t *)ptr);
146
- return r.s;
147
-}
148
-
149
-static inline void atomic16_set(Int128 *ptr, Int128 val)
150
-{
151
- Int128Alias v;
152
-
153
- v.s = val;
154
- qatomic_set__nocheck((__int128_t *)ptr, v.i);
155
-}
156
-
157
-# define HAVE_ATOMIC128 1
158
-#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
159
-/* We can do better than cmpxchg for AArch64. */
160
-static inline Int128 atomic16_read(Int128 *ptr)
161
-{
162
- uint64_t l, h;
163
- uint32_t tmp;
164
-
165
- /* The load must be paired with the store to guarantee not tearing. */
166
- asm("0: ldxp %[l], %[h], %[mem]\n\t"
167
- "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
168
- "cbnz %w[tmp], 0b"
169
- : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
170
-
171
- return int128_make128(l, h);
172
-}
173
-
174
-static inline void atomic16_set(Int128 *ptr, Int128 val)
175
-{
176
- uint64_t l = int128_getlo(val), h = int128_gethi(val);
177
- uint64_t t1, t2;
178
-
179
- /* Load into temporaries to acquire the exclusive access lock. */
180
- asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
181
- "stxp %w[t1], %[l], %[h], %[mem]\n\t"
182
- "cbnz %w[t1], 0b"
183
- : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
184
- : [l] "r"(l), [h] "r"(h));
185
-}
186
-
187
-# define HAVE_ATOMIC128 1
188
-#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
189
-static inline Int128 atomic16_read(Int128 *ptr)
190
-{
191
- /* Maybe replace 0 with 0, returning the old value. */
192
- Int128 z = int128_make64(0);
193
- return atomic16_cmpxchg(ptr, z, z);
194
-}
195
-
196
-static inline void atomic16_set(Int128 *ptr, Int128 val)
197
-{
198
- Int128 old = *ptr, cmp;
199
- do {
200
- cmp = old;
201
- old = atomic16_cmpxchg(ptr, cmp, val);
202
- } while (int128_ne(old, cmp));
203
-}
204
-
205
-# define HAVE_ATOMIC128 1
206
-#else
207
-/* Fallback definitions that must be optimized away, or error. */
208
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
209
-void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
210
-# define HAVE_ATOMIC128 0
211
-#endif /* Some definition for HAVE_ATOMIC128 */
212
+#include "host/atomic128-ldst.h"
213
214
#endif /* QEMU_ATOMIC128_H */
215
--
2.34.1

Deleted patch
Silly typo: sizeof(16) != 16.

Fixes: e61f1efeb730 ("meson: Detect atomic128 support with optimization")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

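Spelling out the mistake (illustration only, not part of the patch): the
literal 16 has type int, so sizeof(16) is sizeof(int), typically 4, and the
configure-time test therefore only promised 4-byte alignment instead of the
intended 16:

    int main(void)
    {
        /* sizeof of a literal is the size of its type, not its value. */
        _Static_assert(sizeof(16) == sizeof(int), "16 has type int");
        return (int)sizeof(16);   /* typically 4 on QEMU hosts, not 16 */
    }
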
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ if has_int128
16
# __alignof(unsigned __int128) for the host.
17
atomic_test_128 = '''
18
int main(int ac, char **av) {
19
- unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16));
20
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
21
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
22
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
23
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
24
--
2.34.1

Deleted patch
Not only the routines in ldst_atomicity.c.inc need markup,
but also the ones in the headers.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/generic/host/atomic128-cas.h | 12 ++++++++----
 host/include/generic/host/atomic128-ldst.h | 18 ++++++++++++------
 include/qemu/atomic128.h | 17 +++++++++++++++++
 accel/tcg/ldst_atomicity.c.inc | 17 -----------------
 4 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/host/include/generic/host/atomic128-cas.h
16
+++ b/host/include/generic/host/atomic128-cas.h
17
@@ -XXX,XX +XXX,XX @@
18
#define HOST_ATOMIC128_CAS_H
19
20
#if defined(CONFIG_ATOMIC128)
21
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
22
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
23
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
24
{
25
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
26
Int128Alias r, c, n;
27
28
c.s = cmp;
29
n.s = new;
30
- r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
31
+ r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
32
return r.s;
33
}
34
# define HAVE_CMPXCHG128 1
35
#elif defined(CONFIG_CMPXCHG128)
36
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
37
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
38
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
39
{
40
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
41
Int128Alias r, c, n;
42
43
c.s = cmp;
44
n.s = new;
45
- r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
46
+ r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
47
return r.s;
48
}
49
# define HAVE_CMPXCHG128 1
50
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/host/include/generic/host/atomic128-ldst.h
53
+++ b/host/include/generic/host/atomic128-ldst.h
54
@@ -XXX,XX +XXX,XX @@
55
#define HOST_ATOMIC128_LDST_H
56
57
#if defined(CONFIG_ATOMIC128)
58
-static inline Int128 atomic16_read(Int128 *ptr)
59
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
60
+atomic16_read(Int128 *ptr)
61
{
62
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
63
Int128Alias r;
64
65
- r.i = qatomic_read__nocheck((__int128_t *)ptr);
66
+ r.i = qatomic_read__nocheck(ptr_align);
67
return r.s;
68
}
69
70
-static inline void atomic16_set(Int128 *ptr, Int128 val)
71
+static inline void ATTRIBUTE_ATOMIC128_OPT
72
+atomic16_set(Int128 *ptr, Int128 val)
73
{
74
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
75
Int128Alias v;
76
77
v.s = val;
78
- qatomic_set__nocheck((__int128_t *)ptr, v.i);
79
+ qatomic_set__nocheck(ptr_align, v.i);
80
}
81
82
# define HAVE_ATOMIC128 1
83
#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
84
-static inline Int128 atomic16_read(Int128 *ptr)
85
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
86
+atomic16_read(Int128 *ptr)
87
{
88
/* Maybe replace 0 with 0, returning the old value. */
89
Int128 z = int128_make64(0);
90
return atomic16_cmpxchg(ptr, z, z);
91
}
92
93
-static inline void atomic16_set(Int128 *ptr, Int128 val)
94
+static inline void ATTRIBUTE_ATOMIC128_OPT
95
+atomic16_set(Int128 *ptr, Int128 val)
96
{
97
Int128 old = *ptr, cmp;
98
do {
99
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/include/qemu/atomic128.h
102
+++ b/include/qemu/atomic128.h
103
@@ -XXX,XX +XXX,XX @@
104
105
#include "qemu/int128.h"
106
107
+/*
108
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
109
+ * that are supported by the host, e.g. s390x. We can force the pointer to
110
+ * have our known alignment with __builtin_assume_aligned, however prior to
111
+ * GCC 13 that was only reliable with optimization enabled. See
112
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
113
+ */
114
+#if defined(CONFIG_ATOMIC128_OPT)
115
+# if !defined(__OPTIMIZE__)
116
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
117
+# endif
118
+# define CONFIG_ATOMIC128
119
+#endif
120
+#ifndef ATTRIBUTE_ATOMIC128_OPT
121
+# define ATTRIBUTE_ATOMIC128_OPT
122
+#endif
123
+
124
/*
125
* GCC is a house divided about supporting large atomic operations.
126
*
127
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
128
index XXXXXXX..XXXXXXX 100644
129
--- a/accel/tcg/ldst_atomicity.c.inc
130
+++ b/accel/tcg/ldst_atomicity.c.inc
131
@@ -XXX,XX +XXX,XX @@
132
#endif
133
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
134
135
-/*
136
- * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
137
- * that are supported by the host, e.g. s390x. We can force the pointer to
138
- * have our known alignment with __builtin_assume_aligned, however prior to
139
- * GCC 13 that was only reliable with optimization enabled. See
140
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
141
- */
142
-#if defined(CONFIG_ATOMIC128_OPT)
143
-# if !defined(__OPTIMIZE__)
144
-# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
145
-# endif
146
-# define CONFIG_ATOMIC128
147
-#endif
148
-#ifndef ATTRIBUTE_ATOMIC128_OPT
149
-# define ATTRIBUTE_ATOMIC128_OPT
150
-#endif
151
-
152
#if defined(CONFIG_ATOMIC128)
153
# define HAVE_al16_fast true
154
#else
155
--
2.34.1

Deleted patch
No need to roll our own, as this is now provided by tcg.
This was the last use of retxh, so remove that too.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/cpu.h | 1 -
 target/ppc/helper.h | 9 ----
 target/ppc/mem_helper.c | 48 --------------------
 target/ppc/translate.c | 34 ++-------------
 target/ppc/translate/fixedpoint-impl.c.inc | 51 +++-------------------
 5 files changed, 11 insertions(+), 132 deletions(-)

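For readers who have not met retxh/retxl (sketch only, condensed from the
diffs below and from the s390x patch later in this series): helpers used to
return 128 bits split between the return value and a field in CPU state,
which the translator then reloaded; a TCGv_i128 memory op hands back the
whole pair at once:

    /* Old shape: helper returns one half, stashes the other in env. */
    env->retxh = int128_gethi(ret);
    return int128_getlo(ret);

    /* New shape in the translator: one 16-byte load, then split. */
    TCGv_i128 t16 = tcg_temp_new_i128();
    tcg_gen_qemu_ld_i128(t16, EA, ctx->mem_idx, DEF_MEMOP(MO_128 | MO_ALIGN));
    tcg_gen_extr_i128_i64(lo, hi, t16);
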
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/ppc/cpu.h
17
+++ b/target/ppc/cpu.h
18
@@ -XXX,XX +XXX,XX @@ struct CPUArchState {
19
/* used to speed-up TLB assist handlers */
20
21
target_ulong nip; /* next instruction pointer */
22
- uint64_t retxh; /* high part of 128-bit helper return */
23
24
/* when a memory exception occurs, the access type is stored here */
25
int access_type;
26
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/ppc/helper.h
29
+++ b/target/ppc/helper.h
30
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32)
31
32
DEF_HELPER_1(tbegin, void, env)
33
DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
34
-
35
-#ifdef TARGET_PPC64
36
-DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
37
-DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
38
-DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
39
- void, env, tl, i64, i64, i32)
40
-DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
41
- void, env, tl, i64, i64, i32)
42
-#endif
43
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/ppc/mem_helper.c
46
+++ b/target/ppc/mem_helper.c
47
@@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
48
return i;
49
}
50
51
-#ifdef TARGET_PPC64
52
-uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
53
- uint32_t opidx)
54
-{
55
- Int128 ret;
56
-
57
- /* We will have raised EXCP_ATOMIC from the translator. */
58
- assert(HAVE_ATOMIC128);
59
- ret = cpu_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
60
- env->retxh = int128_gethi(ret);
61
- return int128_getlo(ret);
62
-}
63
-
64
-uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
65
- uint32_t opidx)
66
-{
67
- Int128 ret;
68
-
69
- /* We will have raised EXCP_ATOMIC from the translator. */
70
- assert(HAVE_ATOMIC128);
71
- ret = cpu_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
72
- env->retxh = int128_gethi(ret);
73
- return int128_getlo(ret);
74
-}
75
-
76
-void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
77
- uint64_t lo, uint64_t hi, uint32_t opidx)
78
-{
79
- Int128 val;
80
-
81
- /* We will have raised EXCP_ATOMIC from the translator. */
82
- assert(HAVE_ATOMIC128);
83
- val = int128_make128(lo, hi);
84
- cpu_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
85
-}
86
-
87
-void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
88
- uint64_t lo, uint64_t hi, uint32_t opidx)
89
-{
90
- Int128 val;
91
-
92
- /* We will have raised EXCP_ATOMIC from the translator. */
93
- assert(HAVE_ATOMIC128);
94
- val = int128_make128(lo, hi);
95
- cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
96
-}
97
-#endif
98
-
99
/*****************************************************************************/
100
/* Altivec extension helpers */
101
#if HOST_BIG_ENDIAN
102
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/ppc/translate.c
105
+++ b/target/ppc/translate.c
106
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
107
{
108
int rd = rD(ctx->opcode);
109
TCGv EA, hi, lo;
110
+ TCGv_i128 t16;
111
112
if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
113
(rd == rB(ctx->opcode)))) {
114
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
115
lo = cpu_gpr[rd + 1];
116
hi = cpu_gpr[rd];
117
118
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
119
- if (HAVE_ATOMIC128) {
120
- TCGv_i32 oi = tcg_temp_new_i32();
121
- if (ctx->le_mode) {
122
- tcg_gen_movi_i32(oi, make_memop_idx(MO_LE | MO_128 | MO_ALIGN,
123
- ctx->mem_idx));
124
- gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
125
- } else {
126
- tcg_gen_movi_i32(oi, make_memop_idx(MO_BE | MO_128 | MO_ALIGN,
127
- ctx->mem_idx));
128
- gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
129
- }
130
- tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
131
- } else {
132
- /* Restart with exclusive lock. */
133
- gen_helper_exit_atomic(cpu_env);
134
- ctx->base.is_jmp = DISAS_NORETURN;
135
- return;
136
- }
137
- } else if (ctx->le_mode) {
138
- tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEUQ | MO_ALIGN_16);
139
- tcg_gen_mov_tl(cpu_reserve, EA);
140
- gen_addr_add(ctx, EA, EA, 8);
141
- tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEUQ);
142
- } else {
143
- tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEUQ | MO_ALIGN_16);
144
- tcg_gen_mov_tl(cpu_reserve, EA);
145
- gen_addr_add(ctx, EA, EA, 8);
146
- tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEUQ);
147
- }
148
+ t16 = tcg_temp_new_i128();
149
+ tcg_gen_qemu_ld_i128(t16, EA, ctx->mem_idx, DEF_MEMOP(MO_128 | MO_ALIGN));
150
+ tcg_gen_extr_i128_i64(lo, hi, t16);
151
152
tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
153
tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));
154
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/target/ppc/translate/fixedpoint-impl.c.inc
157
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
158
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
159
#if defined(TARGET_PPC64)
160
TCGv ea;
161
TCGv_i64 low_addr_gpr, high_addr_gpr;
162
- MemOp mop;
163
+ TCGv_i128 t16;
164
165
REQUIRE_INSNS_FLAGS(ctx, 64BX);
166
167
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_quad(DisasContext *ctx, arg_D *a, bool store, bool prefixed)
168
low_addr_gpr = cpu_gpr[a->rt + 1];
169
high_addr_gpr = cpu_gpr[a->rt];
170
}
171
+ t16 = tcg_temp_new_i128();
172
173
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
174
- if (HAVE_ATOMIC128) {
175
- mop = DEF_MEMOP(MO_128);
176
- TCGv_i32 oi = tcg_constant_i32(make_memop_idx(mop, ctx->mem_idx));
177
- if (store) {
178
- if (ctx->le_mode) {
179
- gen_helper_stq_le_parallel(cpu_env, ea, low_addr_gpr,
180
- high_addr_gpr, oi);
181
- } else {
182
- gen_helper_stq_be_parallel(cpu_env, ea, high_addr_gpr,
183
- low_addr_gpr, oi);
184
-
185
- }
186
- } else {
187
- if (ctx->le_mode) {
188
- gen_helper_lq_le_parallel(low_addr_gpr, cpu_env, ea, oi);
189
- tcg_gen_ld_i64(high_addr_gpr, cpu_env,
190
- offsetof(CPUPPCState, retxh));
191
- } else {
192
- gen_helper_lq_be_parallel(high_addr_gpr, cpu_env, ea, oi);
193
- tcg_gen_ld_i64(low_addr_gpr, cpu_env,
194
- offsetof(CPUPPCState, retxh));
195
- }
196
- }
197
- } else {
198
- /* Restart with exclusive lock. */
199
- gen_helper_exit_atomic(cpu_env);
200
- ctx->base.is_jmp = DISAS_NORETURN;
201
- }
202
+ if (store) {
203
+ tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr);
204
+ tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
205
} else {
206
- mop = DEF_MEMOP(MO_UQ);
207
- if (store) {
208
- tcg_gen_qemu_st_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
209
- } else {
210
- tcg_gen_qemu_ld_i64(low_addr_gpr, ea, ctx->mem_idx, mop);
211
- }
212
-
213
- gen_addr_add(ctx, ea, ea, 8);
214
-
215
- if (store) {
216
- tcg_gen_qemu_st_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
217
- } else {
218
- tcg_gen_qemu_ld_i64(high_addr_gpr, ea, ctx->mem_idx, mop);
219
- }
220
+ tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
221
+ tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16);
222
}
223
#else
224
qemu_build_not_reached();
225
--
2.34.1

Deleted patch
No need to roll our own, as this is now provided by tcg.
This was the last use of retxl, so remove that too.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/cpu.h | 3 --
 target/s390x/helper.h | 4 ---
 target/s390x/tcg/mem_helper.c | 61 --------------------------------
 target/s390x/tcg/translate.c | 30 +++++-----------
 target/s390x/tcg/insn-data.h.inc | 2 +-
 5 files changed, 9 insertions(+), 91 deletions(-)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/s390x/cpu.h
18
+++ b/target/s390x/cpu.h
19
@@ -XXX,XX +XXX,XX @@ struct CPUArchState {
20
21
float_status fpu_status; /* passed to softfloat lib */
22
23
- /* The low part of a 128-bit return, or remainder of a divide. */
24
- uint64_t retxl;
25
-
26
PSW psw;
27
28
S390CrashReason crash_reason;
29
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/s390x/helper.h
32
+++ b/target/s390x/helper.h
33
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
34
DEF_HELPER_FLAGS_2(srnm, TCG_CALL_NO_WG, void, env, i64)
35
DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
36
DEF_HELPER_2(stfle, i32, env, i64)
37
-DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
38
-DEF_HELPER_FLAGS_2(lpq_parallel, TCG_CALL_NO_WG, i64, env, i64)
39
-DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
40
-DEF_HELPER_FLAGS_4(stpq_parallel, TCG_CALL_NO_WG, void, env, i64, i64, i64)
41
DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
42
DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
43
DEF_HELPER_4(cu14, i32, env, i32, i32, i32)
44
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/s390x/tcg/mem_helper.c
47
+++ b/target/s390x/tcg/mem_helper.c
48
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
49
}
50
#endif
51
52
-/* load pair from quadword */
53
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
54
-{
55
- uintptr_t ra = GETPC();
56
- uint64_t hi, lo;
57
-
58
- check_alignment(env, addr, 16, ra);
59
- hi = cpu_ldq_data_ra(env, addr + 0, ra);
60
- lo = cpu_ldq_data_ra(env, addr + 8, ra);
61
-
62
- env->retxl = lo;
63
- return hi;
64
-}
65
-
66
-uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
67
-{
68
- uintptr_t ra = GETPC();
69
- uint64_t hi, lo;
70
- int mem_idx;
71
- MemOpIdx oi;
72
- Int128 v;
73
-
74
- assert(HAVE_ATOMIC128);
75
-
76
- mem_idx = cpu_mmu_index(env, false);
77
- oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
78
- v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
79
- hi = int128_gethi(v);
80
- lo = int128_getlo(v);
81
-
82
- env->retxl = lo;
83
- return hi;
84
-}
85
-
86
-/* store pair to quadword */
87
-void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
88
- uint64_t low, uint64_t high)
89
-{
90
- uintptr_t ra = GETPC();
91
-
92
- check_alignment(env, addr, 16, ra);
93
- cpu_stq_data_ra(env, addr + 0, high, ra);
94
- cpu_stq_data_ra(env, addr + 8, low, ra);
95
-}
96
-
97
-void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
98
- uint64_t low, uint64_t high)
99
-{
100
- uintptr_t ra = GETPC();
101
- int mem_idx;
102
- MemOpIdx oi;
103
- Int128 v;
104
-
105
- assert(HAVE_ATOMIC128);
106
-
107
- mem_idx = cpu_mmu_index(env, false);
108
- oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
109
- v = int128_make128(low, high);
110
- cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
111
-}
112
-
113
/* Execute instruction. This instruction executes an insn modified with
114
the contents of r1. It does not change the executed instruction in memory;
115
it does not change the program counter.
116
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/s390x/tcg/translate.c
119
+++ b/target/s390x/tcg/translate.c
120
@@ -XXX,XX +XXX,XX @@ static void store_freg32_i64(int reg, TCGv_i64 v)
121
tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
122
}
123
124
-static void return_low128(TCGv_i64 dest)
125
-{
126
- tcg_gen_ld_i64(dest, cpu_env, offsetof(CPUS390XState, retxl));
127
-}
128
-
129
static void update_psw_addr(DisasContext *s)
130
{
131
/* psw.addr */
132
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
133
134
static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
135
{
136
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
137
- gen_helper_lpq(o->out, cpu_env, o->in2);
138
- } else if (HAVE_ATOMIC128) {
139
- gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
140
- } else {
141
- gen_helper_exit_atomic(cpu_env);
142
- return DISAS_NORETURN;
143
- }
144
- return_low128(o->out2);
145
+ o->out_128 = tcg_temp_new_i128();
146
+ tcg_gen_qemu_ld_i128(o->out_128, o->in2, get_mem_index(s),
147
+ MO_TE | MO_128 | MO_ALIGN);
148
return DISAS_NEXT;
149
}
150
151
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
152
153
static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
154
{
155
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
156
- gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
157
- } else if (HAVE_ATOMIC128) {
158
- gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
159
- } else {
160
- gen_helper_exit_atomic(cpu_env);
161
- return DISAS_NORETURN;
162
- }
163
+ TCGv_i128 t16 = tcg_temp_new_i128();
164
+
165
+ tcg_gen_concat_i64_i128(t16, o->out2, o->out);
166
+ tcg_gen_qemu_st_i128(t16, o->in2, get_mem_index(s),
167
+ MO_TE | MO_128 | MO_ALIGN);
168
return DISAS_NEXT;
169
}
170
171
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
172
index XXXXXXX..XXXXXXX 100644
173
--- a/target/s390x/tcg/insn-data.h.inc
174
+++ b/target/s390x/tcg/insn-data.h.inc
175
@@ -XXX,XX +XXX,XX @@
176
D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
177
D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEUQ)
178
/* LOAD PAIR FROM QUADWORD */
179
- C(0xe38f, LPQ, RXY_a, Z, 0, a2, r1_P, 0, lpq, 0)
180
+ C(0xe38f, LPQ, RXY_a, Z, 0, a2, 0, r1_D64, lpq, 0)
181
/* LOAD POSITIVE */
182
C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32)
183
C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)
184
--
2.34.1

Deleted patch
With the current structure of cputlb.c, there is no difference
between the little-endian and big-endian entry points, aside
from the assert. Unify the pairs of functions.

The only use of the functions with explicit endianness was in
target/sparc64, and that was only to satisfy the assert: the
correct endianness is already built into memop.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h | 58 ++-----
 accel/tcg/cputlb.c | 122 +++-----------
 accel/tcg/user-exec.c | 322 ++++++++++--------------------------
 target/arm/tcg/m_helper.c | 4 +-
 target/sparc/ldst_helper.c | 18 +-
 accel/tcg/ldst_common.c.inc | 24 +--
 6 files changed, 137 insertions(+), 411 deletions(-)

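A usage sketch (not code from this patch; the helper name below is made up):
a caller that previously chose between cpu_ldq_be_mmu() and cpu_ldq_le_mmu()
already constructs a MemOpIdx, and that MemOpIdx carries the byte order, so
the unified entry point only needs to check the access size:

    static uint64_t load_u64_be(CPUArchState *env, abi_ptr addr,
                                int mmu_idx, uintptr_t retaddr)
    {
        /* Byte order lives in the MemOpIdx: MO_BEUQ here, MO_LEUQ for LE. */
        MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_ALIGN, mmu_idx);
        return cpu_ldq_mmu(env, addr, oi, retaddr);
    }
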
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/exec/cpu_ldst.h
24
+++ b/include/exec/cpu_ldst.h
25
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val,
26
int mmu_idx, uintptr_t ra);
27
28
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
29
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr ptr,
30
- MemOpIdx oi, uintptr_t ra);
31
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr ptr,
32
- MemOpIdx oi, uintptr_t ra);
33
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr ptr,
34
- MemOpIdx oi, uintptr_t ra);
35
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr ptr,
36
- MemOpIdx oi, uintptr_t ra);
37
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
38
- MemOpIdx oi, uintptr_t ra);
39
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
40
- MemOpIdx oi, uintptr_t ra);
41
-
42
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
43
- MemOpIdx oi, uintptr_t ra);
44
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
45
- MemOpIdx oi, uintptr_t ra);
46
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
47
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
48
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
49
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra);
50
51
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
52
MemOpIdx oi, uintptr_t ra);
53
-void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
54
- MemOpIdx oi, uintptr_t ra);
55
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
56
- MemOpIdx oi, uintptr_t ra);
57
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
58
- MemOpIdx oi, uintptr_t ra);
59
-void cpu_stw_le_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
60
- MemOpIdx oi, uintptr_t ra);
61
-void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
62
- MemOpIdx oi, uintptr_t ra);
63
-void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
64
- MemOpIdx oi, uintptr_t ra);
65
-
66
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
67
- MemOpIdx oi, uintptr_t ra);
68
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
69
- MemOpIdx oi, uintptr_t ra);
70
+void cpu_stw_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
71
+ MemOpIdx oi, uintptr_t ra);
72
+void cpu_stl_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
73
+ MemOpIdx oi, uintptr_t ra);
74
+void cpu_stq_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
75
+ MemOpIdx oi, uintptr_t ra);
76
+void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
77
+ MemOpIdx oi, uintptr_t ra);
78
79
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
80
uint32_t cmpv, uint32_t newv,
81
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
82
# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra
83
# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra
84
# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra
85
-# define cpu_ldw_mmu cpu_ldw_be_mmu
86
-# define cpu_ldl_mmu cpu_ldl_be_mmu
87
-# define cpu_ldq_mmu cpu_ldq_be_mmu
88
# define cpu_stw_data cpu_stw_be_data
89
# define cpu_stl_data cpu_stl_be_data
90
# define cpu_stq_data cpu_stq_be_data
91
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
92
# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra
93
# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra
94
# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra
95
-# define cpu_stw_mmu cpu_stw_be_mmu
96
-# define cpu_stl_mmu cpu_stl_be_mmu
97
-# define cpu_stq_mmu cpu_stq_be_mmu
98
#else
99
# define cpu_lduw_data cpu_lduw_le_data
100
# define cpu_ldsw_data cpu_ldsw_le_data
101
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
102
# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra
103
# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra
104
# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra
105
-# define cpu_ldw_mmu cpu_ldw_le_mmu
106
-# define cpu_ldl_mmu cpu_ldl_le_mmu
107
-# define cpu_ldq_mmu cpu_ldq_le_mmu
108
# define cpu_stw_data cpu_stw_le_data
109
# define cpu_stl_data cpu_stl_le_data
110
# define cpu_stq_data cpu_stq_le_data
111
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
112
# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra
113
# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra
114
# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra
115
-# define cpu_stw_mmu cpu_stw_le_mmu
116
-# define cpu_stl_mmu cpu_stl_le_mmu
117
-# define cpu_stq_mmu cpu_stq_le_mmu
118
#endif
119
120
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
121
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/accel/tcg/cputlb.c
124
+++ b/accel/tcg/cputlb.c
125
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
126
return ret;
127
}
128
129
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
130
- MemOpIdx oi, uintptr_t ra)
131
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
132
+ MemOpIdx oi, uintptr_t ra)
133
{
134
uint16_t ret;
135
136
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
137
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
138
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
139
plugin_load_cb(env, addr, oi);
140
return ret;
141
}
142
143
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
144
- MemOpIdx oi, uintptr_t ra)
145
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
146
+ MemOpIdx oi, uintptr_t ra)
147
{
148
uint32_t ret;
149
150
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
151
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
152
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
153
plugin_load_cb(env, addr, oi);
154
return ret;
155
}
156
157
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
158
- MemOpIdx oi, uintptr_t ra)
159
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
160
+ MemOpIdx oi, uintptr_t ra)
161
{
162
uint64_t ret;
163
164
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
165
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
166
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
167
plugin_load_cb(env, addr, oi);
168
return ret;
169
}
170
171
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
172
- MemOpIdx oi, uintptr_t ra)
173
-{
174
- uint16_t ret;
175
-
176
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
177
- ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
178
- plugin_load_cb(env, addr, oi);
179
- return ret;
180
-}
181
-
182
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
183
- MemOpIdx oi, uintptr_t ra)
184
-{
185
- uint32_t ret;
186
-
187
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
188
- ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
189
- plugin_load_cb(env, addr, oi);
190
- return ret;
191
-}
192
-
193
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
194
- MemOpIdx oi, uintptr_t ra)
195
-{
196
- uint64_t ret;
197
-
198
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
199
- ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
200
- plugin_load_cb(env, addr, oi);
201
- return ret;
202
-}
203
-
204
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
205
- MemOpIdx oi, uintptr_t ra)
206
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
207
+ MemOpIdx oi, uintptr_t ra)
208
{
209
Int128 ret;
210
211
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
212
- ret = do_ld16_mmu(env, addr, oi, ra);
213
- plugin_load_cb(env, addr, oi);
214
- return ret;
215
-}
216
-
217
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
218
- MemOpIdx oi, uintptr_t ra)
219
-{
220
- Int128 ret;
221
-
222
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
223
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
224
ret = do_ld16_mmu(env, addr, oi, ra);
225
plugin_load_cb(env, addr, oi);
226
return ret;
227
@@ -XXX,XX +XXX,XX @@ void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
228
plugin_store_cb(env, addr, oi);
229
}
230
231
-void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
232
- MemOpIdx oi, uintptr_t retaddr)
233
+void cpu_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
234
+ MemOpIdx oi, uintptr_t retaddr)
235
{
236
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
237
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
238
do_st2_mmu(env, addr, val, oi, retaddr);
239
plugin_store_cb(env, addr, oi);
240
}
241
242
-void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
243
+void cpu_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
244
MemOpIdx oi, uintptr_t retaddr)
245
{
246
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
247
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
248
do_st4_mmu(env, addr, val, oi, retaddr);
249
plugin_store_cb(env, addr, oi);
250
}
251
252
-void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
253
- MemOpIdx oi, uintptr_t retaddr)
254
+void cpu_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
255
+ MemOpIdx oi, uintptr_t retaddr)
256
{
257
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
258
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
259
do_st8_mmu(env, addr, val, oi, retaddr);
260
plugin_store_cb(env, addr, oi);
261
}
262
263
-void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
264
- MemOpIdx oi, uintptr_t retaddr)
265
+void cpu_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
266
+ MemOpIdx oi, uintptr_t retaddr)
267
{
268
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
269
- do_st2_mmu(env, addr, val, oi, retaddr);
270
- plugin_store_cb(env, addr, oi);
271
-}
272
-
273
-void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
- MemOpIdx oi, uintptr_t retaddr)
275
-{
276
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
277
- do_st4_mmu(env, addr, val, oi, retaddr);
278
- plugin_store_cb(env, addr, oi);
279
-}
280
-
281
-void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
282
- MemOpIdx oi, uintptr_t retaddr)
283
-{
284
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
285
- do_st8_mmu(env, addr, val, oi, retaddr);
286
- plugin_store_cb(env, addr, oi);
287
-}
288
-
289
-void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
290
- MemOpIdx oi, uintptr_t retaddr)
291
-{
292
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
293
- do_st16_mmu(env, addr, val, oi, retaddr);
294
- plugin_store_cb(env, addr, oi);
295
-}
296
-
297
-void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
298
- MemOpIdx oi, uintptr_t retaddr)
299
-{
300
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
302
do_st16_mmu(env, addr, val, oi, retaddr);
303
plugin_store_cb(env, addr, oi);
304
}
305
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/accel/tcg/user-exec.c
308
+++ b/accel/tcg/user-exec.c
309
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
310
return ret;
311
}
312
313
-static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
314
- MemOp mop, uintptr_t ra)
315
+static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
316
+ MemOp mop, uintptr_t ra)
317
{
318
void *haddr;
319
uint16_t ret;
320
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
321
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
322
ret = load_atom_2(env, ra, haddr, mop);
323
clear_helper_retaddr();
324
+
325
+ if (mop & MO_BSWAP) {
326
+ ret = bswap16(ret);
327
+ }
328
return ret;
329
}
330
331
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
332
MemOpIdx oi, uintptr_t ra)
333
{
334
- MemOp mop = get_memop(oi);
335
- uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
336
-
337
- if (mop & MO_BSWAP) {
338
- ret = bswap16(ret);
339
- }
340
- return ret;
341
+ return do_ld2_mmu(env, addr, get_memop(oi), ra);
342
}
343
344
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
345
MemOpIdx oi, uintptr_t ra)
346
{
347
- MemOp mop = get_memop(oi);
348
- int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
349
+ return (int16_t)do_ld2_mmu(env, addr, get_memop(oi), ra);
350
+}
351
352
- if (mop & MO_BSWAP) {
353
- ret = bswap16(ret);
354
- }
355
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
356
+ MemOpIdx oi, uintptr_t ra)
357
+{
358
+ uint16_t ret = do_ld2_mmu(env, addr, get_memop(oi), ra);
359
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
360
return ret;
361
}
362
363
-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
364
- MemOpIdx oi, uintptr_t ra)
365
-{
366
- MemOp mop = get_memop(oi);
367
- uint16_t ret;
368
-
369
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
370
- ret = do_ld2_he_mmu(env, addr, mop, ra);
371
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
372
- return cpu_to_be16(ret);
373
-}
374
-
375
-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
376
- MemOpIdx oi, uintptr_t ra)
377
-{
378
- MemOp mop = get_memop(oi);
379
- uint16_t ret;
380
-
381
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
382
- ret = do_ld2_he_mmu(env, addr, mop, ra);
383
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
384
- return cpu_to_le16(ret);
385
-}
386
-
387
-static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
388
- MemOp mop, uintptr_t ra)
389
+static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
390
+ MemOp mop, uintptr_t ra)
391
{
392
void *haddr;
393
uint32_t ret;
394
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
395
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
396
ret = load_atom_4(env, ra, haddr, mop);
397
clear_helper_retaddr();
398
+
399
+ if (mop & MO_BSWAP) {
400
+ ret = bswap32(ret);
401
+ }
402
return ret;
403
}
404
405
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
406
MemOpIdx oi, uintptr_t ra)
407
{
408
- MemOp mop = get_memop(oi);
409
- uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
410
-
411
- if (mop & MO_BSWAP) {
412
- ret = bswap32(ret);
413
- }
414
- return ret;
415
+ return do_ld4_mmu(env, addr, get_memop(oi), ra);
416
}
417
418
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
419
MemOpIdx oi, uintptr_t ra)
420
{
421
- MemOp mop = get_memop(oi);
422
- int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
423
+ return (int32_t)do_ld4_mmu(env, addr, get_memop(oi), ra);
424
+}
425
426
- if (mop & MO_BSWAP) {
427
- ret = bswap32(ret);
428
- }
429
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
430
+ MemOpIdx oi, uintptr_t ra)
431
+{
432
+ uint32_t ret = do_ld4_mmu(env, addr, get_memop(oi), ra);
433
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
434
return ret;
435
}
436
437
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
438
- MemOpIdx oi, uintptr_t ra)
439
-{
440
- MemOp mop = get_memop(oi);
441
- uint32_t ret;
442
-
443
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
- ret = do_ld4_he_mmu(env, addr, mop, ra);
445
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
446
- return cpu_to_be32(ret);
447
-}
448
-
449
-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
450
- MemOpIdx oi, uintptr_t ra)
451
-{
452
- MemOp mop = get_memop(oi);
453
- uint32_t ret;
454
-
455
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
456
- ret = do_ld4_he_mmu(env, addr, mop, ra);
457
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
458
- return cpu_to_le32(ret);
459
-}
460
-
461
-static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
462
- MemOp mop, uintptr_t ra)
463
+static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
464
+ MemOp mop, uintptr_t ra)
465
{
466
void *haddr;
467
uint64_t ret;
468
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
469
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
470
ret = load_atom_8(env, ra, haddr, mop);
471
clear_helper_retaddr();
472
- return ret;
473
-}
474
-
475
-uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
476
- MemOpIdx oi, uintptr_t ra)
477
-{
478
- MemOp mop = get_memop(oi);
479
- uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
480
481
if (mop & MO_BSWAP) {
482
ret = bswap64(ret);
483
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
484
return ret;
485
}
486
487
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
488
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
489
MemOpIdx oi, uintptr_t ra)
490
{
491
- MemOp mop = get_memop(oi);
492
- uint64_t ret;
493
-
494
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
495
- ret = do_ld8_he_mmu(env, addr, mop, ra);
496
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
497
- return cpu_to_be64(ret);
498
+ return do_ld8_mmu(env, addr, get_memop(oi), ra);
499
}
500
501
-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
502
- MemOpIdx oi, uintptr_t ra)
503
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
504
+ MemOpIdx oi, uintptr_t ra)
505
{
506
- MemOp mop = get_memop(oi);
507
- uint64_t ret;
508
-
509
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
510
- ret = do_ld8_he_mmu(env, addr, mop, ra);
511
+ uint64_t ret = do_ld8_mmu(env, addr, get_memop(oi), ra);
512
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
513
- return cpu_to_le64(ret);
514
+ return ret;
515
}
516
517
-static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
518
- MemOp mop, uintptr_t ra)
519
+static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
520
+ MemOp mop, uintptr_t ra)
521
{
522
void *haddr;
523
Int128 ret;
524
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
525
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
526
ret = load_atom_16(env, ra, haddr, mop);
527
clear_helper_retaddr();
528
- return ret;
529
-}
530
-
531
-Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
532
- MemOpIdx oi, uintptr_t ra)
533
-{
534
- MemOp mop = get_memop(oi);
535
- Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
536
537
if (mop & MO_BSWAP) {
538
ret = bswap128(ret);
539
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
540
return ret;
541
}
542
543
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
544
+ MemOpIdx oi, uintptr_t ra)
545
+{
546
+ return do_ld16_mmu(env, addr, get_memop(oi), ra);
547
+}
548
+
549
Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
550
{
551
return helper_ld16_mmu(env, addr, oi, GETPC());
552
}
553
554
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
555
- MemOpIdx oi, uintptr_t ra)
556
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
557
+ MemOpIdx oi, uintptr_t ra)
558
{
559
- MemOp mop = get_memop(oi);
560
- Int128 ret;
561
-
562
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
563
- ret = do_ld16_he_mmu(env, addr, mop, ra);
564
+ Int128 ret = do_ld16_mmu(env, addr, get_memop(oi), ra);
565
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
566
- if (!HOST_BIG_ENDIAN) {
567
- ret = bswap128(ret);
568
- }
569
- return ret;
570
-}
571
-
572
-Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
573
- MemOpIdx oi, uintptr_t ra)
574
-{
575
- MemOp mop = get_memop(oi);
576
- Int128 ret;
577
-
578
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
579
- ret = do_ld16_he_mmu(env, addr, mop, ra);
580
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
581
- if (HOST_BIG_ENDIAN) {
582
- ret = bswap128(ret);
583
- }
584
return ret;
585
}
586
587
@@ -XXX,XX +XXX,XX @@ void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
589
}
590
591
-static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
592
- MemOp mop, uintptr_t ra)
593
+static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
594
+ MemOp mop, uintptr_t ra)
595
{
596
void *haddr;
597
598
tcg_debug_assert((mop & MO_SIZE) == MO_16);
599
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
600
+
601
+ if (mop & MO_BSWAP) {
602
+ val = bswap16(val);
603
+ }
604
store_atom_2(env, ra, haddr, mop, val);
605
clear_helper_retaddr();
606
}
607
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
608
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
609
MemOpIdx oi, uintptr_t ra)
610
{
611
- MemOp mop = get_memop(oi);
612
-
613
- if (mop & MO_BSWAP) {
614
- val = bswap16(val);
615
- }
616
- do_st2_he_mmu(env, addr, val, mop, ra);
617
+ do_st2_mmu(env, addr, val, get_memop(oi), ra);
618
}
619
620
-void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
621
+void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
622
MemOpIdx oi, uintptr_t ra)
623
{
624
- MemOp mop = get_memop(oi);
625
-
626
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
627
- do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
628
+ do_st2_mmu(env, addr, val, get_memop(oi), ra);
629
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
630
}
631
632
-void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
633
- MemOpIdx oi, uintptr_t ra)
634
-{
635
- MemOp mop = get_memop(oi);
636
-
637
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
638
- do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
639
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
640
-}
641
-
642
-static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
643
- MemOp mop, uintptr_t ra)
644
+static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
645
+ MemOp mop, uintptr_t ra)
646
{
647
void *haddr;
648
649
tcg_debug_assert((mop & MO_SIZE) == MO_32);
650
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
651
+
652
+ if (mop & MO_BSWAP) {
653
+ val = bswap32(val);
654
+ }
655
store_atom_4(env, ra, haddr, mop, val);
656
clear_helper_retaddr();
657
}
658
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
659
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
660
MemOpIdx oi, uintptr_t ra)
661
{
662
- MemOp mop = get_memop(oi);
663
-
664
- if (mop & MO_BSWAP) {
665
- val = bswap32(val);
666
- }
667
- do_st4_he_mmu(env, addr, val, mop, ra);
668
+ do_st4_mmu(env, addr, val, get_memop(oi), ra);
669
}
670
671
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
672
- MemOpIdx oi, uintptr_t ra)
673
+void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
674
+ MemOpIdx oi, uintptr_t ra)
675
{
676
- MemOp mop = get_memop(oi);
677
-
678
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
679
- do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
680
+ do_st4_mmu(env, addr, val, get_memop(oi), ra);
681
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
682
}
683
684
-void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
685
- MemOpIdx oi, uintptr_t ra)
686
-{
687
- MemOp mop = get_memop(oi);
688
-
689
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
690
- do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
691
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
692
-}
693
-
694
-static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
695
- MemOp mop, uintptr_t ra)
696
+static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
697
+ MemOp mop, uintptr_t ra)
698
{
699
void *haddr;
700
701
tcg_debug_assert((mop & MO_SIZE) == MO_64);
702
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
703
+
704
+ if (mop & MO_BSWAP) {
705
+ val = bswap64(val);
706
+ }
707
store_atom_8(env, ra, haddr, mop, val);
708
clear_helper_retaddr();
709
}
710
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
711
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
712
MemOpIdx oi, uintptr_t ra)
713
{
714
- MemOp mop = get_memop(oi);
715
-
716
- if (mop & MO_BSWAP) {
717
- val = bswap64(val);
718
- }
719
- do_st8_he_mmu(env, addr, val, mop, ra);
720
+ do_st8_mmu(env, addr, val, get_memop(oi), ra);
721
}
722
723
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
724
+void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
725
MemOpIdx oi, uintptr_t ra)
726
{
727
- MemOp mop = get_memop(oi);
728
-
729
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
730
- do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
731
+ do_st8_mmu(env, addr, val, get_memop(oi), ra);
732
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
733
}
734
735
-void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
736
- MemOpIdx oi, uintptr_t ra)
737
-{
738
- MemOp mop = get_memop(oi);
739
-
740
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
741
- do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
742
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
743
-}
744
-
745
-static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
746
- MemOp mop, uintptr_t ra)
747
+static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
748
+ MemOp mop, uintptr_t ra)
749
{
750
void *haddr;
751
752
tcg_debug_assert((mop & MO_SIZE) == MO_128);
753
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
754
+
755
+ if (mop & MO_BSWAP) {
756
+ val = bswap128(val);
757
+ }
758
store_atom_16(env, ra, haddr, mop, val);
759
clear_helper_retaddr();
760
}
761
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
762
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
763
MemOpIdx oi, uintptr_t ra)
764
{
765
- MemOp mop = get_memop(oi);
766
-
767
- if (mop & MO_BSWAP) {
768
- val = bswap128(val);
769
- }
770
- do_st16_he_mmu(env, addr, val, mop, ra);
771
+ do_st16_mmu(env, addr, val, get_memop(oi), ra);
772
}
773
774
void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
775
@@ -XXX,XX +XXX,XX @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
776
helper_st16_mmu(env, addr, val, oi, GETPC());
777
}
778
779
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
780
- Int128 val, MemOpIdx oi, uintptr_t ra)
781
+void cpu_st16_mmu(CPUArchState *env, abi_ptr addr,
782
+ Int128 val, MemOpIdx oi, uintptr_t ra)
783
{
784
- MemOp mop = get_memop(oi);
785
-
786
- tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
787
- if (!HOST_BIG_ENDIAN) {
788
- val = bswap128(val);
789
- }
790
- do_st16_he_mmu(env, addr, val, mop, ra);
791
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
792
-}
793
-
794
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
795
- Int128 val, MemOpIdx oi, uintptr_t ra)
796
-{
797
- MemOp mop = get_memop(oi);
798
-
799
- tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
800
- if (HOST_BIG_ENDIAN) {
801
- val = bswap128(val);
802
- }
803
- do_st16_he_mmu(env, addr, val, mop, ra);
804
+ do_st16_mmu(env, addr, val, get_memop(oi), ra);
805
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
806
}
807
808
diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
809
index XXXXXXX..XXXXXXX 100644
810
--- a/target/arm/tcg/m_helper.c
811
+++ b/target/arm/tcg/m_helper.c
812
@@ -XXX,XX +XXX,XX @@ static bool do_v7m_function_return(ARMCPU *cpu)
813
*/
814
mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
815
oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx));
816
- newpc = cpu_ldl_le_mmu(env, frameptr, oi, 0);
817
- newpsr = cpu_ldl_le_mmu(env, frameptr + 4, oi, 0);
818
+ newpc = cpu_ldl_mmu(env, frameptr, oi, 0);
819
+ newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0);
820
821
/* Consistency checks on new IPSR */
822
newpsr_exc = newpsr & XPSR_EXCP;
823
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
824
index XXXXXXX..XXXXXXX 100644
825
--- a/target/sparc/ldst_helper.c
826
+++ b/target/sparc/ldst_helper.c
827
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
828
ret = cpu_ldb_mmu(env, addr, oi, GETPC());
829
break;
830
case 2:
831
- if (asi & 8) {
832
- ret = cpu_ldw_le_mmu(env, addr, oi, GETPC());
833
- } else {
834
- ret = cpu_ldw_be_mmu(env, addr, oi, GETPC());
835
- }
836
+ ret = cpu_ldw_mmu(env, addr, oi, GETPC());
837
break;
838
case 4:
839
- if (asi & 8) {
840
- ret = cpu_ldl_le_mmu(env, addr, oi, GETPC());
841
- } else {
842
- ret = cpu_ldl_be_mmu(env, addr, oi, GETPC());
843
- }
844
+ ret = cpu_ldl_mmu(env, addr, oi, GETPC());
845
break;
846
case 8:
847
- if (asi & 8) {
848
- ret = cpu_ldq_le_mmu(env, addr, oi, GETPC());
849
- } else {
850
- ret = cpu_ldq_be_mmu(env, addr, oi, GETPC());
851
- }
852
+ ret = cpu_ldq_mmu(env, addr, oi, GETPC());
853
break;
854
default:
855
g_assert_not_reached();
856
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
857
index XXXXXXX..XXXXXXX 100644
858
--- a/accel/tcg/ldst_common.c.inc
859
+++ b/accel/tcg/ldst_common.c.inc
860
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
861
int mmu_idx, uintptr_t ra)
862
{
863
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
864
- return cpu_ldw_be_mmu(env, addr, oi, ra);
865
+ return cpu_ldw_mmu(env, addr, oi, ra);
866
}
867
868
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
869
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
870
int mmu_idx, uintptr_t ra)
871
{
872
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
873
- return cpu_ldl_be_mmu(env, addr, oi, ra);
874
+ return cpu_ldl_mmu(env, addr, oi, ra);
875
}
876
877
uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
878
int mmu_idx, uintptr_t ra)
879
{
880
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
881
- return cpu_ldq_be_mmu(env, addr, oi, ra);
882
+ return cpu_ldq_mmu(env, addr, oi, ra);
883
}
884
885
uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
886
int mmu_idx, uintptr_t ra)
887
{
888
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
889
- return cpu_ldw_le_mmu(env, addr, oi, ra);
890
+ return cpu_ldw_mmu(env, addr, oi, ra);
891
}
892
893
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
894
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
895
int mmu_idx, uintptr_t ra)
896
{
897
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
898
- return cpu_ldl_le_mmu(env, addr, oi, ra);
899
+ return cpu_ldl_mmu(env, addr, oi, ra);
900
}
901
902
uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
903
int mmu_idx, uintptr_t ra)
904
{
905
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
906
- return cpu_ldq_le_mmu(env, addr, oi, ra);
907
+ return cpu_ldq_mmu(env, addr, oi, ra);
908
}
909
910
void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
911
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
912
int mmu_idx, uintptr_t ra)
913
{
914
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
915
- cpu_stw_be_mmu(env, addr, val, oi, ra);
916
+ cpu_stw_mmu(env, addr, val, oi, ra);
917
}
918
919
void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
920
int mmu_idx, uintptr_t ra)
921
{
922
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
923
- cpu_stl_be_mmu(env, addr, val, oi, ra);
924
+ cpu_stl_mmu(env, addr, val, oi, ra);
925
}
926
927
void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
928
int mmu_idx, uintptr_t ra)
929
{
930
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
931
- cpu_stq_be_mmu(env, addr, val, oi, ra);
932
+ cpu_stq_mmu(env, addr, val, oi, ra);
933
}
934
935
void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
936
int mmu_idx, uintptr_t ra)
937
{
938
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
939
- cpu_stw_le_mmu(env, addr, val, oi, ra);
940
+ cpu_stw_mmu(env, addr, val, oi, ra);
941
}
942
943
void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
944
int mmu_idx, uintptr_t ra)
945
{
946
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
947
- cpu_stl_le_mmu(env, addr, val, oi, ra);
948
+ cpu_stl_mmu(env, addr, val, oi, ra);
949
}
950
951
void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
952
int mmu_idx, uintptr_t ra)
953
{
954
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
955
- cpu_stq_le_mmu(env, addr, val, oi, ra);
956
+ cpu_stq_mmu(env, addr, val, oi, ra);
957
}
958
959
/*--------------------------*/
960
--
961
2.34.1
962
963
diff view generated by jsdifflib
Deleted patch
1
Use cpu_ld16_mmu and cpu_st16_mmu to eliminate the HAVE_ATOMIC128 special case,
2
and change all of the *_data_ra functions to match.
3
1
4
Note that we check the alignment of both compare and store
5
pointers at the top of the function, so MO_ALIGN* may be
6
safely removed from the individual memory operations.
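For readers who have not used the cpu_*_mmu interface before, here is a
minimal sketch of the calling pattern adopted here.  The helper and its
name are invented for illustration; only the cpu_ldq_mmu/cpu_stq_mmu
calls, the MO_TE | MO_64 MemOpIdx, and the cpu_mmu_index usage mirror
what the hunks below actually do.

    /* Illustrative only: a 64-bit read-modify-write in the style of the
     * converted do_csst code.  Endianness and size travel in the
     * MemOpIdx rather than in the helper name. */
    static uint64_t swap_qword(CPUS390XState *env, uint64_t addr,
                               uint64_t newv, uintptr_t ra)
    {
        int mem_idx = cpu_mmu_index(env, false);
        MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
        uint64_t ov = cpu_ldq_mmu(env, addr, oi8, ra);

        cpu_stq_mmu(env, addr, newv, oi8, ra);
        return ov;
    }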
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: David Hildenbrand <david@redhat.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
target/s390x/tcg/mem_helper.c | 66 ++++++++++++++---------------------
13
1 file changed, 27 insertions(+), 39 deletions(-)
14
15
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/s390x/tcg/mem_helper.c
18
+++ b/target/s390x/tcg/mem_helper.c
19
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
20
uint64_t a2, bool parallel)
21
{
22
uint32_t mem_idx = cpu_mmu_index(env, false);
23
+ MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
24
+ MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
25
+ MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
26
+ MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
27
+ MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
28
uintptr_t ra = GETPC();
29
uint32_t fc = extract32(env->regs[0], 0, 8);
30
uint32_t sc = extract32(env->regs[0], 8, 8);
31
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
32
}
33
}
34
35
- /* All loads happen before all stores. For simplicity, load the entire
36
- store value area from the parameter list. */
37
- svh = cpu_ldq_data_ra(env, pl + 16, ra);
38
- svl = cpu_ldq_data_ra(env, pl + 24, ra);
39
+ /*
40
+ * All loads happen before all stores. For simplicity, load the entire
41
+ * store value area from the parameter list.
42
+ */
43
+ svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
44
+ svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
45
46
switch (fc) {
47
case 0:
48
{
49
- uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
50
+ uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
51
uint32_t cv = env->regs[r3];
52
uint32_t ov;
53
54
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
55
ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
56
#endif
57
} else {
58
- ov = cpu_ldl_data_ra(env, a1, ra);
59
- cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
60
+ ov = cpu_ldl_mmu(env, a1, oi4, ra);
61
+ cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
62
}
63
cc = (ov != cv);
64
env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
65
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
66
67
case 1:
68
{
69
- uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
70
+ uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
71
uint64_t cv = env->regs[r3];
72
uint64_t ov;
73
74
if (parallel) {
75
#ifdef CONFIG_ATOMIC64
76
- MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
77
- ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
78
+ ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
79
#else
80
/* Note that we asserted !parallel above. */
81
g_assert_not_reached();
82
#endif
83
} else {
84
- ov = cpu_ldq_data_ra(env, a1, ra);
85
- cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
86
+ ov = cpu_ldq_mmu(env, a1, oi8, ra);
87
+ cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
88
}
89
cc = (ov != cv);
90
env->regs[r3] = ov;
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
92
93
case 2:
94
{
95
- uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
96
- uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
97
- Int128 nv = int128_make128(nvl, nvh);
98
+ Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
99
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
100
Int128 ov;
101
102
if (!parallel) {
103
- uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
104
- uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
105
-
106
- ov = int128_make128(ol, oh);
107
+ ov = cpu_ld16_mmu(env, a1, oi16, ra);
108
cc = !int128_eq(ov, cv);
109
if (cc) {
110
nv = ov;
111
}
112
-
113
- cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
114
- cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
115
+ cpu_st16_mmu(env, a1, nv, oi16, ra);
116
} else if (HAVE_CMPXCHG128) {
117
- MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
118
- ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
119
+ ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
120
cc = !int128_eq(ov, cv);
121
} else {
122
/* Note that we asserted !parallel above. */
123
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
124
if (cc == 0) {
125
switch (sc) {
126
case 0:
127
- cpu_stb_data_ra(env, a2, svh >> 56, ra);
128
+ cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
129
break;
130
case 1:
131
- cpu_stw_data_ra(env, a2, svh >> 48, ra);
132
+ cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
133
break;
134
case 2:
135
- cpu_stl_data_ra(env, a2, svh >> 32, ra);
136
+ cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
137
break;
138
case 3:
139
- cpu_stq_data_ra(env, a2, svh, ra);
140
+ cpu_stq_mmu(env, a2, svh, oi8, ra);
141
break;
142
case 4:
143
- if (!parallel) {
144
- cpu_stq_data_ra(env, a2 + 0, svh, ra);
145
- cpu_stq_data_ra(env, a2 + 8, svl, ra);
146
- } else if (HAVE_ATOMIC128) {
147
- MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
148
- Int128 sv = int128_make128(svl, svh);
149
- cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
150
- } else {
151
- /* Note that we asserted !parallel above. */
152
- g_assert_not_reached();
153
- }
154
+ cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
155
break;
156
default:
157
g_assert_not_reached();
158
--
159
2.34.1
160
161
diff view generated by jsdifflib
Deleted patch
1
Eliminate the CONFIG_USER_ONLY specialization.
2
1
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/s390x/tcg/mem_helper.c | 8 +-------
8
1 file changed, 1 insertion(+), 7 deletions(-)
9
10
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/tcg/mem_helper.c
13
+++ b/target/s390x/tcg/mem_helper.c
14
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
15
uint32_t ov;
16
17
if (parallel) {
18
-#ifdef CONFIG_USER_ONLY
19
- uint32_t *haddr = g2h(env_cpu(env), a1);
20
- ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
21
-#else
22
- MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
23
- ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
24
-#endif
25
+ ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
26
} else {
27
ov = cpu_ldl_mmu(env, a1, oi4, ra);
28
cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
29
--
30
2.34.1
31
32
diff view generated by jsdifflib
Deleted patch
1
Atomic load/store of 128-bit quantities is now handled
2
by cpu_{ld,st}16_mmu.
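To make the replacement concrete, a hedged sketch of what a former
cpu_atomic_ldo_be_mmu/cpu_atomic_sto_be_mmu user looks like afterwards.
The copy_quad helper is hypothetical; only the cpu_ld16_mmu and
cpu_st16_mmu calls and the MemOp flags come from this interface.

    /* Sketch: an aligned 16-byte load and store through the unified
     * entry points.  MO_ALIGN makes unaligned guest addresses fault,
     * and the size/endianness are carried by the MemOpIdx. */
    static Int128 copy_quad(CPUArchState *env, abi_ptr dst, abi_ptr src,
                            int mmu_idx, uintptr_t ra)
    {
        MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mmu_idx);
        Int128 val = cpu_ld16_mmu(env, src, oi, ra);

        cpu_st16_mmu(env, dst, val, oi, ra);
        return val;
    }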
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/atomic_template.h | 61 +++--------------------------------
8
include/exec/cpu_ldst.h | 9 ------
9
accel/tcg/atomic_common.c.inc | 14 --------
10
3 files changed, 4 insertions(+), 80 deletions(-)
11
12
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/atomic_template.h
15
+++ b/accel/tcg/atomic_template.h
16
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
17
return ret;
18
}
19
20
-#if DATA_SIZE >= 16
21
-#if HAVE_ATOMIC128
22
-ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
23
- MemOpIdx oi, uintptr_t retaddr)
24
-{
25
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
26
- PAGE_READ, retaddr);
27
- DATA_TYPE val;
28
-
29
- val = atomic16_read(haddr);
30
- ATOMIC_MMU_CLEANUP;
31
- atomic_trace_ld_post(env, addr, oi);
32
- return val;
33
-}
34
-
35
-void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
36
- MemOpIdx oi, uintptr_t retaddr)
37
-{
38
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
39
- PAGE_WRITE, retaddr);
40
-
41
- atomic16_set(haddr, val);
42
- ATOMIC_MMU_CLEANUP;
43
- atomic_trace_st_post(env, addr, oi);
44
-}
45
-#endif
46
-#else
47
+#if DATA_SIZE < 16
48
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
49
MemOpIdx oi, uintptr_t retaddr)
50
{
51
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
52
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
53
54
#undef GEN_ATOMIC_HELPER_FN
55
-#endif /* DATA SIZE >= 16 */
56
+#endif /* DATA SIZE < 16 */
57
58
#undef END
59
60
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
61
return BSWAP(ret);
62
}
63
64
-#if DATA_SIZE >= 16
65
-#if HAVE_ATOMIC128
66
-ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
67
- MemOpIdx oi, uintptr_t retaddr)
68
-{
69
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
70
- PAGE_READ, retaddr);
71
- DATA_TYPE val;
72
-
73
- val = atomic16_read(haddr);
74
- ATOMIC_MMU_CLEANUP;
75
- atomic_trace_ld_post(env, addr, oi);
76
- return BSWAP(val);
77
-}
78
-
79
-void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
80
- MemOpIdx oi, uintptr_t retaddr)
81
-{
82
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
83
- PAGE_WRITE, retaddr);
84
-
85
- val = BSWAP(val);
86
- atomic16_set(haddr, val);
87
- ATOMIC_MMU_CLEANUP;
88
- atomic_trace_st_post(env, addr, oi);
89
-}
90
-#endif
91
-#else
92
+#if DATA_SIZE < 16
93
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
94
MemOpIdx oi, uintptr_t retaddr)
95
{
96
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
97
#undef ADD
98
99
#undef GEN_ATOMIC_HELPER_FN
100
-#endif /* DATA_SIZE >= 16 */
101
+#endif /* DATA_SIZE < 16 */
102
103
#undef END
104
#endif /* DATA_SIZE > 1 */
105
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/exec/cpu_ldst.h
108
+++ b/include/exec/cpu_ldst.h
109
@@ -XXX,XX +XXX,XX @@ Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
110
Int128 cmpv, Int128 newv,
111
MemOpIdx oi, uintptr_t retaddr);
112
113
-Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
114
- MemOpIdx oi, uintptr_t retaddr);
115
-Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
116
- MemOpIdx oi, uintptr_t retaddr);
117
-void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
118
- MemOpIdx oi, uintptr_t retaddr);
119
-void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
120
- MemOpIdx oi, uintptr_t retaddr);
121
-
122
#if defined(CONFIG_USER_ONLY)
123
124
extern __thread uintptr_t helper_retaddr;
125
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
126
index XXXXXXX..XXXXXXX 100644
127
--- a/accel/tcg/atomic_common.c.inc
128
+++ b/accel/tcg/atomic_common.c.inc
129
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
130
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
131
}
132
133
-#if HAVE_ATOMIC128
134
-static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
135
- MemOpIdx oi)
136
-{
137
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
138
-}
139
-
140
-static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
141
- MemOpIdx oi)
142
-{
143
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
144
-}
145
-#endif
146
-
147
/*
148
* Atomic helpers callable from TCG.
149
* These have a common interface and all defer to cpu_atomic_*
150
--
151
2.34.1
152
153
diff view generated by jsdifflib
Deleted patch
1
Now that the atomic load/store helpers are gone, we're always passing
2
PAGE_READ | PAGE_WRITE for RMW atomic operations.
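For clarity, the interface change amounts to dropping one parameter;
the two prototypes below are taken from the hunks that follow and are
shown here only for side-by-side comparison.

    /* Before: callers stated whether the access needed read, write or both. */
    static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                                   MemOpIdx oi, int size, int prot,
                                   uintptr_t retaddr);

    /* After: every remaining caller is a read-modify-write atomic, so the
     * lookup always checks the page for both read and write access. */
    static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                                   MemOpIdx oi, int size, uintptr_t retaddr);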
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/atomic_template.h | 32 ++++++--------
8
accel/tcg/cputlb.c | 85 ++++++++++++++-----------------------
9
accel/tcg/user-exec.c | 8 +---
10
3 files changed, 45 insertions(+), 80 deletions(-)
11
12
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/atomic_template.h
15
+++ b/accel/tcg/atomic_template.h
16
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
17
ABI_TYPE cmpv, ABI_TYPE newv,
18
MemOpIdx oi, uintptr_t retaddr)
19
{
20
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
21
- PAGE_READ | PAGE_WRITE, retaddr);
22
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
23
DATA_TYPE ret;
24
25
#if DATA_SIZE == 16
26
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
27
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
28
MemOpIdx oi, uintptr_t retaddr)
29
{
30
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
31
- PAGE_READ | PAGE_WRITE, retaddr);
32
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
33
DATA_TYPE ret;
34
35
ret = qatomic_xchg__nocheck(haddr, val);
36
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
37
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
38
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
39
{ \
40
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
41
- PAGE_READ | PAGE_WRITE, retaddr); \
42
- DATA_TYPE ret; \
43
+ DATA_TYPE *haddr, ret; \
44
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
45
ret = qatomic_##X(haddr, val); \
46
ATOMIC_MMU_CLEANUP; \
47
atomic_trace_rmw_post(env, addr, oi); \
48
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
49
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
50
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
51
{ \
52
- XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
53
- PAGE_READ | PAGE_WRITE, retaddr); \
54
- XDATA_TYPE cmp, old, new, val = xval; \
55
+ XDATA_TYPE *haddr, cmp, old, new, val = xval; \
56
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
57
smp_mb(); \
58
cmp = qatomic_read__nocheck(haddr); \
59
do { \
60
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
61
ABI_TYPE cmpv, ABI_TYPE newv,
62
MemOpIdx oi, uintptr_t retaddr)
63
{
64
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
65
- PAGE_READ | PAGE_WRITE, retaddr);
66
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
67
DATA_TYPE ret;
68
69
#if DATA_SIZE == 16
70
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
71
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
72
MemOpIdx oi, uintptr_t retaddr)
73
{
74
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
75
- PAGE_READ | PAGE_WRITE, retaddr);
76
+ DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
77
ABI_TYPE ret;
78
79
ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
80
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
81
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
82
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
83
{ \
84
- DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
85
- PAGE_READ | PAGE_WRITE, retaddr); \
86
- DATA_TYPE ret; \
87
+ DATA_TYPE *haddr, ret; \
88
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
89
ret = qatomic_##X(haddr, BSWAP(val)); \
90
ATOMIC_MMU_CLEANUP; \
91
atomic_trace_rmw_post(env, addr, oi); \
92
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
93
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
94
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
95
{ \
96
- XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
97
- PAGE_READ | PAGE_WRITE, retaddr); \
98
- XDATA_TYPE ldo, ldn, old, new, val = xval; \
99
+ XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \
100
+ haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
101
smp_mb(); \
102
ldn = qatomic_read__nocheck(haddr); \
103
do { \
104
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/accel/tcg/cputlb.c
107
+++ b/accel/tcg/cputlb.c
108
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
109
/*
110
* Probe for an atomic operation. Do not allow unaligned operations,
111
* or io operations to proceed. Return the host address.
112
- *
113
- * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
114
*/
115
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
116
- MemOpIdx oi, int size, int prot,
117
- uintptr_t retaddr)
118
+ MemOpIdx oi, int size, uintptr_t retaddr)
119
{
120
uintptr_t mmu_idx = get_mmuidx(oi);
121
MemOp mop = get_memop(oi);
122
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
123
tlbe = tlb_entry(env, mmu_idx, addr);
124
125
/* Check TLB entry and enforce page permissions. */
126
- if (prot & PAGE_WRITE) {
127
- tlb_addr = tlb_addr_write(tlbe);
128
- if (!tlb_hit(tlb_addr, addr)) {
129
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
130
- addr & TARGET_PAGE_MASK)) {
131
- tlb_fill(env_cpu(env), addr, size,
132
- MMU_DATA_STORE, mmu_idx, retaddr);
133
- index = tlb_index(env, mmu_idx, addr);
134
- tlbe = tlb_entry(env, mmu_idx, addr);
135
- }
136
- tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
137
- }
138
-
139
- if (prot & PAGE_READ) {
140
- /*
141
- * Let the guest notice RMW on a write-only page.
142
- * We have just verified that the page is writable.
143
- * Subpage lookups may have left TLB_INVALID_MASK set,
144
- * but addr_read will only be -1 if PAGE_READ was unset.
145
- */
146
- if (unlikely(tlbe->addr_read == -1)) {
147
- tlb_fill(env_cpu(env), addr, size,
148
- MMU_DATA_LOAD, mmu_idx, retaddr);
149
- /*
150
- * Since we don't support reads and writes to different
151
- * addresses, and we do have the proper page loaded for
152
- * write, this shouldn't ever return. But just in case,
153
- * handle via stop-the-world.
154
- */
155
- goto stop_the_world;
156
- }
157
- /* Collect TLB_WATCHPOINT for read. */
158
- tlb_addr |= tlbe->addr_read;
159
- }
160
- } else /* if (prot & PAGE_READ) */ {
161
- tlb_addr = tlbe->addr_read;
162
- if (!tlb_hit(tlb_addr, addr)) {
163
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
164
- addr & TARGET_PAGE_MASK)) {
165
- tlb_fill(env_cpu(env), addr, size,
166
- MMU_DATA_LOAD, mmu_idx, retaddr);
167
- index = tlb_index(env, mmu_idx, addr);
168
- tlbe = tlb_entry(env, mmu_idx, addr);
169
- }
170
- tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK;
171
+ tlb_addr = tlb_addr_write(tlbe);
172
+ if (!tlb_hit(tlb_addr, addr)) {
173
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
174
+ addr & TARGET_PAGE_MASK)) {
175
+ tlb_fill(env_cpu(env), addr, size,
176
+ MMU_DATA_STORE, mmu_idx, retaddr);
177
+ index = tlb_index(env, mmu_idx, addr);
178
+ tlbe = tlb_entry(env, mmu_idx, addr);
179
}
180
+ tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
181
}
182
183
+ /*
184
+ * Let the guest notice RMW on a write-only page.
185
+ * We have just verified that the page is writable.
186
+ * Subpage lookups may have left TLB_INVALID_MASK set,
187
+ * but addr_read will only be -1 if PAGE_READ was unset.
188
+ */
189
+ if (unlikely(tlbe->addr_read == -1)) {
190
+ tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
191
+ /*
192
+ * Since we don't support reads and writes to different
193
+ * addresses, and we do have the proper page loaded for
194
+ * write, this shouldn't ever return. But just in case,
195
+ * handle via stop-the-world.
196
+ */
197
+ goto stop_the_world;
198
+ }
199
+ /* Collect TLB_WATCHPOINT for read. */
200
+ tlb_addr |= tlbe->addr_read;
201
+
202
/* Notice an IO access or a needs-MMU-lookup access */
203
if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
204
/* There's really nothing that can be done to
205
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
206
}
207
208
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
209
- QEMU_BUILD_BUG_ON(PAGE_READ != BP_MEM_READ);
210
- QEMU_BUILD_BUG_ON(PAGE_WRITE != BP_MEM_WRITE);
211
- /* therefore prot == watchpoint bits */
212
- cpu_check_watchpoint(env_cpu(env), addr, size,
213
- full->attrs, prot, retaddr);
214
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs,
215
+ BP_MEM_READ | BP_MEM_WRITE, retaddr);
216
}
217
218
return hostaddr;
219
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/user-exec.c
222
+++ b/accel/tcg/user-exec.c
223
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
224
225
/*
226
* Do not allow unaligned operations to proceed. Return the host address.
227
- *
228
- * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
229
*/
230
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
231
- MemOpIdx oi, int size, int prot,
232
- uintptr_t retaddr)
233
+ MemOpIdx oi, int size, uintptr_t retaddr)
234
{
235
MemOp mop = get_memop(oi);
236
int a_bits = get_alignment_bits(mop);
237
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
238
239
/* Enforce guest required alignment. */
240
if (unlikely(addr & ((1 << a_bits) - 1))) {
241
- MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE;
242
- cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr);
243
+ cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, retaddr);
244
}
245
246
/* Enforce qemu required alignment. */
247
--
248
2.34.1
249
250
diff view generated by jsdifflib
Deleted patch
1
These symbols will shortly become dynamic runtime tests and are
2
therefore not appropriate for the preprocessor. Use the
3
matching CONFIG_* symbols for that purpose.
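A short sketch of the distinction being drawn, with fragments modelled
on the hunks below (the runtime branch is illustrative, not taken
verbatim from the tree): CONFIG_* macros remain suitable for #if because
they are fixed at configure/compile time, while the HAVE_* values are
about to become runtime-detected and so must be tested as ordinary C
expressions.

    /* Compile-time: only build the 16-byte atomic template at all when
     * the host compiler can provide 128-bit atomics or cmpxchg. */
    #if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
    #define DATA_SIZE 16
    #include "atomic_template.h"
    #endif

    /* Runtime: HAVE_CMPXCHG128 may soon be a value probed at startup,
     * so a caller that needs it tests it like any other expression. */
    if (!HAVE_CMPXCHG128) {
        cpu_loop_exit_atomic(env_cpu(env), ra);
    }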
4
1
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
host/include/aarch64/host/atomic128-cas.h | 2 ++
9
host/include/generic/host/atomic128-ldst.h | 2 +-
10
accel/tcg/cputlb.c | 2 +-
11
accel/tcg/user-exec.c | 2 +-
12
4 files changed, 5 insertions(+), 3 deletions(-)
13
14
diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/host/include/aarch64/host/atomic128-cas.h
17
+++ b/host/include/aarch64/host/atomic128-cas.h
18
@@ -XXX,XX +XXX,XX @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
19
20
return int128_make128(oldl, oldh);
21
}
22
+
23
+# define CONFIG_CMPXCHG128 1
24
# define HAVE_CMPXCHG128 1
25
#endif
26
27
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/host/include/generic/host/atomic128-ldst.h
30
+++ b/host/include/generic/host/atomic128-ldst.h
31
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
32
}
33
34
# define HAVE_ATOMIC128 1
35
-#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
36
+#elif defined(CONFIG_CMPXCHG128) && !defined(CONFIG_USER_ONLY)
37
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
38
atomic16_read(Int128 *ptr)
39
{
40
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/accel/tcg/cputlb.c
43
+++ b/accel/tcg/cputlb.c
44
@@ -XXX,XX +XXX,XX @@ void cpu_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
45
#include "atomic_template.h"
46
#endif
47
48
-#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
49
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
50
#define DATA_SIZE 16
51
#include "atomic_template.h"
52
#endif
53
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/accel/tcg/user-exec.c
56
+++ b/accel/tcg/user-exec.c
57
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
58
#include "atomic_template.h"
59
#endif
60
61
-#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
62
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
63
#define DATA_SIZE 16
64
#include "atomic_template.h"
65
#endif
66
--
67
2.34.1
68
69
diff view generated by jsdifflib
1
Create both atomic16_read_ro and atomic16_read_rw.
1
DisasContextBase.pc_next has type vaddr; use the correct log format.
2
Previously we pretended that we had atomic16_read in system mode,
3
because we "know" that all ram is always writable to the host.
4
Now, expose read-only and read-write versions all of the time.
5
2
6
For aarch64, do not fall back to __atomic_read_16 even if
3
Fixes: 85c19af63e7 ("include/exec: Use vaddr in DisasContextBase for virtual addresses")
7
supported by the compiler, to work around a clang bug.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
host/include/aarch64/host/atomic128-ldst.h | 21 ++++++++-------
6
target/mips/tcg/octeon_translate.c | 4 ++--
13
host/include/generic/host/atomic128-ldst.h | 31 ++++++++++++++++------
7
1 file changed, 2 insertions(+), 2 deletions(-)
14
target/s390x/tcg/mem_helper.c | 2 +-
15
3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
index XXXXXXX..XXXXXXX 100644
--- a/host/include/aarch64/host/atomic128-ldst.h
+++ b/host/include/aarch64/host/atomic128-ldst.h
@@ -XXX,XX +XXX,XX @@
#ifndef AARCH64_ATOMIC128_LDST_H
#define AARCH64_ATOMIC128_LDST_H

-/* Through gcc 10, aarch64 has no support for 128-bit atomics. */
-#if !defined(CONFIG_ATOMIC128) && !defined(CONFIG_USER_ONLY)
-/* We can do better than cmpxchg for AArch64. */
-static inline Int128 atomic16_read(Int128 *ptr)
+/*
+ * Through gcc 10, aarch64 has no support for 128-bit atomics.
+ * Through clang 16, without -march=armv8.4-a, __atomic_load_16
+ * is incorrectly expanded to a read-write operation.
+ */
+
+#define HAVE_ATOMIC128_RO 0
+#define HAVE_ATOMIC128_RW 1
+
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
{
uint64_t l, h;
uint32_t tmp;
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
: [l] "r"(l), [h] "r"(h));
}

-# define HAVE_ATOMIC128 1
-#else
-#include "host/include/generic/host/atomic128-ldst.h"
-#endif
-
#endif /* AARCH64_ATOMIC128_LDST_H */
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
index XXXXXXX..XXXXXXX 100644
--- a/host/include/generic/host/atomic128-ldst.h
+++ b/host/include/generic/host/atomic128-ldst.h
@@ -XXX,XX +XXX,XX @@
#define HOST_ATOMIC128_LDST_H

#if defined(CONFIG_ATOMIC128)
+# define HAVE_ATOMIC128_RO 1
+# define HAVE_ATOMIC128_RW 1
+
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
-atomic16_read(Int128 *ptr)
+atomic16_read_ro(const Int128 *ptr)
{
- __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+ const __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r;

r.i = qatomic_read__nocheck(ptr_align);
return r.s;
}

+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_read_rw(Int128 *ptr)
+{
+ return atomic16_read_ro(ptr);
+}
+
static inline void ATTRIBUTE_ATOMIC128_OPT
atomic16_set(Int128 *ptr, Int128 val)
{
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
qatomic_set__nocheck(ptr_align, v.i);
}

-# define HAVE_ATOMIC128 1
-#elif defined(CONFIG_CMPXCHG128) && !defined(CONFIG_USER_ONLY)
+#elif defined(CONFIG_CMPXCHG128)
+# define HAVE_ATOMIC128_RO 0
+# define HAVE_ATOMIC128_RW 1
+
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
+
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
-atomic16_read(Int128 *ptr)
+atomic16_read_rw(Int128 *ptr)
{
/* Maybe replace 0 with 0, returning the old value. */
Int128 z = int128_make64(0);
@@ -XXX,XX +XXX,XX @@ atomic16_set(Int128 *ptr, Int128 val)
} while (int128_ne(old, cmp));
}

-# define HAVE_ATOMIC128 1
#else
+# define HAVE_ATOMIC128_RO 0
+# define HAVE_ATOMIC128_RW 0
+
/* Fallback definitions that must be optimized away, or error. */
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_rw(Int128 *ptr);
void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
-# define HAVE_ATOMIC128 0
#endif

#endif /* HOST_ATOMIC128_LDST_H */
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
max = 3;
#endif
if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
- (HAVE_ATOMIC128 ? 0 : sc > max)) {
+ (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
cpu_loop_exit_atomic(env_cpu(env), ra);
}
}
--
2.34.1

diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/octeon_translate.c
+++ b/target/mips/tcg/octeon_translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_BBIT(DisasContext *ctx, arg_BBIT *a)
TCGv p;

if (ctx->hflags & MIPS_HFLAG_BMASK) {
- LOG_DISAS("Branch in delay / forbidden slot at PC 0x"
- TARGET_FMT_lx "\n", ctx->base.pc_next);
+ LOG_DISAS("Branch in delay / forbidden slot at PC 0x%" VADDR_PRIx "\n",
+ ctx->base.pc_next);
generate_exception_end(ctx, EXCP_RI);
return true;
}
--
2.43.0

Deleted patch
1
Remove the locally defined load_atomic16 and store_atomic16,
2
along with HAVE_al16 and HAVE_al16_fast in favor of the
3
routines defined in atomic128.h.
4
1
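For reference, here is a minimal sketch (not part of the patch) of how a caller is expected to use the shared interface after this change. The name load16_or_exit is made up for illustration; atomic16_read_ro/atomic16_read_rw, HAVE_ATOMIC128_RO/RW, env_cpu and cpu_loop_exit_atomic are the existing helpers, assuming "qemu/atomic128.h" has been included:

    /* Hypothetical caller, illustrating the atomic128.h interface only. */
    static Int128 load16_or_exit(CPUArchState *env, uintptr_t ra, Int128 *pv)
    {
        if (HAVE_ATOMIC128_RO) {
            return atomic16_read_ro(pv);   /* usable on read-only mappings */
        }
        if (HAVE_ATOMIC128_RW) {
            return atomic16_read_rw(pv);   /* may be a cmpxchg; needs a writable page */
        }
        /* No 16-byte atomic available: re-execute the insn serially. */
        cpu_loop_exit_atomic(env_cpu(env), ra);
    }

The point of the RO/RW split is that the read-only variant works on pages mapped read-only, while the read-write variant may be implemented with a compare-and-swap and therefore requires a writable mapping.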
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
accel/tcg/cputlb.c | 2 +-
9
accel/tcg/ldst_atomicity.c.inc | 118 +++++++--------------------------
10
2 files changed, 24 insertions(+), 96 deletions(-)
11
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
17
18
case MO_ATOM_WITHIN16_PAIR:
19
/* Since size > 8, this is the half that must be atomic. */
20
- if (!HAVE_al16) {
21
+ if (!HAVE_ATOMIC128_RW) {
22
cpu_loop_exit_atomic(env_cpu(env), ra);
23
}
24
return store_whole_le16(p->haddr, p->size, val_le);
25
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/accel/tcg/ldst_atomicity.c.inc
28
+++ b/accel/tcg/ldst_atomicity.c.inc
29
@@ -XXX,XX +XXX,XX @@
30
#endif
31
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
32
33
-#if defined(CONFIG_ATOMIC128)
34
-# define HAVE_al16_fast true
35
-#else
36
-# define HAVE_al16_fast false
37
-#endif
38
-#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
39
-# define HAVE_al16 true
40
-#else
41
-# define HAVE_al16 false
42
-#endif
43
-
44
-
45
/**
46
* required_atomicity:
47
*
48
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
49
return qatomic_read__nocheck(p);
50
}
51
52
-/**
53
- * load_atomic16:
54
- * @pv: host address
55
- *
56
- * Atomically load 16 aligned bytes from @pv.
57
- */
58
-static inline Int128 ATTRIBUTE_ATOMIC128_OPT
59
-load_atomic16(void *pv)
60
-{
61
-#ifdef CONFIG_ATOMIC128
62
- __uint128_t *p = __builtin_assume_aligned(pv, 16);
63
- Int128Alias r;
64
-
65
- r.u = qatomic_read__nocheck(p);
66
- return r.s;
67
-#else
68
- qemu_build_not_reached();
69
-#endif
70
-}
71
-
72
/**
73
* load_atomic8_or_exit:
74
* @env: cpu context
75
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
76
{
77
Int128 *p = __builtin_assume_aligned(pv, 16);
78
79
- if (HAVE_al16_fast) {
80
- return load_atomic16(p);
81
+ if (HAVE_ATOMIC128_RO) {
82
+ return atomic16_read_ro(p);
83
}
84
85
#ifdef CONFIG_USER_ONLY
86
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
87
* In system mode all guest pages are writable, and for user-only
88
* we have just checked writability. Try cmpxchg.
89
*/
90
-#if defined(CONFIG_CMPXCHG128)
91
- /* Swap 0 with 0, with the side-effect of returning the old value. */
92
- {
93
- Int128Alias r;
94
- r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
95
- return r.s;
96
+ if (HAVE_ATOMIC128_RW) {
97
+ return atomic16_read_rw(p);
98
}
99
-#endif
100
101
/* Ultimate fallback: re-execute in serial context. */
102
cpu_loop_exit_atomic(env_cpu(env), ra);
103
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
104
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
105
load_atom_extract_al16_or_al8(void *pv, int s)
106
{
107
-#if defined(CONFIG_ATOMIC128)
108
uintptr_t pi = (uintptr_t)pv;
109
int o = pi & 7;
110
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
111
- __uint128_t r;
112
+ Int128 r;
113
114
pv = (void *)(pi & ~7);
115
if (pi & 8) {
116
@@ -XXX,XX +XXX,XX @@ load_atom_extract_al16_or_al8(void *pv, int s)
117
uint64_t b = qatomic_read__nocheck(p8 + 1);
118
119
if (HOST_BIG_ENDIAN) {
120
- r = ((__uint128_t)a << 64) | b;
121
+ r = int128_make128(b, a);
122
} else {
123
- r = ((__uint128_t)b << 64) | a;
124
+ r = int128_make128(a, b);
125
}
126
} else {
127
- __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
128
- r = qatomic_read__nocheck(p16);
129
+ r = atomic16_read_ro(pv);
130
}
131
- return r >> shr;
132
-#else
133
- qemu_build_not_reached();
134
-#endif
135
+ return int128_getlo(int128_urshift(r, shr));
136
}
137
138
/**
139
@@ -XXX,XX +XXX,XX @@ static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
140
if (likely((pi & 1) == 0)) {
141
return load_atomic2(pv);
142
}
143
- if (HAVE_al16_fast) {
144
+ if (HAVE_ATOMIC128_RO) {
145
return load_atom_extract_al16_or_al8(pv, 2);
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
149
if (likely((pi & 3) == 0)) {
150
return load_atomic4(pv);
151
}
152
- if (HAVE_al16_fast) {
153
+ if (HAVE_ATOMIC128_RO) {
154
return load_atom_extract_al16_or_al8(pv, 4);
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
158
if (HAVE_al8 && likely((pi & 7) == 0)) {
159
return load_atomic8(pv);
160
}
161
- if (HAVE_al16_fast) {
162
+ if (HAVE_ATOMIC128_RO) {
163
return load_atom_extract_al16_or_al8(pv, 8);
164
}
165
166
@@ -XXX,XX +XXX,XX @@ static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
167
* If the host does not support 16-byte atomics, wait until we have
168
* examined the atomicity parameters below.
169
*/
170
- if (HAVE_al16_fast && likely((pi & 15) == 0)) {
171
- return load_atomic16(pv);
172
+ if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
173
+ return atomic16_read_ro(pv);
174
}
175
176
atmax = required_atomicity(env, pi, memop);
177
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
178
qatomic_set__nocheck(p, val);
179
}
180
181
-/**
182
- * store_atomic16:
183
- * @pv: host address
184
- * @val: value to store
185
- *
186
- * Atomically store 16 aligned bytes to @pv.
187
- */
188
-static inline void ATTRIBUTE_ATOMIC128_OPT
189
-store_atomic16(void *pv, Int128Alias val)
190
-{
191
-#if defined(CONFIG_ATOMIC128)
192
- __uint128_t *pu = __builtin_assume_aligned(pv, 16);
193
- qatomic_set__nocheck(pu, val.u);
194
-#elif defined(CONFIG_CMPXCHG128)
195
- __uint128_t *pu = __builtin_assume_aligned(pv, 16);
196
- __uint128_t o;
197
-
198
- /*
199
- * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
200
- * defer to libatomic, so we must use __sync_*_compare_and_swap_16
201
- * and accept the sequential consistency that comes with it.
202
- */
203
- do {
204
- o = *pu;
205
- } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
206
-#else
207
- qemu_build_not_reached();
208
-#endif
209
-}
210
-
211
/**
212
* store_atom_4x2
213
*/
214
@@ -XXX,XX +XXX,XX @@ static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
215
int sh = o * 8;
216
Int128 m, v;
217
218
- qemu_build_assert(HAVE_al16);
219
+ qemu_build_assert(HAVE_ATOMIC128_RW);
220
221
/* Like MAKE_64BIT_MASK(0, sz), but larger. */
222
if (sz <= 64) {
223
@@ -XXX,XX +XXX,XX @@ static void store_atom_2(CPUArchState *env, uintptr_t ra,
224
return;
225
}
226
} else if ((pi & 15) == 7) {
227
- if (HAVE_al16) {
228
+ if (HAVE_ATOMIC128_RW) {
229
Int128 v = int128_lshift(int128_make64(val), 56);
230
Int128 m = int128_lshift(int128_make64(0xffff), 56);
231
store_atom_insert_al16(pv - 7, v, m);
232
@@ -XXX,XX +XXX,XX @@ static void store_atom_4(CPUArchState *env, uintptr_t ra,
233
return;
234
}
235
} else {
236
- if (HAVE_al16) {
237
+ if (HAVE_ATOMIC128_RW) {
238
store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
239
return;
240
}
241
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
242
}
243
break;
244
case MO_64:
245
- if (HAVE_al16) {
246
+ if (HAVE_ATOMIC128_RW) {
247
store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
248
return;
249
}
250
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
251
uint64_t a, b;
252
int atmax;
253
254
- if (HAVE_al16_fast && likely((pi & 15) == 0)) {
255
- store_atomic16(pv, val);
256
+ if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
257
+ atomic16_set(pv, val);
258
return;
259
}
260
261
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
262
}
263
break;
264
case -MO_64:
265
- if (HAVE_al16) {
266
+ if (HAVE_ATOMIC128_RW) {
267
uint64_t val_le;
268
int s2 = pi & 15;
269
int s1 = 16 - s2;
270
@@ -XXX,XX +XXX,XX @@ static void store_atom_16(CPUArchState *env, uintptr_t ra,
271
}
272
break;
273
case MO_128:
274
- if (HAVE_al16) {
275
- store_atomic16(pv, val);
276
+ if (HAVE_ATOMIC128_RW) {
277
+ atomic16_set(pv, val);
278
return;
279
}
280
break;
281
--
282
2.34.1
283
284
Deleted patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
include/tcg/debug-assert.h | 17 +++++++++++++++++
6
include/tcg/tcg.h | 9 +--------
7
MAINTAINERS | 1 +
8
3 files changed, 19 insertions(+), 8 deletions(-)
9
create mode 100644 include/tcg/debug-assert.h
10
1
11
diff --git a/include/tcg/debug-assert.h b/include/tcg/debug-assert.h
12
new file mode 100644
13
index XXXXXXX..XXXXXXX
14
--- /dev/null
15
+++ b/include/tcg/debug-assert.h
16
@@ -XXX,XX +XXX,XX @@
17
+/* SPDX-License-Identifier: MIT */
18
+/*
19
+ * Define tcg_debug_assert
20
+ * Copyright (c) 2008 Fabrice Bellard
21
+ */
22
+
23
+#ifndef TCG_DEBUG_ASSERT_H
24
+#define TCG_DEBUG_ASSERT_H
25
+
26
+#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
27
+# define tcg_debug_assert(X) do { assert(X); } while (0)
28
+#else
29
+# define tcg_debug_assert(X) \
30
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
31
+#endif
32
+
33
+#endif
34
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/tcg/tcg.h
37
+++ b/include/tcg/tcg.h
38
@@ -XXX,XX +XXX,XX @@
39
#include "tcg/tcg-mo.h"
40
#include "tcg-target.h"
41
#include "tcg/tcg-cond.h"
42
+#include "tcg/debug-assert.h"
43
44
/* XXX: make safe guess about sizes */
45
#define MAX_OP_PER_INSTR 266
46
@@ -XXX,XX +XXX,XX @@ typedef uint64_t tcg_insn_unit;
47
/* The port better have done this. */
48
#endif
49
50
-
51
-#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
52
-# define tcg_debug_assert(X) do { assert(X); } while (0)
53
-#else
54
-# define tcg_debug_assert(X) \
55
- do { if (!(X)) { __builtin_unreachable(); } } while (0)
56
-#endif
57
-
58
typedef struct TCGRelocation TCGRelocation;
59
struct TCGRelocation {
60
QSIMPLEQ_ENTRY(TCGRelocation) next;
61
diff --git a/MAINTAINERS b/MAINTAINERS
62
index XXXXXXX..XXXXXXX 100644
63
--- a/MAINTAINERS
64
+++ b/MAINTAINERS
65
@@ -XXX,XX +XXX,XX @@ F: include/sysemu/tcg.h
66
F: include/hw/core/tcg-cpu-ops.h
67
F: host/include/*/host/cpuinfo.h
68
F: util/cpuinfo-*.c
69
+F: include/tcg/
70
71
FPU emulation
72
M: Aurelien Jarno <aurelien@aurel32.net>
73
--
74
2.34.1
75
76
Deleted patch
1
Use __sync_bool_compare_and_swap_16 to control the loop,
2
rather than a separate comparison.
3
1
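To illustrate the pattern (a sketch only, not the applied hunk): the loop now lets the CAS primitive itself report success instead of re-reading and comparing the old value by hand. The name atomic16_set_sketch is made up; __sync_bool_compare_and_swap_16 is the GCC builtin the patch uses:

    static inline void atomic16_set_sketch(__int128_t *ptr_align, __int128_t val)
    {
        __int128_t old;

        do {
            /* A plain read is enough here: the CAS validates it. */
            old = *ptr_align;
        } while (!__sync_bool_compare_and_swap_16(ptr_align, old, val));
    }

As the removed store_atomic16 comment elsewhere in this series notes, without CONFIG_ATOMIC128 the __atomic_* form can defer to libatomic, which is why the __sync_*_16 form is used here.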
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
host/include/generic/host/atomic128-ldst.h | 11 +++++++----
8
1 file changed, 7 insertions(+), 4 deletions(-)
9
10
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/host/include/generic/host/atomic128-ldst.h
13
+++ b/host/include/generic/host/atomic128-ldst.h
14
@@ -XXX,XX +XXX,XX @@ atomic16_read_rw(Int128 *ptr)
15
static inline void ATTRIBUTE_ATOMIC128_OPT
16
atomic16_set(Int128 *ptr, Int128 val)
17
{
18
- Int128 old = *ptr, cmp;
19
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
20
+ __int128_t old;
21
+ Int128Alias new;
22
+
23
+ new.s = val;
24
do {
25
- cmp = old;
26
- old = atomic16_cmpxchg(ptr, cmp, val);
27
- } while (int128_ne(old, cmp));
28
+ old = *ptr_align;
29
+ } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
30
}
31
32
#else
33
--
34
2.34.1
35
36
Deleted patch
1
With FEAT_LSE2, loads and stores of int128 are directly supported.
2
1
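As a reviewer aid, the read-only path reduces to a plain LDP once LSE2 is known to be present. The sketch below (hypothetical name lse2_read16, mirroring the hunk) assumes the caller has already tested cpuinfo & CPUINFO_LSE2 and that *ptr is 16-byte aligned:

    static inline Int128 lse2_read16(const Int128 *ptr)
    {
        uint64_t l, h;

        /* With FEAT_LSE2, a 16-byte aligned LDP is single-copy atomic. */
        asm("ldp %[l], %[h], %[mem]"
            : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
        return int128_make128(l, h);
    }

With this, HAVE_ATOMIC128_RO becomes a runtime predicate rather than a compile-time constant, so the fast path is taken only on hosts that actually implement FEAT_LSE2.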
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
host/include/aarch64/host/atomic128-ldst.h | 53 ++++++++++++++++------
7
1 file changed, 40 insertions(+), 13 deletions(-)
8
9
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/host/include/aarch64/host/atomic128-ldst.h
12
+++ b/host/include/aarch64/host/atomic128-ldst.h
13
@@ -XXX,XX +XXX,XX @@
14
#ifndef AARCH64_ATOMIC128_LDST_H
15
#define AARCH64_ATOMIC128_LDST_H
16
17
+#include "host/cpuinfo.h"
18
+#include "tcg/debug-assert.h"
19
+
20
/*
21
* Through gcc 10, aarch64 has no support for 128-bit atomics.
22
* Through clang 16, without -march=armv8.4-a, __atomic_load_16
23
* is incorrectly expanded to a read-write operation.
24
+ *
25
+ * Anyway, this method allows runtime detection of FEAT_LSE2.
26
*/
27
28
-#define HAVE_ATOMIC128_RO 0
29
+#define HAVE_ATOMIC128_RO (cpuinfo & CPUINFO_LSE2)
30
#define HAVE_ATOMIC128_RW 1
31
32
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
33
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
34
+{
35
+ uint64_t l, h;
36
+
37
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
38
+ /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
39
+ asm("ldp %[l], %[h], %[mem]"
40
+ : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
41
+
42
+ return int128_make128(l, h);
43
+}
44
45
static inline Int128 atomic16_read_rw(Int128 *ptr)
46
{
47
uint64_t l, h;
48
uint32_t tmp;
49
50
- /* The load must be paired with the store to guarantee not tearing. */
51
- asm("0: ldxp %[l], %[h], %[mem]\n\t"
52
- "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
53
- "cbnz %w[tmp], 0b"
54
- : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
55
+ if (cpuinfo & CPUINFO_LSE2) {
56
+ /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
57
+ asm("ldp %[l], %[h], %[mem]"
58
+ : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
59
+ } else {
60
+ /* The load must be paired with the store to guarantee not tearing. */
61
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
62
+ "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
63
+ "cbnz %w[tmp], 0b"
64
+ : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
65
+ }
66
67
return int128_make128(l, h);
68
}
69
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
70
uint64_t l = int128_getlo(val), h = int128_gethi(val);
71
uint64_t t1, t2;
72
73
- /* Load into temporaries to acquire the exclusive access lock. */
74
- asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
75
- "stxp %w[t1], %[l], %[h], %[mem]\n\t"
76
- "cbnz %w[t1], 0b"
77
- : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
78
- : [l] "r"(l), [h] "r"(h));
79
+ if (cpuinfo & CPUINFO_LSE2) {
80
+ /* With FEAT_LSE2, 16-byte aligned STP is atomic. */
81
+ asm("stp %[l], %[h], %[mem]"
82
+ : [mem] "=m"(*ptr) : [l] "r"(l), [h] "r"(h));
83
+ } else {
84
+ /* Load into temporaries to acquire the exclusive access lock. */
85
+ asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
86
+ "stxp %w[t1], %[l], %[h], %[mem]\n\t"
87
+ "cbnz %w[t1], 0b"
88
+ : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
89
+ : [l] "r"(l), [h] "r"(h));
90
+ }
91
}
92
93
#endif /* AARCH64_ATOMIC128_LDST_H */
94
--
95
2.34.1
96
97
Deleted patch
1
This had been set since the beginning, is never undefined,
2
and undefining it would seem to be harmful to debugging.
3
1
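For context, a sketch (not part of the diff) of the logging path that remains once DEBUG_DISAS is gone: the dump is gated purely at runtime by the -d/-dfilter options. The wrapper name log_tb_in_asm is invented; the qemu_log helpers and lookup_symbol are the existing ones used in the hunks below:

    static void log_tb_in_asm(uint64_t pc)
    {
        if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc)) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "IN: %s\n", lookup_symbol(pc));
                qemu_log_unlock(logfile);
            }
        }
    }

The behaviour seen with "-d in_asm" and friends is unchanged; only the always-true compile-time guard disappears.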
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/exec-all.h | 3 ---
8
accel/tcg/cpu-exec.c | 2 --
9
accel/tcg/translate-all.c | 2 --
10
accel/tcg/translator.c | 2 --
11
target/sh4/translate.c | 2 --
12
target/sparc/translate.c | 2 --
13
tcg/tcg.c | 9 +--------
14
7 files changed, 1 insertion(+), 21 deletions(-)
15
16
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/exec-all.h
19
+++ b/include/exec/exec-all.h
20
@@ -XXX,XX +XXX,XX @@
21
#include "qemu/interval-tree.h"
22
#include "qemu/clang-tsa.h"
23
24
-/* allow to see translation results - the slowdown should be negligible, so we leave it */
25
-#define DEBUG_DISAS
26
-
27
/* Page tracking code uses ram addresses in system mode, and virtual
28
addresses in userspace mode. Define tb_page_addr_t to be an appropriate
29
type. */
30
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/accel/tcg/cpu-exec.c
33
+++ b/accel/tcg/cpu-exec.c
34
@@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
35
cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
36
tb->flags, tb->cflags, lookup_symbol(pc));
37
38
-#if defined(DEBUG_DISAS)
39
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
40
FILE *logfile = qemu_log_trylock();
41
if (logfile) {
42
@@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
43
qemu_log_unlock(logfile);
44
}
45
}
46
-#endif /* DEBUG_DISAS */
47
}
48
}
49
50
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/accel/tcg/translate-all.c
53
+++ b/accel/tcg/translate-all.c
54
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
55
qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
56
#endif
57
58
-#ifdef DEBUG_DISAS
59
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
60
qemu_log_in_addr_range(pc)) {
61
FILE *logfile = qemu_log_trylock();
62
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
63
qemu_log_unlock(logfile);
64
}
65
}
66
-#endif
67
68
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
69
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
70
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/accel/tcg/translator.c
73
+++ b/accel/tcg/translator.c
74
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
75
tb->size = db->pc_next - db->pc_first;
76
tb->icount = db->num_insns;
77
78
-#ifdef DEBUG_DISAS
79
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
80
&& qemu_log_in_addr_range(db->pc_first)) {
81
FILE *logfile = qemu_log_trylock();
82
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
83
qemu_log_unlock(logfile);
84
}
85
}
86
-#endif
87
}
88
89
static void *translator_access(CPUArchState *env, DisasContextBase *db,
90
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/sh4/translate.c
93
+++ b/target/sh4/translate.c
94
@@ -XXX,XX +XXX,XX @@
95
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
96
*/
97
98
-#define DEBUG_DISAS
99
-
100
#include "qemu/osdep.h"
101
#include "cpu.h"
102
#include "disas/disas.h"
103
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/sparc/translate.c
106
+++ b/target/sparc/translate.c
107
@@ -XXX,XX +XXX,XX @@
108
#include "asi.h"
109
110
111
-#define DEBUG_DISAS
112
-
113
#define DYNAMIC_PC 1 /* dynamic pc value */
114
#define JUMP_PC 2 /* dynamic pc value which takes only two values
115
according to jump_pc[T2] */
116
diff --git a/tcg/tcg.c b/tcg/tcg.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/tcg.c
119
+++ b/tcg/tcg.c
120
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
121
(uintptr_t)s->code_buf, prologue_size);
122
#endif
123
124
-#ifdef DEBUG_DISAS
125
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
126
FILE *logfile = qemu_log_trylock();
127
if (logfile) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
129
qemu_log_unlock(logfile);
130
}
131
}
132
-#endif
133
134
#ifndef CONFIG_TCG_INTERPRETER
135
/*
136
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
137
}
138
#endif
139
140
-#ifdef DEBUG_DISAS
141
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
142
&& qemu_log_in_addr_range(pc_start))) {
143
FILE *logfile = qemu_log_trylock();
144
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
145
qemu_log_unlock(logfile);
146
}
147
}
148
-#endif
149
150
#ifdef CONFIG_DEBUG_TCG
151
/* Ensure all labels referenced have been emitted. */
152
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
153
liveness_pass_1(s);
154
155
if (s->nb_indirects > 0) {
156
-#ifdef DEBUG_DISAS
157
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
158
&& qemu_log_in_addr_range(pc_start))) {
159
FILE *logfile = qemu_log_trylock();
160
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
161
qemu_log_unlock(logfile);
162
}
163
}
164
-#endif
165
+
166
/* Replace indirect temps with direct temps. */
167
if (liveness_pass_2(s)) {
168
/* If changes were made, re-run liveness. */
169
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
170
qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
171
#endif
172
173
-#ifdef DEBUG_DISAS
174
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
175
&& qemu_log_in_addr_range(pc_start))) {
176
FILE *logfile = qemu_log_trylock();
177
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
178
qemu_log_unlock(logfile);
179
}
180
}
181
-#endif
182
183
/* Initialize goto_tb jump offsets. */
184
tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
185
--
186
2.34.1
187
188
Deleted patch
1
This is always defined, and the optimization pass is
2
essential to producing reasonable code.
3
1
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 5 -----
8
1 file changed, 5 deletions(-)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@
15
* THE SOFTWARE.
16
*/
17
18
-/* define it to use liveness analysis (better code) */
19
-#define USE_TCG_OPTIMIZATIONS
20
-
21
#include "qemu/osdep.h"
22
23
/* Define to jump the ELF file used to communicate with GDB. */
24
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
25
qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
26
#endif
27
28
-#ifdef USE_TCG_OPTIMIZATIONS
29
tcg_optimize(s);
30
-#endif
31
32
#ifdef CONFIG_PROFILER
33
qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
34
--
35
2.34.1
36
37