I have not been able to prod reviews of all of the rotate patches
in 4 weeks, but let's not let that block ARM work forever.

r~

The following changes since commit cccdd8c7971896c339d59c9c5d4647d4ffd9568a:

  Merge remote-tracking branch 'remotes/ehabkost/tags/machine-next-pull-request' into staging (2020-06-02 10:25:55 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200602

for you to fetch changes up to 71b04329c4f7d5824a289ca5225e1883a278cf3b:

  accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler (2020-06-02 08:42:37 -0700)

----------------------------------------------------------------
Vector rotate support
Signal handling support for NetBSD arm/aarch64

----------------------------------------------------------------
Nick Hudson (2):
      accel/tcg: Adjust cpu_signal_handler for NetBSD/arm
      accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler

Richard Henderson (10):
      tcg: Implement gvec support for rotate by immediate
      tcg: Implement gvec support for rotate by vector
      tcg: Remove expansion to shift by vector from do_shifts
      tcg: Implement gvec support for rotate by scalar
      tcg/i386: Implement INDEX_op_rotl{i,s,v}_vec
      tcg/aarch64: Implement INDEX_op_rotl{i,v}_vec
      tcg/ppc: Implement INDEX_op_rot[lr]v_vec
      target/ppc: Use tcg_gen_gvec_rotlv
      target/s390x: Use tcg_gen_gvec_rotl{i,s,v}
      tcg: Improve move ops in liveness_pass_2

 accel/tcg/tcg-runtime.h             |  15 +++
 include/tcg/tcg-op-gvec.h           |  12 ++
 include/tcg/tcg-op.h                |   5 +
 include/tcg/tcg-opc.h               |   4 +
 include/tcg/tcg.h                   |   3 +
 target/ppc/helper.h                 |   4 -
 target/s390x/helper.h               |   4 -
 tcg/aarch64/tcg-target.h            |   3 +
 tcg/aarch64/tcg-target.opc.h        |   1 +
 tcg/i386/tcg-target.h               |   3 +
 tcg/ppc/tcg-target.h                |   3 +
 tcg/ppc/tcg-target.opc.h            |   1 -
 accel/tcg/tcg-runtime-gvec.c        | 144 ++++++++++++++++++++++
 accel/tcg/user-exec.c               |  43 +++++++-
 target/ppc/int_helper.c             |  17 ---
 target/ppc/translate/vmx-impl.inc.c |   8 +-
 target/s390x/translate_vx.inc.c     |  66 ++---------
 target/s390x/vec_int_helper.c       |  31 ------
 tcg/aarch64/tcg-target.inc.c        |  53 ++++++++-
 tcg/i386/tcg-target.inc.c           | 116 +++++++++++++++++---
 tcg/ppc/tcg-target.inc.c            |  23 +++-
 tcg/tcg-op-gvec.c                   | 212 ++++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c                    |  62 +++++++----
 tcg/tcg.c                           |  85 +++++++++++----
 target/s390x/insn-data.def          |   4 +-
 tcg/README                          |   7 +-
 26 files changed, 736 insertions(+), 193 deletions(-)

The following changes since commit 2ecfc0657afa5d29a373271b342f704a1a3c6737:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2020-12-10' into staging (2020-12-10 17:01:05 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201210

for you to fetch changes up to 9e2658d62ebc23efe7df43fc0e306f129510d874:

  accel/tcg: rename tcg-cpus functions to match module name (2020-12-10 17:44:10 -0600)

----------------------------------------------------------------
Split CpusAccel for tcg variants

----------------------------------------------------------------
Claudio Fontana (3):
      accel/tcg: split CpusAccel into three TCG variants
      accel/tcg: split tcg_start_vcpu_thread
      accel/tcg: rename tcg-cpus functions to match module name

 accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-rr.h     |  21 ++
 accel/tcg/tcg-cpus.h        |  12 +-
 accel/tcg/tcg-all.c         |  13 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 140 ++++++++++++
 accel/tcg/tcg-cpus-rr.c     | 305 ++++++++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 506 +-------------------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 10 files changed, 670 insertions(+), 502 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c
No host backend support yet, but the interfaces for rotli
are in place.  Canonicalize immediate rotate to the left,
based on a survey of architectures, but provide both left
and right shift interfaces to the translators.
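The identity behind the left canonicalization: a right rotate by c is a
left rotate by -c modulo the element width.  A minimal scalar sketch in
plain C (example-only code, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    static uint16_t rol16(uint16_t x, unsigned c)
    {
        /* c is already reduced mod 16; avoid the undefined shift by 16 */
        return c ? (uint16_t)((x << c) | (x >> (16 - c))) : x;
    }

    int main(void)
    {
        uint16_t x = 0x8001;
        for (unsigned c = 0; c < 16; c++) {
            uint16_t ror = c ? (uint16_t)((x >> c) | (x << (16 - c))) : x;
            /* rotr(x, c) == rotl(x, -c & 15): what tcg_gen_gvec_rotri uses */
            assert(ror == rol16(x, -c & 15));
        }
        return 0;
    }
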
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime.h      |  5 +++
 include/tcg/tcg-op-gvec.h    |  6 ++++
 include/tcg/tcg-op.h         |  2 ++
 include/tcg/tcg-opc.h        |  1 +
 include/tcg/tcg.h            |  1 +
 tcg/aarch64/tcg-target.h     |  1 +
 tcg/i386/tcg-target.h        |  1 +
 tcg/ppc/tcg-target.h         |  1 +
 accel/tcg/tcg-runtime-gvec.c | 48 +++++++++++++++++++++++++
 tcg/tcg-op-gvec.c            | 68 ++++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c             | 12 +++++++
 tcg/tcg.c                    |  2 ++
 tcg/README                   |  3 +-
 13 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 
 void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
 #endif
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
 void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
 void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
 void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
 
 void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
 void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -XXX,XX +XXX,XX @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
 DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
 DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
 DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
 
 DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
 DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_not_vec 0
 #define TCG_TARGET_HAS_andc_vec 0
 #define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
 #define TCG_TARGET_HAS_shi_vec 0
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 0
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_not_vec 1
 #define TCG_TARGET_HAS_neg_vec 1
 #define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
 #define TCG_TARGET_HAS_shi_vec 1
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
 #define TCG_TARGET_HAS_not_vec 0
 #define TCG_TARGET_HAS_neg_vec 0
 #define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
 #define TCG_TARGET_HAS_shi_vec 1
 #define TCG_TARGET_HAS_shs_vec 1
 #define TCG_TARGET_HAS_shv_vec have_avx2
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_not_vec 1
 #define TCG_TARGET_HAS_neg_vec have_isa_3_00
 #define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
 #define TCG_TARGET_HAS_shi_vec 0
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 1
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
     clear_high(d, oprsz, desc);
 }
 
+void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    int shift = simd_data(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    int shift = simd_data(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    int shift = simd_data(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    int shift = simd_data(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
+    }
+    clear_high(d, oprsz, desc);
+}
+
 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
 {
     intptr_t oprsz = simd_oprsz(desc);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
     }
 }
 
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+    uint64_t mask = dup_const(MO_8, 0xff << c);
+
+    tcg_gen_shli_i64(d, a, c);
+    tcg_gen_shri_i64(a, a, 8 - c);
+    tcg_gen_andi_i64(d, d, mask);
+    tcg_gen_andi_i64(a, a, ~mask);
+    tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+    uint64_t mask = dup_const(MO_16, 0xffff << c);
+
+    tcg_gen_shli_i64(d, a, c);
+    tcg_gen_shri_i64(a, a, 16 - c);
+    tcg_gen_andi_i64(d, d, mask);
+    tcg_gen_andi_i64(a, a, ~mask);
+    tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+    static const GVecGen2i g[4] = {
+        { .fni8 = tcg_gen_vec_rotl8i_i64,
+          .fniv = tcg_gen_rotli_vec,
+          .fno = gen_helper_gvec_rotl8i,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fni8 = tcg_gen_vec_rotl16i_i64,
+          .fniv = tcg_gen_rotli_vec,
+          .fno = gen_helper_gvec_rotl16i,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = tcg_gen_rotli_i32,
+          .fniv = tcg_gen_rotli_vec,
+          .fno = gen_helper_gvec_rotl32i,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = tcg_gen_rotli_i64,
+          .fniv = tcg_gen_rotli_vec,
+          .fno = gen_helper_gvec_rotl64i,
+          .opt_opc = vecop_list,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .vece = MO_64 },
+    };
+
+    tcg_debug_assert(vece <= MO_64);
+    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+    if (shift == 0) {
+        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+    } else {
+        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+    }
+}
+
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_debug_assert(vece <= MO_64);
+    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+    tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
+                       oprsz, maxsz);
+}
+
 /*
  * Specialized generation vector shifts by a non-constant scalar.
  */
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
     do_shifti(INDEX_op_sari_vec, vece, r, a, i);
 }
 
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
+}
+
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+    int bits = 8 << vece;
+    tcg_debug_assert(i >= 0 && i < bits);
+    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
+}
+
 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                      TCGv_vec r, TCGv_vec a, TCGv_vec b)
 {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_shrv_vec:
     case INDEX_op_sarv_vec:
         return have_vec && TCG_TARGET_HAS_shv_vec;
+    case INDEX_op_rotli_vec:
+        return have_vec && TCG_TARGET_HAS_roti_vec;
     case INDEX_op_ssadd_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_sssub_vec:
diff --git a/tcg/README b/tcg/README
index XXXXXXX..XXXXXXX 100644
--- a/tcg/README
+++ b/tcg/README
@@ -XXX,XX +XXX,XX @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
 
 * shri_vec v0, v1, i2
 * sari_vec v0, v1, i2
+* rotli_vec v0, v1, i2
 * shrs_vec v0, v1, s2
 * sars_vec v0, v1, s2
 
-  Similarly for logical and arithmetic right shift.
+  Similarly for logical and arithmetic right shift, and left rotate.
 
 * shlv_vec v0, v1, v2
 
-- 
2.25.1
No host backend support yet, but the interfaces for rotlv
and rotrv are in place.
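Note the difference from the shift-by-vector helpers: an out-of-range
shift count zeroes (or sign-fills) the element, while a rotate count is
meaningful modulo the element width, so these helpers mask it.  A scalar
sketch of the per-element semantics in plain C (example-only name):

    #include <stdint.h>

    /* What gvec_rotl32v does per element: the count is taken mod 32. */
    static uint32_t rotl32_mod(uint32_t x, uint32_t count)
    {
        unsigned sh = count & 31;
        return sh ? (x << sh) | (x >> (32 - sh)) : x;
    }
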
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Drop the generic expansion from rot to shift; we can do better
    for each backend, and then this code becomes unused.
---
 accel/tcg/tcg-runtime.h      | 10 +++
 include/tcg/tcg-op-gvec.h    |  4 ++
 include/tcg/tcg-op.h         |  2 +
 include/tcg/tcg-opc.h        |  2 +
 include/tcg/tcg.h            |  1 +
 tcg/aarch64/tcg-target.h     |  1 +
 tcg/i386/tcg-target.h        |  1 +
 tcg/ppc/tcg-target.h         |  1 +
 accel/tcg/tcg-runtime-gvec.c | 96 +++++++++++++++++++++++++++
 tcg/tcg-op-gvec.c            | 122 +++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c             | 10 +++
 tcg/tcg.c                    |  3 +
 tcg/README                   |  4 +-
 13 files changed, 256 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
 
 void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, uint32_t bofs,
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
 void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
 void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
 void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
 
 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                      TCGv_vec a, TCGv_vec b);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -XXX,XX +XXX,XX @@ DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
 DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
 DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
 DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
 
 DEF(cmp_vec, 1, 2, 1, IMPLVEC)
 
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_andc_vec 0
 #define TCG_TARGET_HAS_orc_vec 0
 #define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
 #define TCG_TARGET_HAS_shi_vec 0
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 0
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_neg_vec 1
 #define TCG_TARGET_HAS_abs_vec 1
 #define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
 #define TCG_TARGET_HAS_shi_vec 1
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
 #define TCG_TARGET_HAS_neg_vec 0
 #define TCG_TARGET_HAS_abs_vec 1
 #define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
 #define TCG_TARGET_HAS_shi_vec 1
 #define TCG_TARGET_HAS_shs_vec 1
 #define TCG_TARGET_HAS_shv_vec have_avx2
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_neg_vec have_isa_3_00
 #define TCG_TARGET_HAS_abs_vec 0
 #define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
 #define TCG_TARGET_HAS_shi_vec 0
 #define TCG_TARGET_HAS_shs_vec 0
 #define TCG_TARGET_HAS_shv_vec 1
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
     clear_high(d, oprsz, desc);
 }
 
+void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+        uint8_t sh = *(uint8_t *)(b + i) & 7;
+        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+        uint8_t sh = *(uint16_t *)(b + i) & 15;
+        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+        uint8_t sh = *(uint32_t *)(b + i) & 31;
+        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+        uint8_t sh = *(uint64_t *)(b + i) & 63;
+        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+        uint8_t sh = *(uint8_t *)(b + i) & 7;
+        *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+        uint8_t sh = *(uint16_t *)(b + i) & 15;
+        *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+        uint8_t sh = *(uint32_t *)(b + i) & 31;
+        *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+        uint8_t sh = *(uint64_t *)(b + i) & 63;
+        *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
+    }
+    clear_high(d, oprsz, desc);
+}
+
 #define DO_CMP1(NAME, TYPE, OP) \
 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
 { \
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
     tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
 }
 
+/*
+ * Similarly for rotates.
+ */
+
+static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
+                                  TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+    tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+    tcg_gen_and_vec(vece, t, t, b);
+    tcg_gen_rotlv_vec(vece, d, a, t);
+    tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t, b, 31);
+    tcg_gen_rotl_i32(d, a, t);
+    tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_andi_i64(t, b, 63);
+    tcg_gen_rotl_i64(d, a, t);
+    tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
+    static const GVecGen3 g[4] = {
+        { .fniv = tcg_gen_rotlv_mod_vec,
+          .fno = gen_helper_gvec_rotl8v,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = tcg_gen_rotlv_mod_vec,
+          .fno = gen_helper_gvec_rotl16v,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = tcg_gen_rotl_mod_i32,
+          .fniv = tcg_gen_rotlv_mod_vec,
+          .fno = gen_helper_gvec_rotl32v,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = tcg_gen_rotl_mod_i64,
+          .fniv = tcg_gen_rotlv_mod_vec,
+          .fno = gen_helper_gvec_rotl64v,
+          .opt_opc = vecop_list,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .vece = MO_64 },
+    };
+
+    tcg_debug_assert(vece <= MO_64);
+    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
+                                  TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+    tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+    tcg_gen_and_vec(vece, t, t, b);
+    tcg_gen_rotrv_vec(vece, d, a, t);
+    tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t, b, 31);
+    tcg_gen_rotr_i32(d, a, t);
+    tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_andi_i64(t, b, 63);
+    tcg_gen_rotr_i64(d, a, t);
+    tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
+    static const GVecGen3 g[4] = {
+        { .fniv = tcg_gen_rotrv_mod_vec,
+          .fno = gen_helper_gvec_rotr8v,
+          .opt_opc = vecop_list,
+          .vece = MO_8 },
+        { .fniv = tcg_gen_rotrv_mod_vec,
+          .fno = gen_helper_gvec_rotr16v,
+          .opt_opc = vecop_list,
+          .vece = MO_16 },
+        { .fni4 = tcg_gen_rotr_mod_i32,
+          .fniv = tcg_gen_rotrv_mod_vec,
+          .fno = gen_helper_gvec_rotr32v,
+          .opt_opc = vecop_list,
+          .vece = MO_32 },
+        { .fni8 = tcg_gen_rotr_mod_i64,
+          .fniv = tcg_gen_rotrv_mod_vec,
+          .fno = gen_helper_gvec_rotr64v,
+          .opt_opc = vecop_list,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .vece = MO_64 },
+    };
+
+    tcg_debug_assert(vece <= MO_64);
+    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
 /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
 static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                            uint32_t oprsz, TCGCond cond)
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
     do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
 }
 
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
+}
+
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
+}
+
 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                       TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
 {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
         return have_vec && TCG_TARGET_HAS_shv_vec;
     case INDEX_op_rotli_vec:
         return have_vec && TCG_TARGET_HAS_roti_vec;
+    case INDEX_op_rotlv_vec:
+    case INDEX_op_rotrv_vec:
+        return have_vec && TCG_TARGET_HAS_rotv_vec;
     case INDEX_op_ssadd_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_sssub_vec:
diff --git a/tcg/README b/tcg/README
index XXXXXXX..XXXXXXX 100644
--- a/tcg/README
+++ b/tcg/README
@@ -XXX,XX +XXX,XX @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
 
 * shrv_vec v0, v1, v2
 * sarv_vec v0, v1, v2
+* rotlv_vec v0, v1, v2
+* rotrv_vec v0, v1, v2
 
-  Similarly for logical and arithmetic right shift.
+  Similarly for logical and arithmetic right shift, and rotates.
 
 * cmp_vec v0, v1, v2, cond
 
-- 
2.25.1
We do not reflect this expansion in tcg_can_emit_vecop_list,
so it is unused and unusable.  However, we actually perform
the same expansion in do_gvec_shifts, so it is also unneeded.
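The dropped fallback duplicated the i32 count into a vector and then used
the shift-by-vector opcode; do_gvec_shifts arranges that same expansion at
the gvec level.  A sketch of the broadcast-then-shift idea for the
non-MO_64 case (hypothetical helper name; the removed code also routed
MO_64 through an i64 temporary):

    /* shls(v, s) behaves like shlv(v, dup(s)); after this patch the
     * expansion lives only in do_gvec_shifts.  Sketch, not committed code. */
    static void shls_via_shlv(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
    {
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        tcg_gen_dup_i32_vec(vece, t, s);     /* broadcast count to each lane */
        tcg_gen_shlv_vec(vece, r, a, t);     /* per-element shift */
        tcg_temp_free_vec(t);
    }
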
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-vec.c | 35 +++++++++++------------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 }
 
 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
-                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
+                      TCGv_i32 s, TCGOpcode opc)
 {
     TCGTemp *rt = tcgv_vec_temp(r);
     TCGTemp *at = tcgv_vec_temp(a);
@@ -XXX,XX +XXX,XX @@ static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
     TCGArg ai = temp_arg(at);
     TCGArg si = temp_arg(st);
     TCGType type = rt->base_type;
-    const TCGOpcode *hold_list;
     int can;
 
     tcg_debug_assert(at->base_type >= type);
-    tcg_assert_listed_vecop(opc_s);
-    hold_list = tcg_swap_vecop_list(NULL);
-
-    can = tcg_can_emit_vec_op(opc_s, type, vece);
+    tcg_assert_listed_vecop(opc);
+    can = tcg_can_emit_vec_op(opc, type, vece);
     if (can > 0) {
-        vec_gen_3(opc_s, type, vece, ri, ai, si);
+        vec_gen_3(opc, type, vece, ri, ai, si);
     } else if (can < 0) {
-        tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
+        tcg_swap_vecop_list(hold_list);
     } else {
-        TCGv_vec vec_s = tcg_temp_new_vec(type);
-
-        if (vece == MO_64) {
-            TCGv_i64 s64 = tcg_temp_new_i64();
-            tcg_gen_extu_i32_i64(s64, s);
-            tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
-            tcg_temp_free_i64(s64);
-        } else {
-            tcg_gen_dup_i32_vec(vece, vec_s, s);
-        }
-        do_op3_nofail(vece, r, a, vec_s, opc_v);
-        tcg_temp_free_vec(vec_s);
+        g_assert_not_reached();
     }
-    tcg_swap_vecop_list(hold_list);
 }
 
 void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 {
-    do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
+    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
 }
 
 void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 {
-    do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
+    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
 }
 
 void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 {
-    do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
+    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
 }
 
 void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
-- 
2.25.1
For immediates, we must continue the special casing of 8-bit
elements.  The other element sizes and shift types are trivially
implemented with shifts.
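x86 has no vector rotate instruction at these ISA levels, so the
immediate form expands to two shifts plus an OR per element.  A sketch of
the non-8-bit expansion using existing TCG vector ops (an assumed shape
of the expander, not the committed code, which also special-cases MO_8):

    /* rotli(v1, imm) -> (v1 << imm) | (v1 >> (bits - imm)), per element */
    static void expand_vec_rotli(TCGType type, unsigned vece,
                                 TCGv_vec v0, TCGv_vec v1, TCGArg imm)
    {
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_gen_shli_vec(vece, t, v1, imm);
        tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
        tcg_gen_or_vec(vece, v0, v0, t);
        tcg_temp_free_vec(t);
    }
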
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 116 ++++++++++++++++++++++++++++++++------
 1 file changed, 100 insertions(+), 16 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_shls_vec:
     case INDEX_op_shrs_vec:
     case INDEX_op_sars_vec:
+    case INDEX_op_rotls_vec:
     case INDEX_op_cmp_vec:
     case INDEX_op_x86_shufps_vec:
     case INDEX_op_x86_blend_vec:
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_xor_vec:
     case INDEX_op_andc_vec:
         return 1;
+    case INDEX_op_rotli_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return -1;
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
        return vece >= MO_16;
    case INDEX_op_sars_vec:
        return vece >= MO_16 && vece <= MO_32;
+    case INDEX_op_rotls_vec:
+        return vece >= MO_16 ? -1 : 0;
 
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
        return have_avx2 && vece >= MO_32;
    case INDEX_op_sarv_vec:
        return have_avx2 && vece == MO_32;
+    case INDEX_op_rotlv_vec:
+    case INDEX_op_rotrv_vec:
+        return have_avx2 && vece >= MO_32 ? -1 : 0;
 
    case INDEX_op_mul_vec:
        if (vece == MO_8) {
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)

From: Claudio Fontana <cfontana@suse.de>

split up the CpusAccel tcg_cpus into three TCG variants:

tcg_cpus_rr (single threaded, round robin cpus)
tcg_cpus_icount (same as rr, but with instruction counting enabled)
tcg_cpus_mttcg (multi-threaded cpus)

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-2-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-mttcg.h  |  21 ++
 accel/tcg/tcg-cpus-rr.h     |  20 ++
 accel/tcg/tcg-cpus.h        |  13 +-
 accel/tcg/tcg-all.c         |   8 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 117 +++++++++
 accel/tcg/tcg-cpus-rr.c     | 270 ++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 484 ++----------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 11 files changed, 646 insertions(+), 462 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-mttcg.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_ICOUNT_H
+#define TCG_CPUS_ICOUNT_H
+
+void handle_icount_deadline(void);
+void prepare_icount_for_run(CPUState *cpu);
+void process_icount_data(CPUState *cpu);
+
+#endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_MTTCG_H
+#define TCG_CPUS_MTTCG_H
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_RR_H
+#define TCG_CPUS_RR_H
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+/* Kick all RR vCPUs. */
+void qemu_cpu_kick_rr_cpus(CPUState *unused);
+
+void *tcg_rr_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@
 /*
- * Accelerator CPUS Interface
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vcpu variants: mttcg, rr and icount.
  *
  * Copyright 2020 SUSE LLC
  *
@@ -XXX,XX +XXX,XX @@
 
 #include "sysemu/cpus.h"
 
-extern const CpusAccel tcg_cpus;
+extern const CpusAccel tcg_cpus_mttcg;
+extern const CpusAccel tcg_cpus_icount;
+extern const CpusAccel tcg_cpus_rr;
+
+void tcg_start_vcpu_thread(CPUState *cpu);
+void qemu_tcg_destroy_vcpu(CPUState *cpu);
+int tcg_cpu_exec(CPUState *cpu);
+void tcg_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
 
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
-    cpus_register_accel(&tcg_cpus);
 
+    if (mttcg_enabled) {
+        cpus_register_accel(&tcg_cpus_mttcg);
+    } else if (icount_enabled()) {
+        cpus_register_accel(&tcg_cpus_icount);
+    } else {
+        cpus_register_accel(&tcg_cpus_rr);
+    }
     return 0;
 }
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-icount.h"
+#include "tcg-cpus-rr.h"
+
+static int64_t tcg_get_icount_limit(void)
+{
+    int64_t deadline;
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        /*
+         * Include all the timers, because they may need an attention.
+         * Too long CPU execution may create unnecessary delay in UI.
+         */
+        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                              QEMU_TIMER_ATTR_ALL);
+        /* Check realtime timers, because they help with input processing */
+        deadline = qemu_soonest_timeout(deadline,
+                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
+                                           QEMU_TIMER_ATTR_ALL));
+
+        /*
+         * Maintain prior (possibly buggy) behaviour where if no deadline
+         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+         * nanoseconds.
+         */
+        if ((deadline < 0) || (deadline > INT32_MAX)) {
+            deadline = INT32_MAX;
+        }
+
+        return icount_round(deadline);
+    } else {
+        return replay_get_instructions();
+    }
+}
+
+static void notify_aio_contexts(void)
+{
+    /* Wake up other AioContexts. */
+    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+}
+
+void handle_icount_deadline(void)
+{
+    assert(qemu_in_vcpu_thread());
+    int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                  QEMU_TIMER_ATTR_ALL);
+
+    if (deadline == 0) {
+        notify_aio_contexts();
+    }
+}
+
+void prepare_icount_for_run(CPUState *cpu)
+{
+    int insns_left;
+
+    /*
+     * These should always be cleared by process_icount_data after
+     * each vCPU execution. However u16.high can be raised
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     */
+    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
+    g_assert(cpu->icount_extra == 0);
+
+    cpu->icount_budget = tcg_get_icount_limit();
+    insns_left = MIN(0xffff, cpu->icount_budget);
+    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
+    cpu->icount_extra = cpu->icount_budget - insns_left;
+
+    replay_mutex_lock();
+
+    if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
+        notify_aio_contexts();
+    }
+}
+
+void process_icount_data(CPUState *cpu)
+{
+    /* Account for executed instructions */
+    icount_update(cpu);
+
+    /* Reset the counters */
+    cpu_neg(cpu)->icount_decr.u16.low = 0;
+    cpu->icount_extra = 0;
+    cpu->icount_budget = 0;
+
+    replay_account_executed_instructions();
+
+    replay_mutex_unlock();
+}
+
+static void icount_handle_interrupt(CPUState *cpu, int mask)
+{
+    int old_mask = cpu->interrupt_request;
+
+    tcg_handle_interrupt(cpu, mask);
+    if (qemu_cpu_is_self(cpu) &&
+        !cpu->can_do_io
+        && (mask & ~old_mask) != 0) {
+        cpu_abort(cpu, "Raised interrupt while not in I/O function");
+    }
+}
+
+const CpusAccel tcg_cpus_icount = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+
+    .handle_interrupt = icount_handle_interrupt,
+    .get_virtual_clock = icount_get,
+    .get_elapsed_ticks = icount_get,
+};
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    g_assert(!icount_enabled());
+
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    current_cpu = cpu;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* process any pending work */
+    cpu->exit_request = 1;
+
+    do {
+        if (cpu_can_run(cpu)) {
+            int r;
+            qemu_mutex_unlock_iothread();
+            r = tcg_cpu_exec(cpu);
+            qemu_mutex_lock_iothread();
+            switch (r) {
+            case EXCP_DEBUG:
+                cpu_handle_guest_debug(cpu);
+                break;
+            case EXCP_HALTED:
+                /*
+                 * during start-up the vCPU is reset and the thread is
+                 * kicked several times. If we don't ensure we go back
+                 * to sleep in the halted state we won't cleanly
+                 * start-up when the vCPU is enabled.
+                 *
+                 * cpu->halted should ensure we sleep in wait_io_event
+                 */
+                g_assert(cpu->halted);
+                break;
+            case EXCP_ATOMIC:
+                qemu_mutex_unlock_iothread();
+                cpu_exec_step_atomic(cpu);
+                qemu_mutex_lock_iothread();
+            default:
+                /* Ignore everything else? */
+                break;
+            }
+        }
+
+        qatomic_mb_set(&cpu->exit_request, 0);
+        qemu_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    qemu_tcg_destroy_vcpu(cpu);
+    qemu_mutex_unlock_iothread();
+    rcu_unregister_thread();
+    return NULL;
+}
+
+static void mttcg_kick_vcpu_thread(CPUState *cpu)
+{
+    cpu_exit(cpu);
+}
+
+const CpusAccel tcg_cpus_mttcg = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = mttcg_kick_vcpu_thread,
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-rr.h"
+#include "tcg-cpus-icount.h"
+
+/* Kick all RR vCPUs */
+void qemu_cpu_kick_rr_cpus(CPUState *unused)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_exit(cpu);
+    };
+}
+
+/*
+ * TCG vCPU kick timer
+ *
+ * The kick timer is responsible for moving single threaded vCPU
+ * emulation on to the next vCPU. If more than one vCPU is running a
+ * timer event with force a cpu->exit so the next vCPU can get
+ * scheduled.
+ *
+ * The timer is removed if all vCPUs are idle and restarted again once
+ * idleness is complete.
+ */
+
+static QEMUTimer *tcg_kick_vcpu_timer;
+static CPUState *tcg_current_rr_cpu;
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+static inline int64_t qemu_tcg_next_kick(void)
+{
+    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
+}
+
+/* Kick the currently round-robin scheduled vCPU to next */
+static void qemu_cpu_kick_rr_next_cpu(void)
+{
+    CPUState *cpu;
+    do {
+        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        if (cpu) {
+            cpu_exit(cpu);
+        }
+    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+}
+
+static void kick_tcg_thread(void *opaque)
+{
+    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    qemu_cpu_kick_rr_next_cpu();
+}
+
+static void start_tcg_kick_timer(void)
+{
+    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           kick_tcg_thread, NULL);
+    }
+    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
+        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    }
+}
+
+static void stop_tcg_kick_timer(void)
+{
+    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
+        timer_del(tcg_kick_vcpu_timer);
+    }
+}
+
+static void qemu_tcg_rr_wait_io_event(void)
+{
+    CPUState *cpu;
+
+    while (all_cpu_threads_idle()) {
+        stop_tcg_kick_timer();
+        qemu_cond_wait_iothread(first_cpu->halt_cond);
+    }
+
+    start_tcg_kick_timer();
+
+    CPU_FOREACH(cpu) {
+        qemu_wait_io_event_common(cpu);
+    }
+}
+
+/*
+ * Destroy any remaining vCPUs which have been unplugged and have
+ * finished running
+ */
+static void deal_with_unplugged_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->unplug && !cpu_can_run(cpu)) {
+            qemu_tcg_destroy_vcpu(cpu);
+            break;
+        }
+    }
+}
+
+/*
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ */
+
+void *tcg_rr_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* wait for initial kick-off after machine start */
+    while (first_cpu->stopped) {
+        qemu_cond_wait_iothread(first_cpu->halt_cond);
+
+        /* process any pending work */
+        CPU_FOREACH(cpu) {
+            current_cpu = cpu;
+            qemu_wait_io_event_common(cpu);
+        }
+    }
+
+    start_tcg_kick_timer();
+
+    cpu = first_cpu;
+
+    /* process any pending work */
+    cpu->exit_request = 1;
+
+    while (1) {
+        qemu_mutex_unlock_iothread();
+        replay_mutex_lock();
+        qemu_mutex_lock_iothread();
+
+        if (icount_enabled()) {
+            /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
+            icount_account_warp_timer();
+            /*
+             * Run the timers here. This is much more efficient than
+             * waking up the I/O thread and waiting for completion.
+             */
+            handle_icount_deadline();
+        }
+
+        replay_mutex_unlock();
+
+        if (!cpu) {
+            cpu = first_cpu;
+        }
+
+        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
+
+            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            current_cpu = cpu;
+
+            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+            if (cpu_can_run(cpu)) {
+                int r;
+
+                qemu_mutex_unlock_iothread();
+                if (icount_enabled()) {
+                    prepare_icount_for_run(cpu);
+                }
+                r = tcg_cpu_exec(cpu);
+                if (icount_enabled()) {
+                    process_icount_data(cpu);
+                }
+                qemu_mutex_lock_iothread();
+
+                if (r == EXCP_DEBUG) {
+                    cpu_handle_guest_debug(cpu);
+                    break;
+                } else if (r == EXCP_ATOMIC) {
+                    qemu_mutex_unlock_iothread();
+                    cpu_exec_step_atomic(cpu);
+                    qemu_mutex_lock_iothread();
+                    break;
+                }
+            } else if (cpu->stop) {
+                if (cpu->unplug) {
+                    cpu = CPU_NEXT(cpu);
+                }
+                break;
+            }
+
+            cpu = CPU_NEXT(cpu);
+        } /* while (cpu && !cpu->exit_request).. */
+
+        /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
+        qatomic_set(&tcg_current_rr_cpu, NULL);
+
+        if (cpu && cpu->exit_request) {
+            qatomic_mb_set(&cpu->exit_request, 0);
+        }
+
+        if (icount_enabled() && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
+        qemu_tcg_rr_wait_io_event();
+        deal_with_unplugged_cpus();
+    }
+
+    rcu_unregister_thread();
+    return NULL;
+}
+
+const CpusAccel tcg_cpus_rr = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 /*
- * QEMU System Emulator
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vCPU variants: mttcg, rr and icount.
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
  * Copyright (c) 2014 Red Hat Inc.
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+#include "tcg-cpus-rr.h"
 
-/* Kick all RR vCPUs */
-static void qemu_cpu_kick_rr_cpus(void)
-{
-    CPUState *cpu;
+/* common functionality among all TCG variants */
 
-    CPU_FOREACH(cpu) {
-        cpu_exit(cpu);
-    };
-}
-
-static void tcg_kick_vcpu_thread(CPUState *cpu)
-{
-    if (qemu_tcg_mttcg_enabled()) {
-        cpu_exit(cpu);
-    } else {
-        qemu_cpu_kick_rr_cpus();
-    }
-}
-
-/*
- * TCG vCPU kick timer
- *
- * The kick timer is responsible for moving single threaded vCPU
- * emulation on to the next vCPU. If more than one vCPU is running a
- * timer event with force a cpu->exit so the next vCPU can get
- * scheduled.
- * scheduled.
758
- *
759
- * The timer is removed if all vCPUs are idle and restarted again once
760
- * idleness is complete.
761
- */
762
-
763
-static QEMUTimer *tcg_kick_vcpu_timer;
764
-static CPUState *tcg_current_rr_cpu;
765
-
766
-#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
767
-
768
-static inline int64_t qemu_tcg_next_kick(void)
769
-{
770
- return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
771
-}
772
-
773
-/* Kick the currently round-robin scheduled vCPU to next */
774
-static void qemu_cpu_kick_rr_next_cpu(void)
775
-{
776
- CPUState *cpu;
777
- do {
778
- cpu = qatomic_mb_read(&tcg_current_rr_cpu);
779
- if (cpu) {
780
- cpu_exit(cpu);
781
- }
782
- } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
783
-}
784
-
785
-static void kick_tcg_thread(void *opaque)
786
-{
787
- timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
788
- qemu_cpu_kick_rr_next_cpu();
789
-}
790
-
791
-static void start_tcg_kick_timer(void)
792
-{
793
- assert(!mttcg_enabled);
794
- if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
795
- tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
796
- kick_tcg_thread, NULL);
797
- }
798
- if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
799
- timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
800
- }
801
-}
802
-
803
-static void stop_tcg_kick_timer(void)
804
-{
805
- assert(!mttcg_enabled);
806
- if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
807
- timer_del(tcg_kick_vcpu_timer);
808
- }
809
-}
810
-
811
-static void qemu_tcg_destroy_vcpu(CPUState *cpu)
812
-{
813
-}
814
-
815
-static void qemu_tcg_rr_wait_io_event(void)
816
-{
817
- CPUState *cpu;
818
-
819
- while (all_cpu_threads_idle()) {
820
- stop_tcg_kick_timer();
821
- qemu_cond_wait_iothread(first_cpu->halt_cond);
822
- }
823
-
824
- start_tcg_kick_timer();
825
-
826
- CPU_FOREACH(cpu) {
827
- qemu_wait_io_event_common(cpu);
828
- }
829
-}
830
-
831
-static int64_t tcg_get_icount_limit(void)
832
-{
833
- int64_t deadline;
834
-
835
- if (replay_mode != REPLAY_MODE_PLAY) {
836
- /*
837
- * Include all the timers, because they may need an attention.
838
- * Too long CPU execution may create unnecessary delay in UI.
839
- */
840
- deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
841
- QEMU_TIMER_ATTR_ALL);
842
- /* Check realtime timers, because they help with input processing */
843
- deadline = qemu_soonest_timeout(deadline,
844
- qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
845
- QEMU_TIMER_ATTR_ALL));
846
-
847
- /*
848
- * Maintain prior (possibly buggy) behaviour where if no deadline
849
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
850
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
851
- * nanoseconds.
852
- */
853
- if ((deadline < 0) || (deadline > INT32_MAX)) {
854
- deadline = INT32_MAX;
855
- }
856
-
857
- return icount_round(deadline);
858
- } else {
859
- return replay_get_instructions();
860
- }
861
-}
862
-
863
-static void notify_aio_contexts(void)
864
-{
865
- /* Wake up other AioContexts. */
866
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
867
- qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
868
-}
869
-
870
-static void handle_icount_deadline(void)
871
-{
872
- assert(qemu_in_vcpu_thread());
873
- if (icount_enabled()) {
874
- int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
875
- QEMU_TIMER_ATTR_ALL);
876
-
877
- if (deadline == 0) {
878
- notify_aio_contexts();
879
- }
880
- }
881
-}
882
-
883
-static void prepare_icount_for_run(CPUState *cpu)
884
-{
885
- if (icount_enabled()) {
886
- int insns_left;
887
-
888
- /*
889
- * These should always be cleared by process_icount_data after
890
- * each vCPU execution. However u16.high can be raised
891
- * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
892
- */
893
- g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
894
- g_assert(cpu->icount_extra == 0);
895
-
896
- cpu->icount_budget = tcg_get_icount_limit();
897
- insns_left = MIN(0xffff, cpu->icount_budget);
898
- cpu_neg(cpu)->icount_decr.u16.low = insns_left;
899
- cpu->icount_extra = cpu->icount_budget - insns_left;
900
-
901
- replay_mutex_lock();
902
-
903
- if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
904
- notify_aio_contexts();
905
- }
906
- }
907
-}
908
-
909
-static void process_icount_data(CPUState *cpu)
910
-{
911
- if (icount_enabled()) {
912
- /* Account for executed instructions */
913
- icount_update(cpu);
914
-
915
- /* Reset the counters */
916
- cpu_neg(cpu)->icount_decr.u16.low = 0;
917
- cpu->icount_extra = 0;
918
- cpu->icount_budget = 0;
919
-
920
- replay_account_executed_instructions();
921
-
922
- replay_mutex_unlock();
923
- }
924
-}
925
-
926
-static int tcg_cpu_exec(CPUState *cpu)
927
-{
928
- int ret;
929
-#ifdef CONFIG_PROFILER
930
- int64_t ti;
931
-#endif
932
-
933
- assert(tcg_enabled());
934
-#ifdef CONFIG_PROFILER
935
- ti = profile_getclock();
936
-#endif
937
- cpu_exec_start(cpu);
938
- ret = cpu_exec(cpu);
939
- cpu_exec_end(cpu);
940
-#ifdef CONFIG_PROFILER
941
- qatomic_set(&tcg_ctx->prof.cpu_exec_time,
942
- tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
943
-#endif
944
- return ret;
945
-}
946
-
947
-/*
948
- * Destroy any remaining vCPUs which have been unplugged and have
949
- * finished running
950
- */
951
-static void deal_with_unplugged_cpus(void)
952
-{
953
- CPUState *cpu;
954
-
955
- CPU_FOREACH(cpu) {
956
- if (cpu->unplug && !cpu_can_run(cpu)) {
957
- qemu_tcg_destroy_vcpu(cpu);
958
- cpu_thread_signal_destroyed(cpu);
959
- break;
960
- }
961
- }
962
-}
963
-
964
-/*
965
- * Single-threaded TCG
966
- *
967
- * In the single-threaded case each vCPU is simulated in turn. If
968
- * there is more than a single vCPU we create a simple timer to kick
969
- * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
970
- * This is done explicitly rather than relying on side-effects
971
- * elsewhere.
972
- */
973
-
974
-static void *tcg_rr_cpu_thread_fn(void *arg)
975
-{
976
- CPUState *cpu = arg;
977
-
978
- assert(tcg_enabled());
979
- rcu_register_thread();
980
- tcg_register_thread();
981
-
982
- qemu_mutex_lock_iothread();
983
- qemu_thread_get_self(cpu->thread);
984
-
985
- cpu->thread_id = qemu_get_thread_id();
986
- cpu->can_do_io = 1;
987
- cpu_thread_signal_created(cpu);
988
- qemu_guest_random_seed_thread_part2(cpu->random_seed);
989
-
990
- /* wait for initial kick-off after machine start */
991
- while (first_cpu->stopped) {
992
- qemu_cond_wait_iothread(first_cpu->halt_cond);
993
-
994
- /* process any pending work */
995
- CPU_FOREACH(cpu) {
996
- current_cpu = cpu;
997
- qemu_wait_io_event_common(cpu);
998
- }
999
- }
1000
-
1001
- start_tcg_kick_timer();
1002
-
1003
- cpu = first_cpu;
1004
-
1005
- /* process any pending work */
1006
- cpu->exit_request = 1;
1007
-
1008
- while (1) {
1009
- qemu_mutex_unlock_iothread();
1010
- replay_mutex_lock();
1011
- qemu_mutex_lock_iothread();
1012
- /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1013
- icount_account_warp_timer();
1014
-
1015
- /*
1016
- * Run the timers here. This is much more efficient than
1017
- * waking up the I/O thread and waiting for completion.
1018
- */
1019
- handle_icount_deadline();
1020
-
1021
- replay_mutex_unlock();
1022
-
1023
- if (!cpu) {
1024
- cpu = first_cpu;
1025
- }
1026
-
1027
- while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
1028
-
1029
- qatomic_mb_set(&tcg_current_rr_cpu, cpu);
1030
- current_cpu = cpu;
1031
-
1032
- qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1033
- (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1034
-
1035
- if (cpu_can_run(cpu)) {
1036
- int r;
1037
-
1038
- qemu_mutex_unlock_iothread();
1039
- prepare_icount_for_run(cpu);
1040
-
1041
- r = tcg_cpu_exec(cpu);
1042
-
1043
- process_icount_data(cpu);
1044
- qemu_mutex_lock_iothread();
1045
-
1046
- if (r == EXCP_DEBUG) {
1047
- cpu_handle_guest_debug(cpu);
1048
- break;
1049
- } else if (r == EXCP_ATOMIC) {
1050
- qemu_mutex_unlock_iothread();
1051
- cpu_exec_step_atomic(cpu);
1052
- qemu_mutex_lock_iothread();
1053
- break;
1054
- }
1055
- } else if (cpu->stop) {
1056
- if (cpu->unplug) {
1057
- cpu = CPU_NEXT(cpu);
1058
- }
1059
- break;
1060
- }
1061
-
1062
- cpu = CPU_NEXT(cpu);
1063
- } /* while (cpu && !cpu->exit_request).. */
1064
-
1065
- /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
1066
- qatomic_set(&tcg_current_rr_cpu, NULL);
1067
-
1068
- if (cpu && cpu->exit_request) {
1069
- qatomic_mb_set(&cpu->exit_request, 0);
1070
- }
1071
-
1072
- if (icount_enabled() && all_cpu_threads_idle()) {
1073
- /*
1074
- * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1075
- * in the main_loop, wake it up in order to start the warp timer.
1076
- */
1077
- qemu_notify_event();
1078
- }
1079
-
1080
- qemu_tcg_rr_wait_io_event();
1081
- deal_with_unplugged_cpus();
1082
- }
1083
-
1084
- rcu_unregister_thread();
1085
- return NULL;
1086
-}
1087
-
1088
-/*
1089
- * Multi-threaded TCG
1090
- *
1091
- * In the multi-threaded case each vCPU has its own thread. The TLS
1092
- * variable current_cpu can be used deep in the code to find the
1093
- * current CPUState for a given thread.
1094
- */
1095
-
1096
-static void *tcg_cpu_thread_fn(void *arg)
1097
-{
1098
- CPUState *cpu = arg;
1099
-
1100
- assert(tcg_enabled());
1101
- g_assert(!icount_enabled());
1102
-
1103
- rcu_register_thread();
1104
- tcg_register_thread();
1105
-
1106
- qemu_mutex_lock_iothread();
1107
- qemu_thread_get_self(cpu->thread);
1108
-
1109
- cpu->thread_id = qemu_get_thread_id();
1110
- cpu->can_do_io = 1;
1111
- current_cpu = cpu;
1112
- cpu_thread_signal_created(cpu);
1113
- qemu_guest_random_seed_thread_part2(cpu->random_seed);
1114
-
1115
- /* process any pending work */
1116
- cpu->exit_request = 1;
1117
-
1118
- do {
1119
- if (cpu_can_run(cpu)) {
1120
- int r;
1121
- qemu_mutex_unlock_iothread();
1122
- r = tcg_cpu_exec(cpu);
1123
- qemu_mutex_lock_iothread();
1124
- switch (r) {
1125
- case EXCP_DEBUG:
1126
- cpu_handle_guest_debug(cpu);
1127
- break;
1128
- case EXCP_HALTED:
1129
- /*
1130
- * during start-up the vCPU is reset and the thread is
1131
- * kicked several times. If we don't ensure we go back
1132
- * to sleep in the halted state we won't cleanly
1133
- * start-up when the vCPU is enabled.
1134
- *
1135
- * cpu->halted should ensure we sleep in wait_io_event
1136
- */
1137
- g_assert(cpu->halted);
1138
- break;
1139
- case EXCP_ATOMIC:
1140
- qemu_mutex_unlock_iothread();
1141
- cpu_exec_step_atomic(cpu);
1142
- qemu_mutex_lock_iothread();
1143
- default:
1144
- /* Ignore everything else? */
1145
- break;
1146
- }
1147
- }
1148
-
1149
- qatomic_mb_set(&cpu->exit_request, 0);
1150
- qemu_wait_io_event(cpu);
1151
- } while (!cpu->unplug || cpu_can_run(cpu));
1152
-
1153
- qemu_tcg_destroy_vcpu(cpu);
1154
- cpu_thread_signal_destroyed(cpu);
1155
- qemu_mutex_unlock_iothread();
1156
- rcu_unregister_thread();
1157
- return NULL;
1158
-}
1159
-
1160
-static void tcg_start_vcpu_thread(CPUState *cpu)
1161
+void tcg_start_vcpu_thread(CPUState *cpu)
1162
{
1163
char thread_name[VCPU_THREAD_NAME_SIZE];
1164
static QemuCond *single_tcg_halt_cond;
1165
@@ -XXX,XX +XXX,XX @@ static void tcg_start_vcpu_thread(CPUState *cpu)
}
}

-static int64_t tcg_get_virtual_clock(void)
+void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
- if (icount_enabled()) {
- return icount_get();
- }
- return cpu_get_clock();
+ cpu_thread_signal_destroyed(cpu);
}

-static int64_t tcg_get_elapsed_ticks(void)
+int tcg_cpu_exec(CPUState *cpu)
{
- if (icount_enabled()) {
- return icount_get();
- }
- return cpu_get_ticks();
+ int ret;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+ assert(tcg_enabled());
+#ifdef CONFIG_PROFILER
+ ti = profile_getclock();
+#endif
+ cpu_exec_start(cpu);
+ ret = cpu_exec(cpu);
+ cpu_exec_end(cpu);
+#ifdef CONFIG_PROFILER
+ qatomic_set(&tcg_ctx->prof.cpu_exec_time,
+ tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
+#endif
+ return ret;
}

/* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_handle_interrupt(CPUState *cpu, int mask)
{
- int old_mask;
g_assert(qemu_mutex_iothread_locked());

- old_mask = cpu->interrupt_request;
cpu->interrupt_request |= mask;

/*
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
qemu_cpu_kick(cpu);
} else {
qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
- if (icount_enabled() &&
- !cpu->can_do_io
- && (mask & ~old_mask) != 0) {
- cpu_abort(cpu, "Raised interrupt while not in I/O function");
- }
}
}

-const CpusAccel tcg_cpus = {
- .create_vcpu_thread = tcg_start_vcpu_thread,
- .kick_vcpu_thread = tcg_kick_vcpu_thread,
-
- .handle_interrupt = tcg_handle_interrupt,
-
- .get_virtual_clock = tcg_get_virtual_clock,
- .get_elapsed_ticks = tcg_get_elapsed_ticks,
-};
diff --git a/softmmu/icount.c b/softmmu/icount.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/icount.c
+++ b/softmmu/icount.c
@@ -XXX,XX +XXX,XX @@ void icount_start_warp_timer(void)

void icount_account_warp_timer(void)
{
- if (!icount_enabled() || !icount_sleep) {
+ if (!icount_sleep) {
return;
}

diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)

-specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files('tcg-all.c', 'cputlb.c', 'tcg-cpus.c'))
+specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
+ 'tcg-all.c',
+ 'cputlb.c',
+ 'tcg-cpus.c',
+ 'tcg-cpus-mttcg.c',
+ 'tcg-cpus-icount.c',
+ 'tcg-cpus-rr.c'
+))
--
2.25.1

}
}

-static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
@@ -XXX,XX +XXX,XX @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);

- /* Unpack to W, shift, and repack. Tricky bits:
- (1) Use punpck*bw x,x to produce DDCCBBAA,
- i.e. duplicate in other half of the 16-bit lane.
- (2) For right-shift, add 8 so that the high half of
- the lane becomes zero. For left-shift, we must
- shift up and down again.
- (3) Step 2 leaves high half zero such that PACKUSWB
- (pack with unsigned saturation) does not modify
- the quantity. */
+ /*
+ * Unpack to W, shift, and repack. Tricky bits:
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
+ * i.e. duplicate in other half of the 16-bit lane.
+ * (2) For right-shift, add 8 so that the high half of the lane
+ * becomes zero. For left-shift, and left-rotate, we must
+ * shift up and down again.
+ * (3) Step 2 leaves high half zero such that PACKUSWB
+ * (pack with unsigned saturation) does not modify
+ * the quantity.
+ */
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));

- if (shr) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
+ if (opc != INDEX_op_rotli_vec) {
+ imm += 8;
+ }
+ if (opc == INDEX_op_shri_vec) {
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
}
@@ -XXX,XX +XXX,XX @@ static void expand_vec_sari(TCGType type, unsigned vece,
}
}

+static void expand_vec_rotli(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t;
+
+ if (vece == MO_8) {
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
+ return;
+ }
+
+ t = tcg_temp_new_vec(type);
+ tcg_gen_shli_vec(vece, t, v1, imm);
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+ TCGv_i32 rsh;
+ TCGv_vec t;
+
+ tcg_debug_assert(vece != MO_8);
+
+ t = tcg_temp_new_vec(type);
+ rsh = tcg_temp_new_i32();
+
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+ tcg_temp_free_i32(rsh);
+}
+
+static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec sh, bool right)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ tcg_gen_dupi_vec(vece, t, 8 << vece);
+ tcg_gen_sub_vec(vece, t, t, sh);
+ if (right) {
+ tcg_gen_shlv_vec(vece, t, v1, t);
+ tcg_gen_shrv_vec(vece, v0, v1, sh);
+ } else {
+ tcg_gen_shrv_vec(vece, t, v1, t);
+ tcg_gen_shlv_vec(vece, v0, v1, sh);
+ }
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
break;

case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;

+ case INDEX_op_rotli_vec:
+ expand_vec_rotli(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotls_vec:
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, false);
+ break;
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, true);
+ break;
+
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
--
2.25.1
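As a side note on the expansion above: x86 has no general vector rotate at these widths, so expand_vec_rotv builds one from a left shift, a complementary right shift and an OR, relying on the x86 variable vector shifts producing zero for counts greater than or equal to the element width. A minimal single-lane C sketch of that identity (illustrative only, not QEMU code; assumes the count is already reduced to the lane width):

    #include <stdint.h>

    /* One 32-bit lane of rotlv, with n in [0, 31].
     * (32 - n) can be 32, which the shlv/shrv ops turn into 0. */
    static inline uint32_t rotlv_lane32(uint32_t x, unsigned n)
    {
        uint32_t hi = x << n;
        uint32_t lo = (32 - n) < 32 ? x >> (32 - n) : 0;
        return hi | lo;
    }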
diff view generated by jsdifflib
No host backend support yet, but the interfaces for rotls
are in place. Only implement left-rotate for now, as the
only known use of vector rotate by scalar is s390x, so any
right-rotate would be unused and untestable.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-gvec.h | 2 ++
include/tcg/tcg-op.h | 1 +
include/tcg/tcg-opc.h | 1 +
include/tcg/tcg.h | 1 +
tcg/aarch64/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 1 +
tcg/ppc/tcg-target.h | 1 +
tcg/tcg-op-gvec.c | 22 ++++++++++++++++++++++
tcg/tcg-op-vec.c | 5 +++++
tcg/tcg.c | 2 ++
10 files changed, 37 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);

/*
* Perform vector shift by vector element, modulo the element size.
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -XXX,XX +XXX,XX @@ DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))

DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}

+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_rotl_i32,
+ .fni8 = tcg_gen_rotl_i64,
+ .fniv_s = tcg_gen_rotls_vec,
+ .fniv_v = tcg_gen_rotlv_vec,
+ .fno = {
+ gen_helper_gvec_rotl8i,
+ gen_helper_gvec_rotl16i,
+ gen_helper_gvec_rotl32i,
+ gen_helper_gvec_rotl64i,
+ },
+ .s_list = { INDEX_op_rotls_vec, 0 },
+ .v_list = { INDEX_op_rotlv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
/*
* Expand D = A << (B % element bits)
*
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
+{
+ do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
+}
+
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
TCGv_vec b, TCGv_vec c)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
return have_vec && TCG_TARGET_HAS_shv_vec;
case INDEX_op_rotli_vec:
return have_vec && TCG_TARGET_HAS_roti_vec;
+ case INDEX_op_rotls_vec:
+ return have_vec && TCG_TARGET_HAS_rots_vec;
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
return have_vec && TCG_TARGET_HAS_rotv_vec;
--
2.25.1

From: Claudio Fontana <cfontana@suse.de>

After the initial split into 3 tcg variants, we proceed to also
split tcg_start_vcpu_thread.

We actually split it in two this time, since the icount variant
just uses the round robin function.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Message-Id: <20201015143217.29337-3-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tcg-cpus-mttcg.h | 21 --------------
accel/tcg/tcg-cpus-rr.h | 3 +-
accel/tcg/tcg-cpus.h | 1 -
accel/tcg/tcg-all.c | 5 ++++
accel/tcg/tcg-cpus-icount.c | 2 +-
accel/tcg/tcg-cpus-mttcg.c | 29 +++++++++++++++++--
accel/tcg/tcg-cpus-rr.c | 39 +++++++++++++++++++++++--
accel/tcg/tcg-cpus.c | 58 -------------------------------------
8 files changed, 71 insertions(+), 87 deletions(-)
delete mode 100644 accel/tcg/tcg-cpus-mttcg.h

diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/accel/tcg/tcg-cpus-mttcg.h
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
-/*
- * QEMU TCG Multi Threaded vCPUs implementation
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TCG_CPUS_MTTCG_H
-#define TCG_CPUS_MTTCG_H
-
-/*
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-void *tcg_cpu_thread_fn(void *arg);
-
-#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
/* Kick all RR vCPUs. */
void qemu_cpu_kick_rr_cpus(CPUState *unused);

-void *tcg_rr_cpu_thread_fn(void *arg);
+/* start the round robin vcpu thread */
+void rr_start_vcpu_thread(CPUState *cpu);

#endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
extern const CpusAccel tcg_cpus_icount;
extern const CpusAccel tcg_cpus_rr;

-void tcg_start_vcpu_thread(CPUState *cpu);
void qemu_tcg_destroy_vcpu(CPUState *cpu);
int tcg_cpu_exec(CPUState *cpu);
void tcg_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
tcg_exec_init(s->tb_size * 1024 * 1024);
mttcg_enabled = s->mttcg_enabled;

+ /*
+ * Initialize TCG regions
+ */
+ tcg_region_init();
+
if (mttcg_enabled) {
cpus_register_accel(&tcg_cpus_mttcg);
} else if (icount_enabled()) {
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
}

const CpusAccel tcg_cpus_icount = {
- .create_vcpu_thread = tcg_start_vcpu_thread,
+ .create_vcpu_thread = rr_start_vcpu_thread,
.kick_vcpu_thread = qemu_cpu_kick_rr_cpus,

.handle_interrupt = icount_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
#include "hw/boards.h"

#include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"

/*
* In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
* current CPUState for a given thread.
*/

-void *tcg_cpu_thread_fn(void *arg)
+static void *tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;

@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
cpu_exit(cpu);
}

+static void mttcg_start_vcpu_thread(CPUState *cpu)
+{
+ char thread_name[VCPU_THREAD_NAME_SIZE];
+
+ g_assert(tcg_enabled());
+
+ parallel_cpus = (current_machine->smp.max_cpus > 1);
+
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+ qemu_cond_init(cpu->halt_cond);
+
+ /* create a thread per vCPU with TCG (MTTCG) */
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+ cpu->cpu_index);
+
+ qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+#ifdef _WIN32
+ cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
const CpusAccel tcg_cpus_mttcg = {
- .create_vcpu_thread = tcg_start_vcpu_thread,
+ .create_vcpu_thread = mttcg_start_vcpu_thread,
.kick_vcpu_thread = mttcg_kick_vcpu_thread,

.handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
* elsewhere.
*/

-void *tcg_rr_cpu_thread_fn(void *arg)
+static void *tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;

@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
return NULL;
}

+void rr_start_vcpu_thread(CPUState *cpu)
+{
+ char thread_name[VCPU_THREAD_NAME_SIZE];
+ static QemuCond *single_tcg_halt_cond;
+ static QemuThread *single_tcg_cpu_thread;
+
+ g_assert(tcg_enabled());
+ parallel_cpus = false;
+
+ if (!single_tcg_cpu_thread) {
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+ qemu_cond_init(cpu->halt_cond);
+
+ /* share a single thread for all cpus with TCG */
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+ qemu_thread_create(cpu->thread, thread_name,
+ tcg_rr_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+ single_tcg_halt_cond = cpu->halt_cond;
+ single_tcg_cpu_thread = cpu->thread;
+#ifdef _WIN32
+ cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+ } else {
+ /* we share the thread */
+ cpu->thread = single_tcg_cpu_thread;
+ cpu->halt_cond = single_tcg_halt_cond;
+ cpu->thread_id = first_cpu->thread_id;
+ cpu->can_do_io = 1;
+ cpu->created = true;
+ }
+}
+
const CpusAccel tcg_cpus_rr = {
- .create_vcpu_thread = tcg_start_vcpu_thread,
+ .create_vcpu_thread = rr_start_vcpu_thread,
.kick_vcpu_thread = qemu_cpu_kick_rr_cpus,

.handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
#include "hw/boards.h"

#include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
-#include "tcg-cpus-rr.h"

/* common functionality among all TCG variants */

-void tcg_start_vcpu_thread(CPUState *cpu)
-{
- char thread_name[VCPU_THREAD_NAME_SIZE];
- static QemuCond *single_tcg_halt_cond;
- static QemuThread *single_tcg_cpu_thread;
- static int tcg_region_inited;
-
- assert(tcg_enabled());
- /*
- * Initialize TCG regions--once. Now is a good time, because:
- * (1) TCG's init context, prologue and target globals have been set up.
- * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
- * -accel flag is processed, so the check doesn't work then).
- */
- if (!tcg_region_inited) {
- tcg_region_inited = 1;
- tcg_region_init();
- parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
- }
-
- if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
- cpu->thread = g_malloc0(sizeof(QemuThread));
- cpu->halt_cond = g_malloc0(sizeof(QemuCond));
- qemu_cond_init(cpu->halt_cond);
-
- if (qemu_tcg_mttcg_enabled()) {
- /* create a thread per vCPU with TCG (MTTCG) */
- snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
- cpu->cpu_index);
-
- qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
-
- } else {
- /* share a single thread for all cpus with TCG */
- snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
- qemu_thread_create(cpu->thread, thread_name,
- tcg_rr_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
-
- single_tcg_halt_cond = cpu->halt_cond;
- single_tcg_cpu_thread = cpu->thread;
- }
-#ifdef _WIN32
- cpu->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
- } else {
- /* For non-MTTCG cases we share the thread */
- cpu->thread = single_tcg_cpu_thread;
- cpu->halt_cond = single_tcg_halt_cond;
- cpu->thread_id = first_cpu->thread_id;
- cpu->can_do_io = 1;
- cpu->created = true;
- }
-}
-
void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
cpu_thread_signal_destroyed(cpu);
--
2.25.1
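Taken together with the first patch of its series, vCPU thread creation is now chosen per variant at accelerator-init time. A paraphrased C sketch of the selection in tcg_init() (the mttcg and icount arms are visible in the hunk above; the final rr arm is inferred from the series description):

    /* tcg_init() registers exactly one CpusAccel implementation: */
    if (mttcg_enabled) {
        cpus_register_accel(&tcg_cpus_mttcg);    /* one thread per vCPU */
    } else if (icount_enabled()) {
        cpus_register_accel(&tcg_cpus_icount);   /* rr thread + icount accounting */
    } else {
        cpus_register_accel(&tcg_cpus_rr);       /* single round-robin thread */
    }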
diff view generated by jsdifflib
Deleted patch
For immediate rotate, we can implement this in two instructions,
using SLI. For variable rotate, the oddness of aarch64 right-shift-
as-negative-left-shift means a backend-specific expansion works best.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.opc.h | 1 +
tcg/aarch64/tcg-target.inc.c | 53 ++++++++++++++++++++++++++++++++++--
2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.opc.h
+++ b/tcg/aarch64/tcg-target.opc.h
@@ -XXX,XX +XXX,XX @@
*/

DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ typedef enum {
I3614_SSHR = 0x0f000400,
I3614_SSRA = 0x0f001400,
I3614_SHL = 0x0f005400,
+ I3614_SLI = 0x2f005400,
I3614_USHR = 0x2f000400,
I3614_USRA = 0x2f001400,

@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sari_vec:
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
break;
+ case INDEX_op_aa64_sli_vec:
+ tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+ break;
case INDEX_op_shlv_vec:
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
break;
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_bitsel_vec:
return 1;
+ case INDEX_op_rotli_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
case INDEX_op_smax_vec:
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2, t1;
+ TCGv_vec v0, v1, v2, t1, t2;
+ TCGArg a2;

va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+ v2 = temp_tcgv_vec(arg_temp(a2));

switch (opc) {
+ case INDEX_op_rotli_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
+ vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
+ tcg_temp_free_vec(t1);
+ break;
+
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
/* Right shifts are negative left shifts for AArch64. */
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
tcg_temp_free_vec(t1);
break;

+ case INDEX_op_rotlv_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(vece, t1, 8 << vece);
+ tcg_gen_sub_vec(vece, t1, v2, t1);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_or_vec(vece, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotrv_vec:
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t1, v2);
+ tcg_gen_dupi_vec(vece, t2, 8 << vece);
+ tcg_gen_add_vec(vece, t2, t1, t2);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+ tcg_gen_or_vec(vece, v0, t1, t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
default:
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
+ static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &w_w_wZ;
case INDEX_op_bitsel_vec:
return &w_w_w_w;
+ case INDEX_op_aa64_sli_vec:
+ return &w_0_w;

default:
return NULL;
--
2.25.1
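The two-instruction sequence described above is a USHR to bring the wrapped-around high bits down, then SLI, which shifts the source left and inserts it over the untouched low bits of the destination. A hedged C model of one 32-bit lane (illustrative only; names and widths are assumptions, not QEMU code):

    #include <stdint.h>

    /* rotl by imm in [1, 31]: the right shift models USHR #(esize - imm),
     * and OR-ing in (x << imm) models SLI #imm's shift-and-insert. */
    static inline uint32_t rotli_lane32(uint32_t x, unsigned imm)
    {
        uint32_t t = x >> (32 - imm);   /* USHR */
        return t | (x << imm);          /* SLI  */
    }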
diff view generated by jsdifflib
Deleted patch
We already had support for rotlv, using a target-specific opcode;
convert to use the generic opcode. Handle rotrv via simple negation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.h | 2 +-
tcg/ppc/tcg-target.opc.h | 1 -
tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++----
3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.opc.h
+++ b/tcg/ppc/tcg-target.opc.h
@@ -XXX,XX +XXX,XX @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
-DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
return vece <= MO_32 || have_isa_2_07;
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
case INDEX_op_neg_vec:
return vece >= MO_32 && have_isa_3_00;
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return 0;
case INDEX_op_bitsel_vec:
return have_vsx;
+ case INDEX_op_rotrv_vec:
+ return -1;
default:
return 0;
}
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_ppc_pkum_vec:
insn = pkum_op[vece];
break;
- case INDEX_op_ppc_rotl_vec:
+ case INDEX_op_rotlv_vec:
insn = rotl_op[vece];
break;
case INDEX_op_ppc_msum_vec:
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
t3 = tcg_temp_new_vec(type);
t4 = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(MO_8, t4, -16);
- vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
+ vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
tcgv_vec_arg(v2), tcgv_vec_arg(t4));
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2;
+ TCGv_vec v0, v1, v2, t0;
TCGArg a2;

va_start(va, a0);
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
case INDEX_op_sari_vec:
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
break;
+ case INDEX_op_rotli_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
+ break;
case INDEX_op_cmp_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
break;
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
default:
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
case INDEX_op_ppc_mrgh_vec:
case INDEX_op_ppc_mrgl_vec:
case INDEX_op_ppc_muleu_vec:
case INDEX_op_ppc_mulou_vec:
case INDEX_op_ppc_pkum_vec:
- case INDEX_op_ppc_rotl_vec:
case INDEX_op_dup2_vec:
return &v_v_v;
case INDEX_op_not_vec:
--
2.25.1
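"Handle rotrv via simple negation" works because rotating right by n is the same as rotating left by the negated count taken modulo the lane width. A one-lane C sketch of the identity the expansion relies on (illustrative, not QEMU code):

    #include <stdint.h>

    static inline uint32_t rotr_via_rotl32(uint32_t x, unsigned n)
    {
        unsigned l = -n & 31;    /* negated count, mod lane width */
        return l ? (x << l) | (x >> (32 - l)) : x;
    }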
diff view generated by jsdifflib
Deleted patch
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/helper.h | 4 ----
target/ppc/int_helper.c | 17 -----------------
target/ppc/translate/vmx-impl.inc.c | 8 ++++----
3 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
-DEF_HELPER_3(vrlb, void, avr, avr, avr)
-DEF_HELPER_3(vrlh, void, avr, avr, avr)
-DEF_HELPER_3(vrlw, void, avr, avr, avr)
-DEF_HELPER_3(vrld, void, avr, avr, avr)
DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
DEF_HELPER_3(vextractub, void, avr, avr, i32)
DEF_HELPER_3(vextractuh, void, avr, avr, i32)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -XXX,XX +XXX,XX @@ VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

-#define VROTATE(suffix, element, mask) \
- void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
- { \
- int i; \
- \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- unsigned int shift = b->element[i] & mask; \
- r->element[i] = (a->element[i] << shift) | \
- (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
- } \
- }
-VROTATE(b, u8, 0x7)
-VROTATE(h, u16, 0xF)
-VROTATE(w, u32, 0x1F)
-VROTATE(d, u64, 0x3F)
-#undef VROTATE
-
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
 int i;
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -XXX,XX +XXX,XX @@ GEN_VXFORM3(vsubeuqm, 31, 0);
GEN_VXFORM3(vsubecuq, 31, 0);
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
 vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM(vrlb, 2, 0);
-GEN_VXFORM(vrlh, 2, 1);
-GEN_VXFORM(vrlw, 2, 2);
+GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0);
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1);
+GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2);
GEN_VXFORM(vrlwmi, 2, 2);
GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
 vrlwmi, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vrld, 2, 3);
+GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3);
GEN_VXFORM(vrldmi, 2, 3);
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
 vrldmi, PPC_NONE, PPC2_ISA300)
--
2.25.1
diff view generated by jsdifflib
Deleted patch
1
Merge VERLL and VERLLV into op_vesv and op_ves, alongside
2
all of the other vector shift operations.
3
1
4
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/s390x/helper.h | 4 --
8
target/s390x/translate_vx.inc.c | 66 +++++----------------------------
9
target/s390x/vec_int_helper.c | 31 ----------------
10
target/s390x/insn-data.def | 4 +-
11
4 files changed, 11 insertions(+), 94 deletions(-)
12
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/helper.h
16
+++ b/target/s390x/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
18
DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
19
DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
20
DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
21
-DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
22
-DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
23
-DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
24
-DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
25
DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
26
DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
27
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
-static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-
-    tcg_gen_andi_i32(t0, b, 31);
-    tcg_gen_rotl_i32(d, a, t0);
-    tcg_temp_free_i32(t0);
-}
-
-static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    TCGv_i64 t0 = tcg_temp_new_i64();
-
-    tcg_gen_andi_i64(t0, b, 63);
-    tcg_gen_rotl_i64(d, a, t0);
-    tcg_temp_free_i64(t0);
-}
-
-static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
-{
-    const uint8_t es = get_field(s, m4);
-    static const GVecGen3 g[4] = {
-        { .fno = gen_helper_gvec_verllv8, },
-        { .fno = gen_helper_gvec_verllv16, },
-        { .fni4 = gen_rll_i32, },
-        { .fni8 = gen_rll_i64, },
-    };
-
-    if (es > ES_64) {
-        gen_program_exception(s, PGM_SPECIFICATION);
-        return DISAS_NORETURN;
-    }
-
-    gen_gvec_3(get_field(s, v1), get_field(s, v2),
-               get_field(s, v3), &g[es]);
-    return DISAS_NEXT;
-}
-
-static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
-{
-    const uint8_t es = get_field(s, m4);
-    static const GVecGen2s g[4] = {
-        { .fno = gen_helper_gvec_verll8, },
-        { .fno = gen_helper_gvec_verll16, },
-        { .fni4 = gen_rll_i32, },
-        { .fni8 = gen_rll_i64, },
-    };
-
-    if (es > ES_64) {
-        gen_program_exception(s, PGM_SPECIFICATION);
-        return DISAS_NORETURN;
-    }
-    gen_gvec_2s(get_field(s, v1), get_field(s, v3), o->addr1,
-                &g[es]);
-    return DISAS_NEXT;
-}
-
 static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
 {
     TCGv_i32 t = tcg_temp_new_i32();
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
     case 0x70:
         gen_gvec_fn_3(shlv, es, v1, v2, v3);
         break;
+    case 0x73:
+        gen_gvec_fn_3(rotlv, es, v1, v2, v3);
+        break;
     case 0x7a:
         gen_gvec_fn_3(sarv, es, v1, v2, v3);
         break;
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
     case 0x30:
         gen_gvec_fn_2i(shli, es, v1, v3, d2);
         break;
+    case 0x33:
+        gen_gvec_fn_2i(rotli, es, v1, v3, d2);
+        break;
     case 0x3a:
         gen_gvec_fn_2i(sari, es, v1, v3, d2);
         break;
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
     case 0x30:
         gen_gvec_fn_2s(shls, es, v1, v3, shift);
         break;
+    case 0x33:
+        gen_gvec_fn_2s(rotls, es, v1, v3, shift);
+        break;
     case 0x3a:
         gen_gvec_fn_2s(sars, es, v1, v3, shift);
         break;
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/vec_int_helper.c
+++ b/target/s390x/vec_int_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
 DEF_VPOPCT(8)
 DEF_VPOPCT(16)
 
-#define DEF_VERLLV(BITS) \
-void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
-                               uint32_t desc) \
-{ \
-    int i; \
- \
-    for (i = 0; i < (128 / BITS); i++) { \
-        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
-        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
- \
-        s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \
-    } \
-}
-DEF_VERLLV(8)
-DEF_VERLLV(16)
-
-#define DEF_VERLL(BITS) \
-void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
-                              uint32_t desc) \
-{ \
-    int i; \
- \
-    for (i = 0; i < (128 / BITS); i++) { \
-        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
- \
-        s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \
-    } \
-}
-DEF_VERLL(8)
-DEF_VERLL(16)
-
 #define DEF_VERIM(BITS) \
 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
                               uint32_t desc) \
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -XXX,XX +XXX,XX @@
 /* VECTOR POPULATION COUNT */
     F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC)
 /* VECTOR ELEMENT ROTATE LEFT LOGICAL */
-    F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC)
-    F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC)
+    F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
+    F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
 /* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
     F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC)
 /* VECTOR ELEMENT SHIFT LEFT */
--
2.25.1
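
For reference, the per-element rotate that the deleted DEF_VERLLV/DEF_VERLL
helpers computed, as a free-standing sketch. Illustrative only: rol8() below
is a local stand-in written to match the behavior assumed of the rol##BITS
helper the macros called; the gvec rotl expansion now emits this inline.

#include <stdint.h>

/* Rotate x left by shift mod 8; "-s & 7" avoids an undefined shift by 8. */
static uint8_t rol8(uint8_t x, unsigned shift)
{
    unsigned s = shift & 7;
    return (uint8_t)(((unsigned)x << s) | (x >> (-s & 7)));
}

/* e.g. rol8(0x81, 1) == 0x03 */
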
Deleted patch

If the output of the move is dead, then the last use is in
the store. If we propagate the input to the store, then we
can remove the move opcode entirely.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 78 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 56 insertions(+), 22 deletions(-)

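An illustration of the transformation, with hypothetical temps t0/t1 and a
made-up memory offset (not taken from the patch itself):

/*
 * before:  mov_i32 t1, t0           // t1 is dead after its sync
 *          st_i32  t1, env, $0x10   // sync of t1, its last use
 *
 * after:   st_i32  t0, env, $0x10   // input propagated into the store;
 *                                   // the mov is removed outright
 */
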
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
         }
 
         /* Outputs become available. */
-        for (i = 0; i < nb_oargs; i++) {
-            arg_ts = arg_temp(op->args[i]);
+        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
+            arg_ts = arg_temp(op->args[0]);
             dir_ts = arg_ts->state_ptr;
-            if (!dir_ts) {
-                continue;
+            if (dir_ts) {
+                op->args[0] = temp_arg(dir_ts);
+                changes = true;
+
+                /* The output is now live and modified. */
+                arg_ts->state = 0;
+
+                if (NEED_SYNC_ARG(0)) {
+                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+                                      ? INDEX_op_st_i32
+                                      : INDEX_op_st_i64);
+                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+                    TCGTemp *out_ts = dir_ts;
+
+                    if (IS_DEAD_ARG(0)) {
+                        out_ts = arg_temp(op->args[1]);
+                        arg_ts->state = TS_DEAD;
+                        tcg_op_remove(s, op);
+                    } else {
+                        arg_ts->state = TS_MEM;
+                    }
+
+                    sop->args[0] = temp_arg(out_ts);
+                    sop->args[1] = temp_arg(arg_ts->mem_base);
+                    sop->args[2] = arg_ts->mem_offset;
+                } else {
+                    tcg_debug_assert(!IS_DEAD_ARG(0));
+                }
             }
-            op->args[i] = temp_arg(dir_ts);
-            changes = true;
+        } else {
+            for (i = 0; i < nb_oargs; i++) {
+                arg_ts = arg_temp(op->args[i]);
+                dir_ts = arg_ts->state_ptr;
+                if (!dir_ts) {
+                    continue;
+                }
+                op->args[i] = temp_arg(dir_ts);
+                changes = true;
 
-            /* The output is now live and modified. */
-            arg_ts->state = 0;
+                /* The output is now live and modified. */
+                arg_ts->state = 0;
 
-            /* Sync outputs upon their last write. */
-            if (NEED_SYNC_ARG(i)) {
-                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
-                                  ? INDEX_op_st_i32
-                                  : INDEX_op_st_i64);
-                TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+                /* Sync outputs upon their last write. */
+                if (NEED_SYNC_ARG(i)) {
+                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+                                      ? INDEX_op_st_i32
+                                      : INDEX_op_st_i64);
+                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
 
-                sop->args[0] = temp_arg(dir_ts);
-                sop->args[1] = temp_arg(arg_ts->mem_base);
-                sop->args[2] = arg_ts->mem_offset;
+                    sop->args[0] = temp_arg(dir_ts);
+                    sop->args[1] = temp_arg(arg_ts->mem_base);
+                    sop->args[2] = arg_ts->mem_offset;
 
-                arg_ts->state = TS_MEM;
-            }
-            /* Drop outputs that are dead. */
-            if (IS_DEAD_ARG(i)) {
-                arg_ts->state = TS_DEAD;
+                    arg_ts->state = TS_MEM;
+                }
+                /* Drop outputs that are dead. */
+                if (IS_DEAD_ARG(i)) {
+                    arg_ts->state = TS_DEAD;
+                }
             }
         }
     }
--
2.25.1
Deleted patch

From: Nick Hudson <skrll@netbsd.org>

Fix building on NetBSD/arm by extracting the FSR value from the
correct siginfo_t field.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Nick Hudson <skrll@netbsd.org>
Message-Id: <20200516154147.24842-1-skrll@netbsd.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/user-exec.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
 
 #if defined(__NetBSD__)
 #include <ucontext.h>
+#include <sys/siginfo.h>
 #endif
 
 int cpu_signal_handler(int host_signum, void *pinfo,
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
     siginfo_t *info = pinfo;
 #if defined(__NetBSD__)
     ucontext_t *uc = puc;
+    siginfo_t *si = pinfo;
 #else
     ucontext_t *uc = puc;
 #endif
     unsigned long pc;
+    uint32_t fsr;
     int is_write;
 
 #if defined(__NetBSD__)
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
     pc = uc->uc_mcontext.arm_pc;
 #endif
 
-    /* error_code is the FSR value, in which bit 11 is WnR (assuming a v6 or
-     * later processor; on v5 we will always report this as a read).
+#ifdef __NetBSD__
+    fsr = si->si_trap;
+#else
+    fsr = uc->uc_mcontext.error_code;
+#endif
+    /*
+     * In the FSR, bit 11 is WnR, assuming a v6 or
+     * later processor. On v5 we will always report
+     * this as a read, which will fail later.
      */
-    is_write = extract32(uc->uc_mcontext.error_code, 11, 1);
+    is_write = extract32(fsr, 11, 1);
     return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
 }
 
--
2.25.1
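
As an aside, the WnR test above in isolation. Illustrative only: extract32()
below is a local stand-in assumed to match the behavior of the helper in
QEMU's bitops header, and the FSR value is made up.

#include <stdint.h>
#include <stdio.h>

/* Take LENGTH bits of VALUE, starting at bit START. */
static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

int main(void)
{
    uint32_t fsr = 1u << 11;                /* hypothetical FSR, WnR set */
    int is_write = extract32(fsr, 11, 1);   /* bit 11 is WnR on v6+ */
    printf("write fault: %d\n", is_write);  /* prints 1 */
    return 0;
}
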
From: Nick Hudson <skrll@netbsd.org>

Fix qemu build on NetBSD/evbarm-aarch64 by providing a NetBSD specific
cpu_signal_handler.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Nick Hudson <skrll@netbsd.org>
Message-Id: <20200517101529.5367-1-skrll@netbsd.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/user-exec.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
 
 #elif defined(__aarch64__)
 
+#if defined(__NetBSD__)
+
+#include <ucontext.h>
+#include <sys/siginfo.h>
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+    ucontext_t *uc = puc;
+    siginfo_t *si = pinfo;
+    unsigned long pc;
+    int is_write;
+    uint32_t esr;
+
+    pc = uc->uc_mcontext.__gregs[_REG_PC];
+    esr = si->si_trap;
+
+    /*
+     * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC
+     * is 0b10010x: then bit 6 is the WnR bit
+     */
+    is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
+    return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask);
+}
+
+#else
+
 #ifndef ESR_MAGIC
 /* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
 #define ESR_MAGIC 0x45535201
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
     }
     return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
 }
+#endif
 
 #elif defined(__s390__)
--
2.25.1
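
For reference, the ESR test above in plain C. Illustrative only: extract32()
is again a local stand-in for the QEMU bitops helper, and the ESR value is
made up. EC (bits 31:26) of 0b10010x, i.e. 0x24/0x25, marks a data abort,
which is why bits 31:27 are compared against 0b10010 (0x12); for data aborts
bit 6 is WnR.

#include <stdint.h>
#include <stdio.h>

static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

int main(void)
{
    uint32_t esr = (0x25u << 26) | (1u << 6);   /* data abort, write */
    int is_write = extract32(esr, 27, 5) == 0x12 &&
                   extract32(esr, 6, 1) == 1;
    printf("write fault: %d\n", is_write);      /* prints 1 */
    return 0;
}
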
60
61
From: Claudio Fontana <cfontana@suse.de>

Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-4-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  6 +--
 accel/tcg/tcg-cpus-rr.h     |  2 +-
 accel/tcg/tcg-cpus.h        |  6 +--
 accel/tcg/tcg-cpus-icount.c | 24 ++++++------
 accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
 accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
 accel/tcg/tcg-cpus.c        |  6 +--
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.h
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_CPUS_ICOUNT_H
 #define TCG_CPUS_ICOUNT_H
 
-void handle_icount_deadline(void);
-void prepare_icount_for_run(CPUState *cpu);
-void process_icount_data(CPUState *cpu);
+void icount_handle_deadline(void);
+void icount_prepare_for_run(CPUState *cpu);
+void icount_process_data(CPUState *cpu);
 
 #endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
 /* Kick all RR vCPUs. */
-void qemu_cpu_kick_rr_cpus(CPUState *unused);
+void rr_kick_vcpu_thread(CPUState *unused);
 
 /* start the round robin vcpu thread */
 void rr_start_vcpu_thread(CPUState *cpu);
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu);
-int tcg_cpu_exec(CPUState *cpu);
-void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpus_destroy(CPUState *cpu);
+int tcg_cpus_exec(CPUState *cpu);
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 #include "tcg-cpus-rr.h"
 
-static int64_t tcg_get_icount_limit(void)
+static int64_t icount_get_limit(void)
 {
     int64_t deadline;
 
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
     }
 }
 
-static void notify_aio_contexts(void)
+static void icount_notify_aio_contexts(void)
 {
     /* Wake up other AioContexts. */
     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 }
 
-void handle_icount_deadline(void)
+void icount_handle_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
     int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                   QEMU_TIMER_ATTR_ALL);
 
     if (deadline == 0) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void prepare_icount_for_run(CPUState *cpu)
+void icount_prepare_for_run(CPUState *cpu)
 {
     int insns_left;
 
     /*
-     * These should always be cleared by process_icount_data after
+     * These should always be cleared by icount_process_data after
      * each vCPU execution. However u16.high can be raised
-     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
      */
     g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
     g_assert(cpu->icount_extra == 0);
 
-    cpu->icount_budget = tcg_get_icount_limit();
+    cpu->icount_budget = icount_get_limit();
     insns_left = MIN(0xffff, cpu->icount_budget);
     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
     replay_mutex_lock();
 
     if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void process_icount_data(CPUState *cpu)
+void icount_process_data(CPUState *cpu)
 {
     /* Account for executed instructions */
     icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 {
     int old_mask = cpu->interrupt_request;
 
-    tcg_handle_interrupt(cpu, mask);
+    tcg_cpus_handle_interrupt(cpu, mask);
     if (qemu_cpu_is_self(cpu) &&
         !cpu->can_do_io
         && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 
 const CpusAccel tcg_cpus_icount = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
     .handle_interrupt = icount_handle_interrupt,
     .get_virtual_clock = icount_get,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-static void *tcg_cpu_thread_fn(void *arg)
+static void *mttcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         if (cpu_can_run(cpu)) {
             int r;
             qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
+            r = tcg_cpus_exec(cpu);
             qemu_mutex_lock_iothread();
             switch (r) {
             case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
-    qemu_tcg_destroy_vcpu(cpu);
+    tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
     rcu_unregister_thread();
     return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
              cpu->cpu_index);
 
-    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                        cpu, QEMU_THREAD_JOINABLE);
 
 #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
     .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 
 /* Kick all RR vCPUs */
-void qemu_cpu_kick_rr_cpus(CPUState *unused)
+void rr_kick_vcpu_thread(CPUState *unused)
 {
     CPUState *cpu;
 
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
  * idleness is complete.
  */
 
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
+static QEMUTimer *rr_kick_vcpu_timer;
+static CPUState *rr_current_cpu;
 
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
-static inline int64_t qemu_tcg_next_kick(void)
+static inline int64_t rr_next_kick_time(void)
 {
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
 /* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
+static void rr_kick_next_cpu(void)
 {
     CPUState *cpu;
     do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        cpu = qatomic_mb_read(&rr_current_cpu);
         if (cpu) {
             cpu_exit(cpu);
         }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+    } while (cpu != qatomic_mb_read(&rr_current_cpu));
 }
 
-static void kick_tcg_thread(void *opaque)
+static void rr_kick_thread(void *opaque)
 {
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
+    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
+    rr_kick_next_cpu();
 }
 
-static void start_tcg_kick_timer(void)
+static void rr_start_kick_timer(void)
 {
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
+    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                          rr_kick_thread, NULL);
     }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
+        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
     }
 }
 
-static void stop_tcg_kick_timer(void)
+static void rr_stop_kick_timer(void)
 {
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
+    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
+        timer_del(rr_kick_vcpu_timer);
     }
 }
 
-static void qemu_tcg_rr_wait_io_event(void)
+static void rr_wait_io_event(void)
 {
     CPUState *cpu;
 
     while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
+        rr_stop_kick_timer();
         qemu_cond_wait_iothread(first_cpu->halt_cond);
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
 * Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
-static void deal_with_unplugged_cpus(void)
+static void rr_deal_with_unplugged_cpus(void)
 {
     CPUState *cpu;
 
     CPU_FOREACH(cpu) {
         if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
+            tcg_cpus_destroy(cpu);
             break;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-static void *tcg_rr_cpu_thread_fn(void *arg)
+static void *rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         }
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     cpu = first_cpu;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              * Run the timers here. This is much more efficient than
              * waking up the I/O thread and waiting for completion.
              */
-            handle_icount_deadline();
+            icount_handle_deadline();
         }
 
         replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
         while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            qatomic_mb_set(&rr_current_cpu, cpu);
             current_cpu = cpu;
 
             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
                 qemu_mutex_unlock_iothread();
                 if (icount_enabled()) {
-                    prepare_icount_for_run(cpu);
+                    icount_prepare_for_run(cpu);
                 }
-                r = tcg_cpu_exec(cpu);
+                r = tcg_cpus_exec(cpu);
                 if (icount_enabled()) {
-                    process_icount_data(cpu);
+                    icount_process_data(cpu);
                 }
                 qemu_mutex_lock_iothread();
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         } /* while (cpu && !cpu->exit_request).. */
 
         /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
+        qatomic_set(&rr_current_cpu, NULL);
 
         if (cpu && cpu->exit_request) {
             qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
             qemu_notify_event();
         }
 
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
     }
 
     rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
         /* share a single thread for all cpus with TCG */
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
         qemu_thread_create(cpu->thread, thread_name,
-                           tcg_rr_cpu_thread_fn,
+                           rr_cpu_thread_fn,
                            cpu, QEMU_THREAD_JOINABLE);
 
         single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
 
 const CpusAccel tcg_cpus_rr = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 
 /* common functionality among all TCG variants */
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu)
+void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
 }
 
-int tcg_cpu_exec(CPUState *cpu)
+int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
 #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
 }
 
 /* mask must never be zero, except for A20 change call */
-void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
 
--
2.25.1
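
To close, the shape of the resulting split in one place. This is an
illustrative sketch under simplified assumptions: CPUState is treated as
opaque, and CpusAccelSketch is a made-up stand-in for QEMU's CpusAccel,
which carries further hooks such as get_virtual_clock.

/* One ops table per TCG variant; mttcg, rr and icount each fill one in. */
typedef struct CPUState CPUState;               /* opaque stand-in */

typedef struct CpusAccelSketch {
    void (*create_vcpu_thread)(CPUState *cpu);
    void (*kick_vcpu_thread)(CPUState *cpu);
    void (*handle_interrupt)(CPUState *cpu, int mask);
} CpusAccelSketch;

/* Shared helpers from tcg-cpus.c, declared here for the sketch: */
void rr_start_vcpu_thread(CPUState *cpu);
void rr_kick_vcpu_thread(CPUState *cpu);
void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);

/* e.g. the round-robin variant wires up the shared helpers: */
static const CpusAccelSketch tcg_cpus_rr_sketch = {
    .create_vcpu_thread = rr_start_vcpu_thread,
    .kick_vcpu_thread   = rr_kick_vcpu_thread,
    .handle_interrupt   = tcg_cpus_handle_interrupt,
};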