The following changes since commit 3e08b2b9cb64bff2b73fa9128c0e49bfcde0dd40:

  Merge remote-tracking branch 'remotes/philmd-gitlab/tags/edk2-next-20200121' into staging (2020-01-21 15:29:25 +0000)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200121

for you to fetch changes up to 75fa376cdab5e5db2c7fdd107358e16f95503ac6:

  scripts/git.orderfile: Display decodetree before C source (2020-01-21 15:26:09 -1000)

----------------------------------------------------------------
Remove another limit to NB_MMU_MODES.
Fix compilation using uclibc.
Fix defaulting of -accel parameters.
Tidy cputlb basic routines.
Adjust git.orderfile for decodetree.

----------------------------------------------------------------
Carlos Santos (1):
      util/cacheinfo: fix crash when compiling with uClibc

Philippe Mathieu-Daudé (1):
      scripts/git.orderfile: Display decodetree before C source

Richard Henderson (14):
      cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
      vl: Remove unused variable in configure_accelerators
      vl: Reduce scope of variables in configure_accelerators
      vl: Remove useless test in configure_accelerators
      vl: Only choose enabled accelerators in configure_accelerators
      cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
      cputlb: Make tlb_n_entries private to cputlb.c
      cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
      cputlb: Hoist tlb portions in tlb_mmu_resize_locked
      cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
      cputlb: Split out tlb_mmu_flush_locked
      cputlb: Partially merge tlb_dyn_init into tlb_init
      cputlb: Initialize tlbs as flushed
      cputlb: Hoist timestamp outside of loops over tlbs

 include/exec/cpu_ldst.h | 5 -
 accel/tcg/cputlb.c | 287 +++++++++++++++++++++++++++++++++---------------
 util/cacheinfo.c | 10 +-
 vl.c | 27 +++--
 scripts/git.orderfile | 3 +
 5 files changed, 223 insertions(+), 109 deletions(-)


I have not been able to prod reviews of all of the rotate patches
in 4 weeks, but let's not let that block ARM work forever.

r~


The following changes since commit cccdd8c7971896c339d59c9c5d4647d4ffd9568a:

  Merge remote-tracking branch 'remotes/ehabkost/tags/machine-next-pull-request' into staging (2020-06-02 10:25:55 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200602

for you to fetch changes up to 71b04329c4f7d5824a289ca5225e1883a278cf3b:

  accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler (2020-06-02 08:42:37 -0700)

----------------------------------------------------------------
Vector rotate support
Signal handling support for NetBSD arm/aarch64

----------------------------------------------------------------
Nick Hudson (2):
      accel/tcg: Adjust cpu_signal_handler for NetBSD/arm
      accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler

Richard Henderson (10):
      tcg: Implement gvec support for rotate by immediate
      tcg: Implement gvec support for rotate by vector
      tcg: Remove expansion to shift by vector from do_shifts
      tcg: Implement gvec support for rotate by scalar
      tcg/i386: Implement INDEX_op_rotl{i,s,v}_vec
      tcg/aarch64: Implement INDEX_op_rotl{i,v}_vec
      tcg/ppc: Implement INDEX_op_rot[lr]v_vec
      target/ppc: Use tcg_gen_gvec_rotlv
      target/s390x: Use tcg_gen_gvec_rotl{i,s,v}
      tcg: Improve move ops in liveness_pass_2

 accel/tcg/tcg-runtime.h | 15 +++
 include/tcg/tcg-op-gvec.h | 12 ++
 include/tcg/tcg-op.h | 5 +
 include/tcg/tcg-opc.h | 4 +
 include/tcg/tcg.h | 3 +
 target/ppc/helper.h | 4 -
 target/s390x/helper.h | 4 -
 tcg/aarch64/tcg-target.h | 3 +
 tcg/aarch64/tcg-target.opc.h | 1 +
 tcg/i386/tcg-target.h | 3 +
 tcg/ppc/tcg-target.h | 3 +
 tcg/ppc/tcg-target.opc.h | 1 -
 accel/tcg/tcg-runtime-gvec.c | 144 ++++++++++++++++++++++++
 accel/tcg/user-exec.c | 43 +++++++-
 target/ppc/int_helper.c | 17 ---
 target/ppc/translate/vmx-impl.inc.c | 8 +-
 target/s390x/translate_vx.inc.c | 66 ++---------
 target/s390x/vec_int_helper.c | 31 ------
 tcg/aarch64/tcg-target.inc.c | 53 ++++++++-
 tcg/i386/tcg-target.inc.c | 116 +++++++++++++++++---
 tcg/ppc/tcg-target.inc.c | 23 +++-
 tcg/tcg-op-gvec.c | 212 ++++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c | 62 +++++++----
 tcg/tcg.c | 85 +++++++++++----
 target/s390x/insn-data.def | 4 +-
 tcg/README | 7 +-
 26 files changed, 736 insertions(+), 193 deletions(-)

No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)


No host backend support yet, but the interfaces for rotli
are in place. Canonicalize immediate rotate to the left,
based on a survey of architectures, but provide both left
and right shift interfaces to the translators.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime.h | 5 +++
 include/tcg/tcg-op-gvec.h | 6 ++++
 include/tcg/tcg-op.h | 2 ++
 include/tcg/tcg-opc.h | 1 +
 include/tcg/tcg.h | 1 +
 tcg/aarch64/tcg-target.h | 1 +
 tcg/i386/tcg-target.h | 1 +
 tcg/ppc/tcg-target.h | 1 +
 accel/tcg/tcg-runtime-gvec.c | 48 +++++++++++++++++++++++++
 tcg/tcg-op-gvec.c | 68 ++++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c | 12 +++++++
 tcg/tcg.c | 2 ++
 tcg/README | 3 +-
 13 files changed, 150 insertions(+), 1 deletion(-)

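As an aside, the left-rotate canonicalization relies on a simple identity: a right rotate by c is a left rotate by -c modulo the element width, which is exactly how tcg_gen_gvec_rotri() below defers to tcg_gen_gvec_rotli(). A minimal stand-alone C sketch of that identity (illustrative only, not part of the patch):

    /* Right rotate expressed via the canonical left rotate. */
    #include <assert.h>
    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, unsigned r)
    {
        r &= 31;
        return (x << r) | (x >> (-r & 31));
    }

    static inline uint32_t rotr32(uint32_t x, unsigned r)
    {
        /* rotr by r == rotl by (32 - r) mod 32, i.e. -r & 31. */
        return rotl32(x, -r & 31);
    }

    int main(void)
    {
        assert(rotr32(0x80000001u, 1) == 0xc0000000u);
        assert(rotr32(0x12345678u, 0) == 0x12345678u);
        return 0;
    }
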
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
24
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
26
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/cputlb.c
27
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
17
29
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
18
/**
30
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
19
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
31
20
- * @env: CPU that owns the TLB
32
+DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
21
- * @mmu_idx: MMU index of the TLB
33
+DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
22
+ * @desc: The CPUTLBDesc portion of the TLB
34
+DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
23
+ * @fast: The CPUTLBDescFast portion of the same TLB
35
+DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
24
*
36
+
25
* Called with tlb_lock_held.
37
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
*
38
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
39
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
* high), since otherwise we are likely to have a significant amount of
40
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
29
* conflict misses.
41
index XXXXXXX..XXXXXXX 100644
30
*/
42
--- a/include/tcg/tcg-op-gvec.h
31
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
43
+++ b/include/tcg/tcg-op-gvec.h
32
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
44
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
45
int64_t shift, uint32_t oprsz, uint32_t maxsz);
46
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
47
int64_t shift, uint32_t oprsz, uint32_t maxsz);
48
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
49
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
50
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
51
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
52
53
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
54
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
55
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
56
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
57
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
58
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
59
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
60
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
61
62
#endif
63
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
64
index XXXXXXX..XXXXXXX 100644
65
--- a/include/tcg/tcg-op.h
66
+++ b/include/tcg/tcg-op.h
67
@@ -XXX,XX +XXX,XX @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
68
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
69
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
70
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
71
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
72
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
73
74
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
75
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
76
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/tcg/tcg-opc.h
79
+++ b/include/tcg/tcg-opc.h
80
@@ -XXX,XX +XXX,XX @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
81
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
82
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
83
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
84
+DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
85
86
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
87
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
88
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/tcg/tcg.h
91
+++ b/include/tcg/tcg.h
92
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
93
#define TCG_TARGET_HAS_not_vec 0
94
#define TCG_TARGET_HAS_andc_vec 0
95
#define TCG_TARGET_HAS_orc_vec 0
96
+#define TCG_TARGET_HAS_roti_vec 0
97
#define TCG_TARGET_HAS_shi_vec 0
98
#define TCG_TARGET_HAS_shs_vec 0
99
#define TCG_TARGET_HAS_shv_vec 0
100
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/aarch64/tcg-target.h
103
+++ b/tcg/aarch64/tcg-target.h
104
@@ -XXX,XX +XXX,XX @@ typedef enum {
105
#define TCG_TARGET_HAS_not_vec 1
106
#define TCG_TARGET_HAS_neg_vec 1
107
#define TCG_TARGET_HAS_abs_vec 1
108
+#define TCG_TARGET_HAS_roti_vec 0
109
#define TCG_TARGET_HAS_shi_vec 1
110
#define TCG_TARGET_HAS_shs_vec 0
111
#define TCG_TARGET_HAS_shv_vec 1
112
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
113
index XXXXXXX..XXXXXXX 100644
114
--- a/tcg/i386/tcg-target.h
115
+++ b/tcg/i386/tcg-target.h
116
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
117
#define TCG_TARGET_HAS_not_vec 0
118
#define TCG_TARGET_HAS_neg_vec 0
119
#define TCG_TARGET_HAS_abs_vec 1
120
+#define TCG_TARGET_HAS_roti_vec 0
121
#define TCG_TARGET_HAS_shi_vec 1
122
#define TCG_TARGET_HAS_shs_vec 1
123
#define TCG_TARGET_HAS_shv_vec have_avx2
124
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
125
index XXXXXXX..XXXXXXX 100644
126
--- a/tcg/ppc/tcg-target.h
127
+++ b/tcg/ppc/tcg-target.h
128
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
129
#define TCG_TARGET_HAS_not_vec 1
130
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
131
#define TCG_TARGET_HAS_abs_vec 0
132
+#define TCG_TARGET_HAS_roti_vec 0
133
#define TCG_TARGET_HAS_shi_vec 0
134
#define TCG_TARGET_HAS_shs_vec 0
135
#define TCG_TARGET_HAS_shv_vec 1
136
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
137
index XXXXXXX..XXXXXXX 100644
138
--- a/accel/tcg/tcg-runtime-gvec.c
139
+++ b/accel/tcg/tcg-runtime-gvec.c
140
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
141
clear_high(d, oprsz, desc);
142
}
143
144
+void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
145
+{
146
+ intptr_t oprsz = simd_oprsz(desc);
147
+ int shift = simd_data(desc);
148
+ intptr_t i;
149
+
150
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
151
+ *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
152
+ }
153
+ clear_high(d, oprsz, desc);
154
+}
155
+
156
+void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
157
+{
158
+ intptr_t oprsz = simd_oprsz(desc);
159
+ int shift = simd_data(desc);
160
+ intptr_t i;
161
+
162
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
163
+ *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
164
+ }
165
+ clear_high(d, oprsz, desc);
166
+}
167
+
168
+void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
169
+{
170
+ intptr_t oprsz = simd_oprsz(desc);
171
+ int shift = simd_data(desc);
172
+ intptr_t i;
173
+
174
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
175
+ *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
176
+ }
177
+ clear_high(d, oprsz, desc);
178
+}
179
+
180
+void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
181
+{
182
+ intptr_t oprsz = simd_oprsz(desc);
183
+ int shift = simd_data(desc);
184
+ intptr_t i;
185
+
186
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
187
+ *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
188
+ }
189
+ clear_high(d, oprsz, desc);
190
+}
191
+
192
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
33
{
193
{
34
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
194
intptr_t oprsz = simd_oprsz(desc);
35
- size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
195
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
36
+ size_t old_size = tlb_n_entries(fast);
196
index XXXXXXX..XXXXXXX 100644
37
size_t rate;
197
--- a/tcg/tcg-op-gvec.c
38
size_t new_size = old_size;
198
+++ b/tcg/tcg-op-gvec.c
39
int64_t now = get_clock_realtime();
199
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
40
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
41
return;
42
}
43
44
- g_free(env_tlb(env)->f[mmu_idx].table);
45
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
46
+ g_free(fast->table);
47
+ g_free(desc->iotlb);
48
49
tlb_window_reset(desc, now, 0);
50
/* desc->n_used_entries is cleared by the caller */
51
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
52
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
53
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
54
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
55
+ fast->table = g_try_new(CPUTLBEntry, new_size);
56
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
57
+
58
/*
59
* If the allocations fail, try smaller sizes. We just freed some
60
* memory, so going back to half of new_size has a good chance of working.
61
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
62
* allocations to fail though, so we progressively reduce the allocation
63
* size, aborting if we cannot even allocate the smallest TLB we support.
64
*/
65
- while (env_tlb(env)->f[mmu_idx].table == NULL ||
66
- env_tlb(env)->d[mmu_idx].iotlb == NULL) {
67
+ while (fast->table == NULL || desc->iotlb == NULL) {
68
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
69
error_report("%s: %s", __func__, strerror(errno));
70
abort();
71
}
72
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
73
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
74
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
75
76
- g_free(env_tlb(env)->f[mmu_idx].table);
77
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
78
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
79
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
80
+ g_free(fast->table);
81
+ g_free(desc->iotlb);
82
+ fast->table = g_try_new(CPUTLBEntry, new_size);
83
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
84
}
200
}
85
}
201
}
86
202
87
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
203
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
204
+{
205
+ uint64_t mask = dup_const(MO_8, 0xff << c);
206
+
207
+ tcg_gen_shli_i64(d, a, c);
208
+ tcg_gen_shri_i64(a, a, 8 - c);
209
+ tcg_gen_andi_i64(d, d, mask);
210
+ tcg_gen_andi_i64(a, a, ~mask);
211
+ tcg_gen_or_i64(d, d, a);
212
+}
213
+
214
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
215
+{
216
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
217
+
218
+ tcg_gen_shli_i64(d, a, c);
219
+ tcg_gen_shri_i64(a, a, 16 - c);
220
+ tcg_gen_andi_i64(d, d, mask);
221
+ tcg_gen_andi_i64(a, a, ~mask);
222
+ tcg_gen_or_i64(d, d, a);
223
+}
224
+
225
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
226
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
227
+{
228
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
229
+ static const GVecGen2i g[4] = {
230
+ { .fni8 = tcg_gen_vec_rotl8i_i64,
231
+ .fniv = tcg_gen_rotli_vec,
232
+ .fno = gen_helper_gvec_rotl8i,
233
+ .opt_opc = vecop_list,
234
+ .vece = MO_8 },
235
+ { .fni8 = tcg_gen_vec_rotl16i_i64,
236
+ .fniv = tcg_gen_rotli_vec,
237
+ .fno = gen_helper_gvec_rotl16i,
238
+ .opt_opc = vecop_list,
239
+ .vece = MO_16 },
240
+ { .fni4 = tcg_gen_rotli_i32,
241
+ .fniv = tcg_gen_rotli_vec,
242
+ .fno = gen_helper_gvec_rotl32i,
243
+ .opt_opc = vecop_list,
244
+ .vece = MO_32 },
245
+ { .fni8 = tcg_gen_rotli_i64,
246
+ .fniv = tcg_gen_rotli_vec,
247
+ .fno = gen_helper_gvec_rotl64i,
248
+ .opt_opc = vecop_list,
249
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
250
+ .vece = MO_64 },
251
+ };
252
+
253
+ tcg_debug_assert(vece <= MO_64);
254
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
255
+ if (shift == 0) {
256
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
257
+ } else {
258
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
259
+ }
260
+}
261
+
262
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
263
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
264
+{
265
+ tcg_debug_assert(vece <= MO_64);
266
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
267
+ tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
268
+ oprsz, maxsz);
269
+}
270
+
271
/*
272
* Specialized generation vector shifts by a non-constant scalar.
273
*/
274
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
275
index XXXXXXX..XXXXXXX 100644
276
--- a/tcg/tcg-op-vec.c
277
+++ b/tcg/tcg-op-vec.c
278
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
279
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
280
}
281
282
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
283
+{
284
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
285
+}
286
+
287
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
288
+{
289
+ int bits = 8 << vece;
290
+ tcg_debug_assert(i >= 0 && i < bits);
291
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
292
+}
293
+
294
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
295
TCGv_vec r, TCGv_vec a, TCGv_vec b)
88
{
296
{
89
- tlb_mmu_resize_locked(env, mmu_idx);
297
diff --git a/tcg/tcg.c b/tcg/tcg.c
90
+ tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
298
index XXXXXXX..XXXXXXX 100644
91
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
299
--- a/tcg/tcg.c
92
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
300
+++ b/tcg/tcg.c
93
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
301
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
302
case INDEX_op_shrv_vec:
303
case INDEX_op_sarv_vec:
304
return have_vec && TCG_TARGET_HAS_shv_vec;
305
+ case INDEX_op_rotli_vec:
306
+ return have_vec && TCG_TARGET_HAS_roti_vec;
307
case INDEX_op_ssadd_vec:
308
case INDEX_op_usadd_vec:
309
case INDEX_op_sssub_vec:
310
diff --git a/tcg/README b/tcg/README
311
index XXXXXXX..XXXXXXX 100644
312
--- a/tcg/README
313
+++ b/tcg/README
314
@@ -XXX,XX +XXX,XX @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
315
316
* shri_vec v0, v1, i2
317
* sari_vec v0, v1, i2
318
+* rotli_vec v0, v1, i2
319
* shrs_vec v0, v1, s2
320
* sars_vec v0, v1, s2
321
322
- Similarly for logical and arithmetic right shift.
323
+ Similarly for logical and arithmetic right shift, and left rotate.
324
325
* shlv_vec v0, v1, v2
326
94
--
327
--
95
2.20.1
328
2.25.1
96
329
97
330
There's little point in leaving these data structures half initialized,
and relying on a flush to be done during reset.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)


No host backend support yet, but the interfaces for rotlv
and rotrv are in place.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Drop the generic expansion from rot to shift; we can do better
for each backend, and then this code becomes unused.
---
 accel/tcg/tcg-runtime.h | 10 +++
 include/tcg/tcg-op-gvec.h | 4 ++
 include/tcg/tcg-op.h | 2 +
 include/tcg/tcg-opc.h | 2 +
 include/tcg/tcg.h | 1 +
 tcg/aarch64/tcg-target.h | 1 +
 tcg/i386/tcg-target.h | 1 +
 tcg/ppc/tcg-target.h | 1 +
 accel/tcg/tcg-runtime-gvec.c | 96 +++++++++++++++++++++++++++
 tcg/tcg-op-gvec.c | 122 +++++++++++++++++++++++++++++++++++
 tcg/tcg-op-vec.c | 10 +++
 tcg/tcg.c | 3 +
 tcg/README | 4 +-
 13 files changed, 256 insertions(+), 1 deletion(-)

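For orientation, the rotate-by-vector semantics implemented by the gvec_rotl*v helpers below take each lane's rotate count from the corresponding lane of the second operand, modulo the element width. A small stand-alone C sketch of the 8-bit case (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* d[i] = a[i] rotated left by (b[i] mod 8), per byte lane. */
    static void rotl8v(uint8_t *d, const uint8_t *a, const uint8_t *b, int n)
    {
        for (int i = 0; i < n; i++) {
            unsigned sh = b[i] & 7;
            d[i] = (uint8_t)((a[i] << sh) | (a[i] >> (-sh & 7)));
        }
    }

    int main(void)
    {
        uint8_t a[4] = { 0x81, 0x0f, 0xff, 0x01 };
        uint8_t b[4] = { 1, 4, 9, 0 };   /* a count of 9 behaves like 1 */
        uint8_t d[4];

        rotl8v(d, a, b, 4);
        printf("%02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
        /* prints: 03 f0 ff 01 */
        return 0;
    }
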
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
25
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
12
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
27
--- a/accel/tcg/tcg-runtime.h
14
+++ b/accel/tcg/cputlb.c
28
+++ b/accel/tcg/tcg-runtime.h
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
29
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
16
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
30
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
fast->table = g_new(CPUTLBEntry, n_entries);
31
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
18
desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
32
19
+ tlb_mmu_flush_locked(desc, fast);
33
+DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+
38
+DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
+
43
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
44
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
45
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/include/tcg/tcg-op-gvec.h
49
+++ b/include/tcg/tcg-op-gvec.h
50
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
51
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
52
void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
53
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
54
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
55
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
56
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
57
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
58
59
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
60
uint32_t aofs, uint32_t bofs,
61
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/tcg/tcg-op.h
64
+++ b/include/tcg/tcg-op.h
65
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
66
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
67
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
68
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
69
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
70
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
71
72
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
73
TCGv_vec a, TCGv_vec b);
74
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/include/tcg/tcg-opc.h
77
+++ b/include/tcg/tcg-opc.h
78
@@ -XXX,XX +XXX,XX @@ DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
79
DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
80
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
81
DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
82
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
83
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
84
85
DEF(cmp_vec, 1, 2, 1, IMPLVEC)
86
87
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
88
index XXXXXXX..XXXXXXX 100644
89
--- a/include/tcg/tcg.h
90
+++ b/include/tcg/tcg.h
91
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
92
#define TCG_TARGET_HAS_andc_vec 0
93
#define TCG_TARGET_HAS_orc_vec 0
94
#define TCG_TARGET_HAS_roti_vec 0
95
+#define TCG_TARGET_HAS_rotv_vec 0
96
#define TCG_TARGET_HAS_shi_vec 0
97
#define TCG_TARGET_HAS_shs_vec 0
98
#define TCG_TARGET_HAS_shv_vec 0
99
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/tcg/aarch64/tcg-target.h
102
+++ b/tcg/aarch64/tcg-target.h
103
@@ -XXX,XX +XXX,XX @@ typedef enum {
104
#define TCG_TARGET_HAS_neg_vec 1
105
#define TCG_TARGET_HAS_abs_vec 1
106
#define TCG_TARGET_HAS_roti_vec 0
107
+#define TCG_TARGET_HAS_rotv_vec 0
108
#define TCG_TARGET_HAS_shi_vec 1
109
#define TCG_TARGET_HAS_shs_vec 0
110
#define TCG_TARGET_HAS_shv_vec 1
111
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
112
index XXXXXXX..XXXXXXX 100644
113
--- a/tcg/i386/tcg-target.h
114
+++ b/tcg/i386/tcg-target.h
115
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
116
#define TCG_TARGET_HAS_neg_vec 0
117
#define TCG_TARGET_HAS_abs_vec 1
118
#define TCG_TARGET_HAS_roti_vec 0
119
+#define TCG_TARGET_HAS_rotv_vec 0
120
#define TCG_TARGET_HAS_shi_vec 1
121
#define TCG_TARGET_HAS_shs_vec 1
122
#define TCG_TARGET_HAS_shv_vec have_avx2
123
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
124
index XXXXXXX..XXXXXXX 100644
125
--- a/tcg/ppc/tcg-target.h
126
+++ b/tcg/ppc/tcg-target.h
127
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
128
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
129
#define TCG_TARGET_HAS_abs_vec 0
130
#define TCG_TARGET_HAS_roti_vec 0
131
+#define TCG_TARGET_HAS_rotv_vec 0
132
#define TCG_TARGET_HAS_shi_vec 0
133
#define TCG_TARGET_HAS_shs_vec 0
134
#define TCG_TARGET_HAS_shv_vec 1
135
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
136
index XXXXXXX..XXXXXXX 100644
137
--- a/accel/tcg/tcg-runtime-gvec.c
138
+++ b/accel/tcg/tcg-runtime-gvec.c
139
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
140
clear_high(d, oprsz, desc);
20
}
141
}
21
142
22
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
143
+void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
23
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
144
+{
24
145
+ intptr_t oprsz = simd_oprsz(desc);
25
qemu_spin_init(&env_tlb(env)->c.lock);
146
+ intptr_t i;
26
147
+
27
- /* Ensure that cpu_reset performs a full flush. */
148
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
28
- env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
149
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
29
+ /* All tlbs are initialized flushed. */
150
+ *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
30
+ env_tlb(env)->c.dirty = 0;
151
+ }
31
152
+ clear_high(d, oprsz, desc);
32
for (i = 0; i < NB_MMU_MODES; i++) {
153
+}
33
tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
154
+
155
+void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
156
+{
157
+ intptr_t oprsz = simd_oprsz(desc);
158
+ intptr_t i;
159
+
160
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
161
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
162
+ *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
163
+ }
164
+ clear_high(d, oprsz, desc);
165
+}
166
+
167
+void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
168
+{
169
+ intptr_t oprsz = simd_oprsz(desc);
170
+ intptr_t i;
171
+
172
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
173
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
174
+ *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
175
+ }
176
+ clear_high(d, oprsz, desc);
177
+}
178
+
179
+void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
180
+{
181
+ intptr_t oprsz = simd_oprsz(desc);
182
+ intptr_t i;
183
+
184
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
185
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
186
+ *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
187
+ }
188
+ clear_high(d, oprsz, desc);
189
+}
190
+
191
+void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
192
+{
193
+ intptr_t oprsz = simd_oprsz(desc);
194
+ intptr_t i;
195
+
196
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
197
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
198
+ *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
199
+ }
200
+ clear_high(d, oprsz, desc);
201
+}
202
+
203
+void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
204
+{
205
+ intptr_t oprsz = simd_oprsz(desc);
206
+ intptr_t i;
207
+
208
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
209
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
210
+ *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
211
+ }
212
+ clear_high(d, oprsz, desc);
213
+}
214
+
215
+void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
216
+{
217
+ intptr_t oprsz = simd_oprsz(desc);
218
+ intptr_t i;
219
+
220
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
221
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
222
+ *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
223
+ }
224
+ clear_high(d, oprsz, desc);
225
+}
226
+
227
+void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
228
+{
229
+ intptr_t oprsz = simd_oprsz(desc);
230
+ intptr_t i;
231
+
232
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
233
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
234
+ *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
235
+ }
236
+ clear_high(d, oprsz, desc);
237
+}
238
+
239
#define DO_CMP1(NAME, TYPE, OP) \
240
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
241
{ \
242
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
243
index XXXXXXX..XXXXXXX 100644
244
--- a/tcg/tcg-op-gvec.c
245
+++ b/tcg/tcg-op-gvec.c
246
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
247
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
248
}
249
250
+/*
251
+ * Similarly for rotates.
252
+ */
253
+
254
+static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
255
+ TCGv_vec a, TCGv_vec b)
256
+{
257
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
258
+
259
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
260
+ tcg_gen_and_vec(vece, t, t, b);
261
+ tcg_gen_rotlv_vec(vece, d, a, t);
262
+ tcg_temp_free_vec(t);
263
+}
264
+
265
+static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
266
+{
267
+ TCGv_i32 t = tcg_temp_new_i32();
268
+
269
+ tcg_gen_andi_i32(t, b, 31);
270
+ tcg_gen_rotl_i32(d, a, t);
271
+ tcg_temp_free_i32(t);
272
+}
273
+
274
+static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
275
+{
276
+ TCGv_i64 t = tcg_temp_new_i64();
277
+
278
+ tcg_gen_andi_i64(t, b, 63);
279
+ tcg_gen_rotl_i64(d, a, t);
280
+ tcg_temp_free_i64(t);
281
+}
282
+
283
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
284
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
285
+{
286
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
287
+ static const GVecGen3 g[4] = {
288
+ { .fniv = tcg_gen_rotlv_mod_vec,
289
+ .fno = gen_helper_gvec_rotl8v,
290
+ .opt_opc = vecop_list,
291
+ .vece = MO_8 },
292
+ { .fniv = tcg_gen_rotlv_mod_vec,
293
+ .fno = gen_helper_gvec_rotl16v,
294
+ .opt_opc = vecop_list,
295
+ .vece = MO_16 },
296
+ { .fni4 = tcg_gen_rotl_mod_i32,
297
+ .fniv = tcg_gen_rotlv_mod_vec,
298
+ .fno = gen_helper_gvec_rotl32v,
299
+ .opt_opc = vecop_list,
300
+ .vece = MO_32 },
301
+ { .fni8 = tcg_gen_rotl_mod_i64,
302
+ .fniv = tcg_gen_rotlv_mod_vec,
303
+ .fno = gen_helper_gvec_rotl64v,
304
+ .opt_opc = vecop_list,
305
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
306
+ .vece = MO_64 },
307
+ };
308
+
309
+ tcg_debug_assert(vece <= MO_64);
310
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
311
+}
312
+
313
+static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
314
+ TCGv_vec a, TCGv_vec b)
315
+{
316
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
317
+
318
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
319
+ tcg_gen_and_vec(vece, t, t, b);
320
+ tcg_gen_rotrv_vec(vece, d, a, t);
321
+ tcg_temp_free_vec(t);
322
+}
323
+
324
+static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
325
+{
326
+ TCGv_i32 t = tcg_temp_new_i32();
327
+
328
+ tcg_gen_andi_i32(t, b, 31);
329
+ tcg_gen_rotr_i32(d, a, t);
330
+ tcg_temp_free_i32(t);
331
+}
332
+
333
+static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
334
+{
335
+ TCGv_i64 t = tcg_temp_new_i64();
336
+
337
+ tcg_gen_andi_i64(t, b, 63);
338
+ tcg_gen_rotr_i64(d, a, t);
339
+ tcg_temp_free_i64(t);
340
+}
341
+
342
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
343
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
344
+{
345
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
346
+ static const GVecGen3 g[4] = {
347
+ { .fniv = tcg_gen_rotrv_mod_vec,
348
+ .fno = gen_helper_gvec_rotr8v,
349
+ .opt_opc = vecop_list,
350
+ .vece = MO_8 },
351
+ { .fniv = tcg_gen_rotrv_mod_vec,
352
+ .fno = gen_helper_gvec_rotr16v,
353
+ .opt_opc = vecop_list,
354
+ .vece = MO_16 },
355
+ { .fni4 = tcg_gen_rotr_mod_i32,
356
+ .fniv = tcg_gen_rotrv_mod_vec,
357
+ .fno = gen_helper_gvec_rotr32v,
358
+ .opt_opc = vecop_list,
359
+ .vece = MO_32 },
360
+ { .fni8 = tcg_gen_rotr_mod_i64,
361
+ .fniv = tcg_gen_rotrv_mod_vec,
362
+ .fno = gen_helper_gvec_rotr64v,
363
+ .opt_opc = vecop_list,
364
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
365
+ .vece = MO_64 },
366
+ };
367
+
368
+ tcg_debug_assert(vece <= MO_64);
369
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
370
+}
371
+
372
/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
373
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
374
uint32_t oprsz, TCGCond cond)
375
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
376
index XXXXXXX..XXXXXXX 100644
377
--- a/tcg/tcg-op-vec.c
378
+++ b/tcg/tcg-op-vec.c
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
380
do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
381
}
382
383
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
384
+{
385
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
386
+}
387
+
388
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
389
+{
390
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
391
+}
392
+
393
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
394
TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
395
{
396
diff --git a/tcg/tcg.c b/tcg/tcg.c
397
index XXXXXXX..XXXXXXX 100644
398
--- a/tcg/tcg.c
399
+++ b/tcg/tcg.c
400
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
401
return have_vec && TCG_TARGET_HAS_shv_vec;
402
case INDEX_op_rotli_vec:
403
return have_vec && TCG_TARGET_HAS_roti_vec;
404
+ case INDEX_op_rotlv_vec:
405
+ case INDEX_op_rotrv_vec:
406
+ return have_vec && TCG_TARGET_HAS_rotv_vec;
407
case INDEX_op_ssadd_vec:
408
case INDEX_op_usadd_vec:
409
case INDEX_op_sssub_vec:
410
diff --git a/tcg/README b/tcg/README
411
index XXXXXXX..XXXXXXX 100644
412
--- a/tcg/README
413
+++ b/tcg/README
414
@@ -XXX,XX +XXX,XX @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
415
416
* shrv_vec v0, v1, v2
417
* sarv_vec v0, v1, v2
418
+* rotlv_vec v0, v1, v2
419
+* rotrv_vec v0, v1, v2
420
421
- Similarly for logical and arithmetic right shift.
422
+ Similarly for logical and arithmetic right shift, and rotates.
423
424
* cmp_vec v0, v1, v2, cond
425
34
--
426
--
35
2.20.1
427
2.25.1
36
428
37
429
In target/arm we will shortly have "too many" mmu_idx.
The current minimum barrier is caused by the way in which
tlb_flush_page_by_mmuidx is coded.

We can remove this limitation by allocating memory for
consumption by the worker. Let us assume that this is
the unlikely case, as will be the case for the majority
of targets which have so far satisfied the BUILD_BUG_ON,
and only allocate memory when necessary.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 132 insertions(+), 35 deletions(-)


We do not reflect this expansion in tcg_can_emit_vecop_list,
so it is unused and unusable. However, we actually perform
the same expansion in do_gvec_shifts, so it is also unneeded.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-vec.c | 35 +++++++++++------------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

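A side note on the encoding trick the cputlb change below depends on: when the mmu-index bitmap fits in the page-offset bits, it can be packed into the low bits of the page-aligned address and recovered in the worker with no allocation. A stand-alone C sketch (illustrative only; the constants and names are stand-ins, not the real QEMU definitions):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_BITS 12                       /* stand-in for TARGET_PAGE_BITS */
    #define PAGE_MASK (~(uint64_t)((1u << PAGE_BITS) - 1))

    int main(void)
    {
        uint64_t addr = 0x7f00aabbc000ull;     /* already page aligned */
        uint16_t idxmap = 0x0035;              /* fits below PAGE_BITS */

        assert(idxmap < (1u << PAGE_BITS));
        uint64_t packed = addr | idxmap;       /* single pointer-sized argument */

        /* The async worker splits the two fields back apart. */
        assert((packed & PAGE_MASK) == addr);
        assert((uint16_t)(packed & ~PAGE_MASK) == idxmap);
        return 0;
    }
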
17
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/cputlb.c
12
--- a/tcg/tcg-op-vec.c
20
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/tcg-op-vec.c
21
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
15
}
16
17
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
18
- TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
19
+ TCGv_i32 s, TCGOpcode opc)
20
{
21
TCGTemp *rt = tcgv_vec_temp(r);
22
TCGTemp *at = tcgv_vec_temp(a);
23
@@ -XXX,XX +XXX,XX @@ static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
24
TCGArg ai = temp_arg(at);
25
TCGArg si = temp_arg(st);
26
TCGType type = rt->base_type;
27
- const TCGOpcode *hold_list;
28
int can;
29
30
tcg_debug_assert(at->base_type >= type);
31
- tcg_assert_listed_vecop(opc_s);
32
- hold_list = tcg_swap_vecop_list(NULL);
33
-
34
- can = tcg_can_emit_vec_op(opc_s, type, vece);
35
+ tcg_assert_listed_vecop(opc);
36
+ can = tcg_can_emit_vec_op(opc, type, vece);
37
if (can > 0) {
38
- vec_gen_3(opc_s, type, vece, ri, ai, si);
39
+ vec_gen_3(opc, type, vece, ri, ai, si);
40
} else if (can < 0) {
41
- tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
42
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
43
+ tcg_expand_vec_op(opc, type, vece, ri, ai, si);
44
+ tcg_swap_vecop_list(hold_list);
45
} else {
46
- TCGv_vec vec_s = tcg_temp_new_vec(type);
47
-
48
- if (vece == MO_64) {
49
- TCGv_i64 s64 = tcg_temp_new_i64();
50
- tcg_gen_extu_i32_i64(s64, s);
51
- tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
52
- tcg_temp_free_i64(s64);
53
- } else {
54
- tcg_gen_dup_i32_vec(vece, vec_s, s);
55
- }
56
- do_op3_nofail(vece, r, a, vec_s, opc_v);
57
- tcg_temp_free_vec(vec_s);
58
+ g_assert_not_reached();
22
}
59
}
60
- tcg_swap_vecop_list(hold_list);
23
}
61
}
24
62
25
-/* As we are going to hijack the bottom bits of the page address for a
63
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
26
- * mmuidx bit mask we need to fail to build if we can't do that
27
+/**
28
+ * tlb_flush_page_by_mmuidx_async_0:
29
+ * @cpu: cpu on which to flush
30
+ * @addr: page of virtual address to flush
31
+ * @idxmap: set of mmu_idx to flush
32
+ *
33
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
34
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
35
*/
36
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
37
-
38
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
39
- run_on_cpu_data data)
40
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
41
+ target_ulong addr,
42
+ uint16_t idxmap)
43
{
64
{
44
CPUArchState *env = cpu->env_ptr;
65
- do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
45
- target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
66
+ do_shifts(vece, r, a, b, INDEX_op_shls_vec);
46
- target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
47
- unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
48
int mmu_idx;
49
50
assert_cpu_is_self(cpu);
51
52
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
53
- addr, mmu_idx_bitmap);
54
+ tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
55
56
qemu_spin_lock(&env_tlb(env)->c.lock);
57
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
58
- if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
59
+ if ((idxmap >> mmu_idx) & 1) {
60
tlb_flush_page_locked(env, mmu_idx, addr);
61
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
64
tb_flush_jmp_cache(cpu, addr);
65
}
67
}
66
68
67
+/**
69
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
68
+ * tlb_flush_page_by_mmuidx_async_1:
69
+ * @cpu: cpu on which to flush
70
+ * @data: encoded addr + idxmap
71
+ *
72
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
73
+ * async_run_on_cpu. The idxmap parameter is encoded in the page
74
+ * offset of the target_ptr field. This limits the set of mmu_idx
75
+ * that can be passed via this method.
76
+ */
77
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
78
+ run_on_cpu_data data)
79
+{
80
+ target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
81
+ target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
82
+ uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
83
+
84
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
85
+}
86
+
87
+typedef struct {
88
+ target_ulong addr;
89
+ uint16_t idxmap;
90
+} TLBFlushPageByMMUIdxData;
91
+
92
+/**
93
+ * tlb_flush_page_by_mmuidx_async_2:
94
+ * @cpu: cpu on which to flush
95
+ * @data: allocated addr + idxmap
96
+ *
97
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
98
+ * async_run_on_cpu. The addr+idxmap parameters are stored in a
99
+ * TLBFlushPageByMMUIdxData structure that has been allocated
100
+ * specifically for this helper. Free the structure when done.
101
+ */
102
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
103
+ run_on_cpu_data data)
104
+{
105
+ TLBFlushPageByMMUIdxData *d = data.host_ptr;
106
+
107
+ tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
108
+ g_free(d);
109
+}
110
+
111
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
112
{
70
{
113
- target_ulong addr_and_mmu_idx;
71
- do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
114
-
72
+ do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
115
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
116
117
/* This should already be page aligned */
118
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
119
- addr_and_mmu_idx |= idxmap;
120
+ addr &= TARGET_PAGE_MASK;
121
122
- if (!qemu_cpu_is_self(cpu)) {
123
- async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
124
- RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
125
+ if (qemu_cpu_is_self(cpu)) {
126
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
127
+ } else if (idxmap < TARGET_PAGE_SIZE) {
128
+ /*
129
+ * Most targets have only a few mmu_idx. In the case where
130
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
131
+ * allocating memory for this operation.
132
+ */
133
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
134
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
135
} else {
136
- tlb_flush_page_by_mmuidx_async_work(
137
- cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
138
+ TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
139
+
140
+ /* Otherwise allocate a structure, freed by the worker. */
141
+ d->addr = addr;
142
+ d->idxmap = idxmap;
143
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
144
+ RUN_ON_CPU_HOST_PTR(d));
145
}
146
}
73
}
147
74
148
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
75
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
149
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
150
uint16_t idxmap)
151
{
76
{
152
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
77
- do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
153
- target_ulong addr_and_mmu_idx;
78
+ do_shifts(vece, r, a, b, INDEX_op_sars_vec);
154
-
155
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
156
157
/* This should already be page aligned */
158
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
159
- addr_and_mmu_idx |= idxmap;
160
+ addr &= TARGET_PAGE_MASK;
161
162
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
163
- fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
164
+ /*
165
+ * Allocate memory to hold addr+idxmap only when needed.
166
+ * See tlb_flush_page_by_mmuidx for details.
167
+ */
168
+ if (idxmap < TARGET_PAGE_SIZE) {
169
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
170
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
171
+ } else {
172
+ CPUState *dst_cpu;
173
+
174
+ /* Allocate a separate data block for each destination cpu. */
175
+ CPU_FOREACH(dst_cpu) {
176
+ if (dst_cpu != src_cpu) {
177
+ TLBFlushPageByMMUIdxData *d
178
+ = g_new(TLBFlushPageByMMUIdxData, 1);
179
+
180
+ d->addr = addr;
181
+ d->idxmap = idxmap;
182
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
183
+ RUN_ON_CPU_HOST_PTR(d));
184
+ }
185
+ }
186
+ }
187
+
188
+ tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
189
}
79
}
190
80
191
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
81
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
192
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
193
target_ulong addr,
194
uint16_t idxmap)
195
{
196
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
197
- target_ulong addr_and_mmu_idx;
198
-
199
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
200
201
/* This should already be page aligned */
202
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
203
- addr_and_mmu_idx |= idxmap;
204
+ addr &= TARGET_PAGE_MASK;
205
206
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
207
- async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
208
+ /*
209
+ * Allocate memory to hold addr+idxmap only when needed.
210
+ * See tlb_flush_page_by_mmuidx for details.
211
+ */
212
+ if (idxmap < TARGET_PAGE_SIZE) {
213
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
214
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
215
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
216
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
217
+ } else {
218
+ CPUState *dst_cpu;
219
+ TLBFlushPageByMMUIdxData *d;
220
+
221
+ /* Allocate a separate data block for each destination cpu. */
222
+ CPU_FOREACH(dst_cpu) {
223
+ if (dst_cpu != src_cpu) {
224
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
225
+ d->addr = addr;
226
+ d->idxmap = idxmap;
227
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
228
+ RUN_ON_CPU_HOST_PTR(d));
229
+ }
230
+ }
231
+
232
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
233
+ d->addr = addr;
234
+ d->idxmap = idxmap;
235
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
236
+ RUN_ON_CPU_HOST_PTR(d));
237
+ }
238
}
239
240
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
241
--
82
--
242
2.20.1
83
2.25.1
243
84
244
85
Deleted patch

From: Carlos Santos <casantos@redhat.com>

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls return -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.

Signed-off-by: Carlos Santos <casantos@redhat.com>
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index XXXXXXX..XXXXXXX 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
 static void sys_cache_info(int *isize, int *dsize)
 {
 # ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
 # endif
 # ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
 # endif
 }
 #endif /* sys_cache_info */
--
2.20.1

Deleted patch

The accel_initialised variable no longer has any setters.

Fixes: 6f6e1698a68c
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 {
     const char *accel;
     char **accel_list, **tmp;
-    bool accel_initialised = false;
     bool init_failed = false;

     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel_list = g_strsplit(accel, ":", 0);

-    for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+    for (tmp = accel_list; tmp && *tmp; tmp++) {
         /*
          * Filter invalid accelerators here, to prevent obscenities
          * such as "-machine accel=tcg,,thread=single".
--
2.20.1

Deleted patch

The accel_list and tmp variables are only used when manufacturing
-machine accel, options based on -accel.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
 static void configure_accelerators(const char *progname)
 {
     const char *accel;
-    char **accel_list, **tmp;
     bool init_failed = false;

     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
     if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
+        char **accel_list, **tmp;
+
         if (accel == NULL) {
             /* Select the default accelerator */
             if (!accel_find("tcg") && !accel_find("kvm")) {
--
2.20.1

Deleted patch

The result of g_strsplit is never NULL.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel_list = g_strsplit(accel, ":", 0);

-    for (tmp = accel_list; tmp && *tmp; tmp++) {
+    for (tmp = accel_list; *tmp; tmp++) {
         /*
          * Filter invalid accelerators here, to prevent obscenities
          * such as "-machine accel=tcg,,thread=single".
--
2.20.1

Merge into the only caller, but at the same time split
out tlb_mmu_init to initialize a single tlb entry.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)


No host backend support yet, but the interfaces for rotls
are in place. Only implement left-rotate for now, as the
only known use of vector rotate by scalar is s390x, so any
right-rotate would be unused and untestable.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h | 2 ++
 include/tcg/tcg-op.h | 1 +
 include/tcg/tcg-opc.h | 1 +
 include/tcg/tcg.h | 1 +
 tcg/aarch64/tcg-target.h | 1 +
 tcg/i386/tcg-target.h | 1 +
 tcg/ppc/tcg-target.h | 1 +
 tcg/tcg-op-gvec.c | 22 ++++++++++++++++++++++
 tcg/tcg-op-vec.c | 5 +++++
 tcg/tcg.c | 2 ++
 10 files changed, 37 insertions(+)

20
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
21
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
13
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
23
--- a/include/tcg/tcg-op-gvec.h
15
+++ b/accel/tcg/cputlb.c
24
+++ b/include/tcg/tcg-op-gvec.h
16
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
25
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
17
desc->window_max_entries = max_entries;
26
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
27
void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
28
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
29
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
30
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
31
32
/*
33
* Perform vector shift by vector element, modulo the element size.
34
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/tcg/tcg-op.h
37
+++ b/include/tcg/tcg-op.h
38
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
39
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
40
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
41
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
42
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
43
44
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
45
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
46
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/include/tcg/tcg-opc.h
49
+++ b/include/tcg/tcg-opc.h
50
@@ -XXX,XX +XXX,XX @@ DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
51
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
52
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
53
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
54
+DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
55
56
DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
57
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
58
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
59
index XXXXXXX..XXXXXXX 100644
60
--- a/include/tcg/tcg.h
61
+++ b/include/tcg/tcg.h
62
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
63
#define TCG_TARGET_HAS_andc_vec 0
64
#define TCG_TARGET_HAS_orc_vec 0
65
#define TCG_TARGET_HAS_roti_vec 0
66
+#define TCG_TARGET_HAS_rots_vec 0
67
#define TCG_TARGET_HAS_rotv_vec 0
68
#define TCG_TARGET_HAS_shi_vec 0
69
#define TCG_TARGET_HAS_shs_vec 0
70
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
71
index XXXXXXX..XXXXXXX 100644
72
--- a/tcg/aarch64/tcg-target.h
73
+++ b/tcg/aarch64/tcg-target.h
74
@@ -XXX,XX +XXX,XX @@ typedef enum {
75
#define TCG_TARGET_HAS_neg_vec 1
76
#define TCG_TARGET_HAS_abs_vec 1
77
#define TCG_TARGET_HAS_roti_vec 0
78
+#define TCG_TARGET_HAS_rots_vec 0
79
#define TCG_TARGET_HAS_rotv_vec 0
80
#define TCG_TARGET_HAS_shi_vec 1
81
#define TCG_TARGET_HAS_shs_vec 0
82
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
83
index XXXXXXX..XXXXXXX 100644
84
--- a/tcg/i386/tcg-target.h
85
+++ b/tcg/i386/tcg-target.h
86
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
87
#define TCG_TARGET_HAS_neg_vec 0
88
#define TCG_TARGET_HAS_abs_vec 1
89
#define TCG_TARGET_HAS_roti_vec 0
90
+#define TCG_TARGET_HAS_rots_vec 0
91
#define TCG_TARGET_HAS_rotv_vec 0
92
#define TCG_TARGET_HAS_shi_vec 1
93
#define TCG_TARGET_HAS_shs_vec 1
94
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
95
index XXXXXXX..XXXXXXX 100644
96
--- a/tcg/ppc/tcg-target.h
97
+++ b/tcg/ppc/tcg-target.h
98
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
99
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
100
#define TCG_TARGET_HAS_abs_vec 0
101
#define TCG_TARGET_HAS_roti_vec 0
102
+#define TCG_TARGET_HAS_rots_vec 0
103
#define TCG_TARGET_HAS_rotv_vec 0
104
#define TCG_TARGET_HAS_shi_vec 0
105
#define TCG_TARGET_HAS_shs_vec 0
106
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/tcg-op-gvec.c
109
+++ b/tcg/tcg-op-gvec.c
110
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
111
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
18
}
112
}
19
113
20
-static void tlb_dyn_init(CPUArchState *env)
114
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
21
-{
115
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
22
- int i;
23
-
24
- for (i = 0; i < NB_MMU_MODES; i++) {
25
- CPUTLBDesc *desc = &env_tlb(env)->d[i];
26
- size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
27
-
28
- tlb_window_reset(desc, get_clock_realtime(), 0);
29
- desc->n_used_entries = 0;
30
- env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
31
- env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
32
- env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
33
- }
34
-}
35
-
36
/**
37
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
38
* @desc: The CPUTLBDesc portion of the TLB
39
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
tlb_mmu_flush_locked(desc, fast);
41
}
42
43
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
44
+{
116
+{
45
+ size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
117
+ static const GVecGen2sh g = {
118
+ .fni4 = tcg_gen_rotl_i32,
119
+ .fni8 = tcg_gen_rotl_i64,
120
+ .fniv_s = tcg_gen_rotls_vec,
121
+ .fniv_v = tcg_gen_rotlv_vec,
122
+ .fno = {
123
+ gen_helper_gvec_rotl8i,
124
+ gen_helper_gvec_rotl16i,
125
+ gen_helper_gvec_rotl32i,
126
+ gen_helper_gvec_rotl64i,
127
+ },
128
+ .s_list = { INDEX_op_rotls_vec, 0 },
129
+ .v_list = { INDEX_op_rotlv_vec, 0 },
130
+ };
46
+
131
+
47
+ tlb_window_reset(desc, now, 0);
132
+ tcg_debug_assert(vece <= MO_64);
48
+ desc->n_used_entries = 0;
133
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
49
+ fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
50
+ fast->table = g_new(CPUTLBEntry, n_entries);
51
+ desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
52
+}
134
+}
53
+
135
+
54
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
136
/*
137
* Expand D = A << (B % element bits)
138
*
139
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/tcg/tcg-op-vec.c
142
+++ b/tcg/tcg-op-vec.c
143
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
144
do_shifts(vece, r, a, b, INDEX_op_sars_vec);
145
}
146
147
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
148
+{
149
+ do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
150
+}
151
+
152
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
153
TCGv_vec b, TCGv_vec c)
55
{
154
{
56
env_tlb(env)->d[mmu_idx].n_used_entries++;
155
diff --git a/tcg/tcg.c b/tcg/tcg.c
57
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
156
index XXXXXXX..XXXXXXX 100644
58
void tlb_init(CPUState *cpu)
157
--- a/tcg/tcg.c
59
{
158
+++ b/tcg/tcg.c
60
CPUArchState *env = cpu->env_ptr;
159
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
61
+ int64_t now = get_clock_realtime();
160
return have_vec && TCG_TARGET_HAS_shv_vec;
62
+ int i;
161
case INDEX_op_rotli_vec:
63
162
return have_vec && TCG_TARGET_HAS_roti_vec;
64
qemu_spin_init(&env_tlb(env)->c.lock);
163
+ case INDEX_op_rotls_vec:
65
164
+ return have_vec && TCG_TARGET_HAS_rots_vec;
66
/* Ensure that cpu_reset performs a full flush. */
165
case INDEX_op_rotlv_vec:
67
env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
166
case INDEX_op_rotrv_vec:
68
167
return have_vec && TCG_TARGET_HAS_rotv_vec;
69
- tlb_dyn_init(env);
70
+ for (i = 0; i < NB_MMU_MODES; i++) {
71
+ tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
72
+ }
73
}
74
75
/* flush_all_helper: run fn across all cpus
76
--
168
--
77
2.20.1
169
2.25.1
78
170
79
171
1
We will want to be able to flush a tlb without resizing.
1
For immediates, we must continue the special casing of 8-bit
2
2
elements. The other element sizes and shift types are trivially
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
implemented with shifts.
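The MO_8 special case can be sketched in plain C (an illustration of the
widening trick only, not the vector code in the hunk below): duplicating a byte
into both halves of a 16-bit lane lets one 16-bit shift produce the rotated
byte in the high half.

    #include <stdint.h>

    /* rotl8 via widen-shift-narrow: w is the byte duplicated into a
     * 16-bit lane, and bits [15:8] of (w << i) equal rotl8(b, i). */
    static inline uint8_t rotl8_widened(uint8_t b, unsigned i)
    {
        uint32_t w = b * 0x0101u;
        return (uint8_t)((w << (i & 7)) >> 8);
    }

The wider element sizes just use two shifts and an OR,
(a << i) | (a >> (bits - i)), as in the expansion below.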
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
accel/tcg/cputlb.c | 15 ++++++++++-----
7
tcg/i386/tcg-target.inc.c | 116 ++++++++++++++++++++++++++++++++------
9
1 file changed, 10 insertions(+), 5 deletions(-)
8
1 file changed, 100 insertions(+), 16 deletions(-)
10
9
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
12
--- a/tcg/i386/tcg-target.inc.c
14
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/i386/tcg-target.inc.c
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
14
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
15
case INDEX_op_shls_vec:
16
case INDEX_op_shrs_vec:
17
case INDEX_op_sars_vec:
18
+ case INDEX_op_rotls_vec:
19
case INDEX_op_cmp_vec:
20
case INDEX_op_x86_shufps_vec:
21
case INDEX_op_x86_blend_vec:
22
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
23
case INDEX_op_xor_vec:
24
case INDEX_op_andc_vec:
25
return 1;
26
+ case INDEX_op_rotli_vec:
27
case INDEX_op_cmp_vec:
28
case INDEX_op_cmpsel_vec:
29
return -1;
30
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
31
return vece >= MO_16;
32
case INDEX_op_sars_vec:
33
return vece >= MO_16 && vece <= MO_32;
34
+ case INDEX_op_rotls_vec:
35
+ return vece >= MO_16 ? -1 : 0;
36
37
case INDEX_op_shlv_vec:
38
case INDEX_op_shrv_vec:
39
return have_avx2 && vece >= MO_32;
40
case INDEX_op_sarv_vec:
41
return have_avx2 && vece == MO_32;
42
+ case INDEX_op_rotlv_vec:
43
+ case INDEX_op_rotrv_vec:
44
+ return have_avx2 && vece >= MO_32 ? -1 : 0;
45
46
case INDEX_op_mul_vec:
47
if (vece == MO_8) {
48
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
16
}
49
}
17
}
50
}
18
51
19
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
52
-static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
20
+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
53
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
54
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
21
{
55
{
22
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
56
TCGv_vec t1, t2;
23
- CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
57
@@ -XXX,XX +XXX,XX @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
24
-
58
t1 = tcg_temp_new_vec(type);
25
- tlb_mmu_resize_locked(desc, fast);
59
t2 = tcg_temp_new_vec(type);
26
desc->n_used_entries = 0;
60
27
desc->large_page_addr = -1;
61
- /* Unpack to W, shift, and repack. Tricky bits:
28
desc->large_page_mask = -1;
62
- (1) Use punpck*bw x,x to produce DDCCBBAA,
29
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
63
- i.e. duplicate in other half of the 16-bit lane.
30
memset(desc->vtable, -1, sizeof(desc->vtable));
64
- (2) For right-shift, add 8 so that the high half of
65
- the lane becomes zero. For left-shift, we must
66
- shift up and down again.
67
- (3) Step 2 leaves high half zero such that PACKUSWB
68
- (pack with unsigned saturation) does not modify
69
- the quantity. */
70
+ /*
71
+ * Unpack to W, shift, and repack. Tricky bits:
72
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
73
+ * i.e. duplicate in other half of the 16-bit lane.
74
+ * (2) For right-shift, add 8 so that the high half of the lane
75
+ * becomes zero. For left-shift, and left-rotate, we must
76
+ * shift up and down again.
77
+ * (3) Step 2 leaves high half zero such that PACKUSWB
78
+ * (pack with unsigned saturation) does not modify
79
+ * the quantity.
80
+ */
81
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
82
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
83
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
84
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
85
86
- if (shr) {
87
- tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
88
- tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
89
+ if (opc != INDEX_op_rotli_vec) {
90
+ imm += 8;
91
+ }
92
+ if (opc == INDEX_op_shri_vec) {
93
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
94
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
95
} else {
96
- tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
97
- tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
98
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
99
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
100
tcg_gen_shri_vec(MO_16, t1, t1, 8);
101
tcg_gen_shri_vec(MO_16, t2, t2, 8);
102
}
103
@@ -XXX,XX +XXX,XX @@ static void expand_vec_sari(TCGType type, unsigned vece,
104
}
31
}
105
}
32
106
33
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
107
+static void expand_vec_rotli(TCGType type, unsigned vece,
108
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
34
+{
109
+{
35
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
110
+ TCGv_vec t;
36
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
111
+
37
+
112
+ if (vece == MO_8) {
38
+ tlb_mmu_resize_locked(desc, fast);
113
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
39
+ tlb_mmu_flush_locked(desc, fast);
114
+ return;
115
+ }
116
+
117
+ t = tcg_temp_new_vec(type);
118
+ tcg_gen_shli_vec(vece, t, v1, imm);
119
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
120
+ tcg_gen_or_vec(vece, v0, v0, t);
121
+ tcg_temp_free_vec(t);
40
+}
122
+}
41
+
123
+
42
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
124
+static void expand_vec_rotls(TCGType type, unsigned vece,
125
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
126
+{
127
+ TCGv_i32 rsh;
128
+ TCGv_vec t;
129
+
130
+ tcg_debug_assert(vece != MO_8);
131
+
132
+ t = tcg_temp_new_vec(type);
133
+ rsh = tcg_temp_new_i32();
134
+
135
+ tcg_gen_neg_i32(rsh, lsh);
136
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
137
+ tcg_gen_shls_vec(vece, t, v1, lsh);
138
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
139
+ tcg_gen_or_vec(vece, v0, v0, t);
140
+ tcg_temp_free_vec(t);
141
+ tcg_temp_free_i32(rsh);
142
+}
143
+
144
+static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
145
+ TCGv_vec v1, TCGv_vec sh, bool right)
146
+{
147
+ TCGv_vec t = tcg_temp_new_vec(type);
148
+
149
+ tcg_gen_dupi_vec(vece, t, 8 << vece);
150
+ tcg_gen_sub_vec(vece, t, t, sh);
151
+ if (right) {
152
+ tcg_gen_shlv_vec(vece, t, v1, t);
153
+ tcg_gen_shrv_vec(vece, v0, v1, sh);
154
+ } else {
155
+ tcg_gen_shrv_vec(vece, t, v1, t);
156
+ tcg_gen_shlv_vec(vece, v0, v1, sh);
157
+ }
158
+ tcg_gen_or_vec(vece, v0, v0, t);
159
+ tcg_temp_free_vec(t);
160
+}
161
+
162
static void expand_vec_mul(TCGType type, unsigned vece,
163
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
43
{
164
{
44
env_tlb(env)->d[mmu_idx].n_used_entries++;
165
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
166
switch (opc) {
167
case INDEX_op_shli_vec:
168
case INDEX_op_shri_vec:
169
- expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
170
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
171
break;
172
173
case INDEX_op_sari_vec:
174
expand_vec_sari(type, vece, v0, v1, a2);
175
break;
176
177
+ case INDEX_op_rotli_vec:
178
+ expand_vec_rotli(type, vece, v0, v1, a2);
179
+ break;
180
+
181
+ case INDEX_op_rotls_vec:
182
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
183
+ break;
184
+
185
+ case INDEX_op_rotlv_vec:
186
+ v2 = temp_tcgv_vec(arg_temp(a2));
187
+ expand_vec_rotv(type, vece, v0, v1, v2, false);
188
+ break;
189
+ case INDEX_op_rotrv_vec:
190
+ v2 = temp_tcgv_vec(arg_temp(a2));
191
+ expand_vec_rotv(type, vece, v0, v1, v2, true);
192
+ break;
193
+
194
case INDEX_op_mul_vec:
195
v2 = temp_tcgv_vec(arg_temp(a2));
196
expand_vec_mul(type, vece, v0, v1, v2);
45
--
197
--
46
2.20.1
198
2.25.1
47
199
48
200
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
For immediate rotate, we can implement this in two instructions,
2
using SLI. For variable rotate, the oddness of aarch64 right-shift-
3
as-negative-left-shift means a backend-specific expansion works best.
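A scalar sketch of that variable-rotate expansion, assuming 32-bit elements
(illustration only; USHL treats a negative per-element count as a right shift,
so two "left" shifts suffice):

    #include <stdint.h>

    static inline uint32_t rotlv32(uint32_t x, uint32_t s)
    {
        s &= 31;
        uint32_t hi = x << s;                  /* USHL by s            */
        uint32_t lo = s ? x >> (32 - s) : 0;   /* USHL by s - 32 (< 0) */
        return hi | lo;
    }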
2
4
3
To avoid scrolling each instruction when reviewing tcg
4
helpers written for the decodetree script, display the
5
.decode files (similar to header declarations) before
6
the C source (implementation of previous declarations).
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Message-Id: <20191230082856.30556-1-philmd@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
6
---
14
scripts/git.orderfile | 3 +++
7
tcg/aarch64/tcg-target.opc.h | 1 +
15
1 file changed, 3 insertions(+)
8
tcg/aarch64/tcg-target.inc.c | 53 ++++++++++++++++++++++++++++++++++--
9
2 files changed, 52 insertions(+), 2 deletions(-)
16
10
17
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
11
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/git.orderfile
13
--- a/tcg/aarch64/tcg-target.opc.h
20
+++ b/scripts/git.orderfile
14
+++ b/tcg/aarch64/tcg-target.opc.h
21
@@ -XXX,XX +XXX,XX @@ qga/*.json
15
@@ -XXX,XX +XXX,XX @@
22
# headers
16
*/
23
*.h
17
24
18
DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
25
+# decoding tree specification
19
+DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
26
+*.decode
20
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/tcg/aarch64/tcg-target.inc.c
23
+++ b/tcg/aarch64/tcg-target.inc.c
24
@@ -XXX,XX +XXX,XX @@ typedef enum {
25
I3614_SSHR = 0x0f000400,
26
I3614_SSRA = 0x0f001400,
27
I3614_SHL = 0x0f005400,
28
+ I3614_SLI = 0x2f005400,
29
I3614_USHR = 0x2f000400,
30
I3614_USRA = 0x2f001400,
31
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
33
case INDEX_op_sari_vec:
34
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
35
break;
36
+ case INDEX_op_aa64_sli_vec:
37
+ tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
38
+ break;
39
case INDEX_op_shlv_vec:
40
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
41
break;
42
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
43
case INDEX_op_shlv_vec:
44
case INDEX_op_bitsel_vec:
45
return 1;
46
+ case INDEX_op_rotli_vec:
47
case INDEX_op_shrv_vec:
48
case INDEX_op_sarv_vec:
49
+ case INDEX_op_rotlv_vec:
50
+ case INDEX_op_rotrv_vec:
51
return -1;
52
case INDEX_op_mul_vec:
53
case INDEX_op_smax_vec:
54
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
55
TCGArg a0, ...)
56
{
57
va_list va;
58
- TCGv_vec v0, v1, v2, t1;
59
+ TCGv_vec v0, v1, v2, t1, t2;
60
+ TCGArg a2;
61
62
va_start(va, a0);
63
v0 = temp_tcgv_vec(arg_temp(a0));
64
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
65
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
66
+ a2 = va_arg(va, TCGArg);
67
+ v2 = temp_tcgv_vec(arg_temp(a2));
68
69
switch (opc) {
70
+ case INDEX_op_rotli_vec:
71
+ t1 = tcg_temp_new_vec(type);
72
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
73
+ vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
74
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
75
+ tcg_temp_free_vec(t1);
76
+ break;
27
+
77
+
28
# code
78
case INDEX_op_shrv_vec:
29
*.c
79
case INDEX_op_sarv_vec:
80
/* Right shifts are negative left shifts for AArch64. */
81
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
82
tcg_temp_free_vec(t1);
83
break;
84
85
+ case INDEX_op_rotlv_vec:
86
+ t1 = tcg_temp_new_vec(type);
87
+ tcg_gen_dupi_vec(vece, t1, 8 << vece);
88
+ tcg_gen_sub_vec(vece, t1, v2, t1);
89
+ /* Right shifts are negative left shifts for AArch64. */
90
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
91
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
92
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
93
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
94
+ tcg_gen_or_vec(vece, v0, v0, t1);
95
+ tcg_temp_free_vec(t1);
96
+ break;
97
+
98
+ case INDEX_op_rotrv_vec:
99
+ t1 = tcg_temp_new_vec(type);
100
+ t2 = tcg_temp_new_vec(type);
101
+ tcg_gen_neg_vec(vece, t1, v2);
102
+ tcg_gen_dupi_vec(vece, t2, 8 << vece);
103
+ tcg_gen_add_vec(vece, t2, t1, t2);
104
+ /* Right shifts are negative left shifts for AArch64. */
105
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
106
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
107
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
108
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
109
+ tcg_gen_or_vec(vece, v0, t1, t2);
110
+ tcg_temp_free_vec(t1);
111
+ tcg_temp_free_vec(t2);
112
+ break;
113
+
114
default:
115
g_assert_not_reached();
116
}
117
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
118
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
119
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
120
static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
121
+ static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
122
static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
123
static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
124
static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
125
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
126
return &w_w_wZ;
127
case INDEX_op_bitsel_vec:
128
return &w_w_w_w;
129
+ case INDEX_op_aa64_sli_vec:
130
+ return &w_0_w;
131
132
default:
133
return NULL;
30
--
134
--
31
2.20.1
135
2.25.1
32
136
33
137
1
Do not call get_clock_realtime() in tlb_mmu_resize_locked,
1
We already had support for rotlv, using a target-specific opcode;
2
but hoist it outside of any loop over a set of tlbs. There are
2
convert to use the generic opcode. Handle rotrv via simple negation.
3
only two (indirect) callers, tlb_flush_by_mmuidx_async_work
4
and tlb_flush_page_locked, so this is not onerous.
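A scalar sketch of the rotrv fallback mentioned above (the per-element counts
are negated and the left-rotate path is reused; names are invented for the
example):

    #include <stdint.h>

    static void rotrv_u32(uint32_t *d, const uint32_t *a,
                          const uint32_t *b, int n)
    {
        for (int i = 0; i < n; i++) {
            /* negate the count, then rotate left by the result */
            unsigned l = -b[i] & 31;
            d[i] = (a[i] << l) | (l ? a[i] >> (32 - l) : 0);
        }
    }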
5
3
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
5
---
11
accel/tcg/cputlb.c | 14 ++++++++------
6
tcg/ppc/tcg-target.h | 2 +-
12
1 file changed, 8 insertions(+), 6 deletions(-)
7
tcg/ppc/tcg-target.opc.h | 1 -
8
tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++----
9
3 files changed, 20 insertions(+), 6 deletions(-)
13
10
14
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cputlb.c
13
--- a/tcg/ppc/tcg-target.h
17
+++ b/accel/tcg/cputlb.c
14
+++ b/tcg/ppc/tcg-target.h
18
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
15
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
19
* high), since otherwise we are likely to have a significant amount of
16
#define TCG_TARGET_HAS_abs_vec 0
20
* conflict misses.
17
#define TCG_TARGET_HAS_roti_vec 0
21
*/
18
#define TCG_TARGET_HAS_rots_vec 0
22
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
19
-#define TCG_TARGET_HAS_rotv_vec 0
23
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
20
+#define TCG_TARGET_HAS_rotv_vec 1
24
+ int64_t now)
21
#define TCG_TARGET_HAS_shi_vec 0
22
#define TCG_TARGET_HAS_shs_vec 0
23
#define TCG_TARGET_HAS_shv_vec 1
24
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.opc.h
27
+++ b/tcg/ppc/tcg-target.opc.h
28
@@ -XXX,XX +XXX,XX @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
29
DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
30
DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
31
DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
32
-DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
33
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target.inc.c
36
+++ b/tcg/ppc/tcg-target.inc.c
37
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
38
case INDEX_op_shlv_vec:
39
case INDEX_op_shrv_vec:
40
case INDEX_op_sarv_vec:
41
+ case INDEX_op_rotlv_vec:
42
return vece <= MO_32 || have_isa_2_07;
43
case INDEX_op_ssadd_vec:
44
case INDEX_op_sssub_vec:
45
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
46
case INDEX_op_shli_vec:
47
case INDEX_op_shri_vec:
48
case INDEX_op_sari_vec:
49
+ case INDEX_op_rotli_vec:
50
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
51
case INDEX_op_neg_vec:
52
return vece >= MO_32 && have_isa_3_00;
53
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
54
return 0;
55
case INDEX_op_bitsel_vec:
56
return have_vsx;
57
+ case INDEX_op_rotrv_vec:
58
+ return -1;
59
default:
60
return 0;
61
}
62
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
63
case INDEX_op_ppc_pkum_vec:
64
insn = pkum_op[vece];
65
break;
66
- case INDEX_op_ppc_rotl_vec:
67
+ case INDEX_op_rotlv_vec:
68
insn = rotl_op[vece];
69
break;
70
case INDEX_op_ppc_msum_vec:
71
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
72
t3 = tcg_temp_new_vec(type);
73
t4 = tcg_temp_new_vec(type);
74
tcg_gen_dupi_vec(MO_8, t4, -16);
75
- vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
76
+ vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
77
tcgv_vec_arg(v2), tcgv_vec_arg(t4));
78
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
79
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
80
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
81
TCGArg a0, ...)
25
{
82
{
26
size_t old_size = tlb_n_entries(fast);
83
va_list va;
27
size_t rate;
84
- TCGv_vec v0, v1, v2;
28
size_t new_size = old_size;
85
+ TCGv_vec v0, v1, v2, t0;
29
- int64_t now = get_clock_realtime();
86
TCGArg a2;
30
int64_t window_len_ms = 100;
87
31
int64_t window_len_ns = window_len_ms * 1000 * 1000;
88
va_start(va, a0);
32
bool window_expired = now > desc->window_begin_ns + window_len_ns;
89
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
33
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
90
case INDEX_op_sari_vec:
34
memset(desc->vtable, -1, sizeof(desc->vtable));
91
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
35
}
92
break;
36
93
+ case INDEX_op_rotli_vec:
37
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
94
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
38
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
95
+ break;
39
+ int64_t now)
96
case INDEX_op_cmp_vec:
40
{
97
v2 = temp_tcgv_vec(arg_temp(a2));
41
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
98
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
42
CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
99
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
43
100
v2 = temp_tcgv_vec(arg_temp(a2));
44
- tlb_mmu_resize_locked(desc, fast);
101
expand_vec_mul(type, vece, v0, v1, v2);
45
+ tlb_mmu_resize_locked(desc, fast, now);
102
break;
46
tlb_mmu_flush_locked(desc, fast);
103
+ case INDEX_op_rotlv_vec:
47
}
104
+ v2 = temp_tcgv_vec(arg_temp(a2));
48
105
+ t0 = tcg_temp_new_vec(type);
49
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
106
+ tcg_gen_neg_vec(vece, t0, v2);
50
CPUArchState *env = cpu->env_ptr;
107
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
51
uint16_t asked = data.host_int;
108
+ tcg_temp_free_vec(t0);
52
uint16_t all_dirty, work, to_clean;
109
+ break;
53
+ int64_t now = get_clock_realtime();
110
default:
54
111
g_assert_not_reached();
55
assert_cpu_is_self(cpu);
56
57
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
58
59
for (work = to_clean; work != 0; work &= work - 1) {
60
int mmu_idx = ctz32(work);
61
- tlb_flush_one_mmuidx_locked(env, mmu_idx);
62
+ tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
63
}
112
}
64
113
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
65
qemu_spin_unlock(&env_tlb(env)->c.lock);
114
case INDEX_op_shlv_vec:
66
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
115
case INDEX_op_shrv_vec:
67
tlb_debug("forcing full flush midx %d ("
116
case INDEX_op_sarv_vec:
68
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
117
+ case INDEX_op_rotlv_vec:
69
midx, lp_addr, lp_mask);
118
+ case INDEX_op_rotrv_vec:
70
- tlb_flush_one_mmuidx_locked(env, midx);
119
case INDEX_op_ppc_mrgh_vec:
71
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
120
case INDEX_op_ppc_mrgl_vec:
72
} else {
121
case INDEX_op_ppc_muleu_vec:
73
if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
122
case INDEX_op_ppc_mulou_vec:
74
tlb_n_used_entries_dec(env, midx);
123
case INDEX_op_ppc_pkum_vec:
124
- case INDEX_op_ppc_rotl_vec:
125
case INDEX_op_dup2_vec:
126
return &v_v_v;
127
case INDEX_op_not_vec:
75
--
128
--
76
2.20.1
129
2.25.1
77
130
78
131
1
No functional change, but the smaller expressions make
1
Acked-by: David Gibson <david@gibson.dropbear.id.au>
2
the code easier to read.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
3
---
9
accel/tcg/cputlb.c | 19 ++++++++++---------
4
target/ppc/helper.h | 4 ----
10
1 file changed, 10 insertions(+), 9 deletions(-)
5
target/ppc/int_helper.c | 17 -----------------
6
target/ppc/translate/vmx-impl.inc.c | 8 ++++----
7
3 files changed, 4 insertions(+), 25 deletions(-)
11
8
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
9
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
11
--- a/target/ppc/helper.h
15
+++ b/accel/tcg/cputlb.c
12
+++ b/target/ppc/helper.h
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
13
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
17
14
DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
18
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
15
DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
16
DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
17
-DEF_HELPER_3(vrlb, void, avr, avr, avr)
18
-DEF_HELPER_3(vrlh, void, avr, avr, avr)
19
-DEF_HELPER_3(vrlw, void, avr, avr, avr)
20
-DEF_HELPER_3(vrld, void, avr, avr, avr)
21
DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
22
DEF_HELPER_3(vextractub, void, avr, avr, i32)
23
DEF_HELPER_3(vextractuh, void, avr, avr, i32)
24
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/ppc/int_helper.c
27
+++ b/target/ppc/int_helper.c
28
@@ -XXX,XX +XXX,XX @@ VRFI(p, float_round_up)
29
VRFI(z, float_round_to_zero)
30
#undef VRFI
31
32
-#define VROTATE(suffix, element, mask) \
33
- void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
34
- { \
35
- int i; \
36
- \
37
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
38
- unsigned int shift = b->element[i] & mask; \
39
- r->element[i] = (a->element[i] << shift) | \
40
- (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
41
- } \
42
- }
43
-VROTATE(b, u8, 0x7)
44
-VROTATE(h, u16, 0xF)
45
-VROTATE(w, u32, 0x1F)
46
-VROTATE(d, u64, 0x3F)
47
-#undef VROTATE
48
-
49
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
19
{
50
{
20
- tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
51
int i;
21
- env_tlb(env)->d[mmu_idx].n_used_entries = 0;
52
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
22
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
53
index XXXXXXX..XXXXXXX 100644
23
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
54
--- a/target/ppc/translate/vmx-impl.inc.c
24
- env_tlb(env)->d[mmu_idx].vindex = 0;
55
+++ b/target/ppc/translate/vmx-impl.inc.c
25
- memset(env_tlb(env)->f[mmu_idx].table, -1,
56
@@ -XXX,XX +XXX,XX @@ GEN_VXFORM3(vsubeuqm, 31, 0);
26
- sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
57
GEN_VXFORM3(vsubecuq, 31, 0);
27
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
58
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
28
- sizeof(env_tlb(env)->d[0].vtable));
59
vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
29
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
60
-GEN_VXFORM(vrlb, 2, 0);
30
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
61
-GEN_VXFORM(vrlh, 2, 1);
31
+
62
-GEN_VXFORM(vrlw, 2, 2);
32
+ tlb_mmu_resize_locked(desc, fast);
63
+GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0);
33
+ desc->n_used_entries = 0;
64
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1);
34
+ desc->large_page_addr = -1;
65
+GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2);
35
+ desc->large_page_mask = -1;
66
GEN_VXFORM(vrlwmi, 2, 2);
36
+ desc->vindex = 0;
67
GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
37
+ memset(fast->table, -1, sizeof_tlb(fast));
68
vrlwmi, PPC_NONE, PPC2_ISA300)
38
+ memset(desc->vtable, -1, sizeof(desc->vtable));
69
-GEN_VXFORM(vrld, 2, 3);
39
}
70
+GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3);
40
71
GEN_VXFORM(vrldmi, 2, 3);
41
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
72
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
73
vrldmi, PPC_NONE, PPC2_ISA300)
42
--
74
--
43
2.20.1
75
2.25.1
44
76
45
77
1
There is only one caller for tlb_table_flush_by_mmuidx. Place
1
Merge VERLL and VERLLV into op_vesv and op_ves, alongside
2
the result at the earlier line number, due to an expected user
2
all of the other vector shift operations.
3
in the near future.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
accel/tcg/cputlb.c | 19 +++++++------------
7
target/s390x/helper.h | 4 --
10
1 file changed, 7 insertions(+), 12 deletions(-)
8
target/s390x/translate_vx.inc.c | 66 +++++----------------------------
9
target/s390x/vec_int_helper.c | 31 ----------------
10
target/s390x/insn-data.def | 4 +-
11
4 files changed, 11 insertions(+), 94 deletions(-)
11
12
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
--- a/target/s390x/helper.h
15
+++ b/accel/tcg/cputlb.c
16
+++ b/target/s390x/helper.h
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
17
}
18
DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
19
DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
20
DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
21
-DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
22
-DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
23
-DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
24
-DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
25
DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
26
DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
27
DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
28
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/s390x/translate_vx.inc.c
31
+++ b/target/s390x/translate_vx.inc.c
32
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
33
return DISAS_NEXT;
18
}
34
}
19
35
20
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
36
-static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
21
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
22
{
23
tlb_mmu_resize_locked(env, mmu_idx);
24
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
25
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
26
+ env_tlb(env)->d[mmu_idx].large_page_addr = -1;
27
+ env_tlb(env)->d[mmu_idx].large_page_mask = -1;
28
+ env_tlb(env)->d[mmu_idx].vindex = 0;
29
+ memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
30
+ memset(env_tlb(env)->d[mmu_idx].vtable, -1,
31
+ sizeof(env_tlb(env)->d[0].vtable));
32
}
33
34
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
35
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
36
*pelide = elide;
37
}
38
39
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
-{
37
-{
41
- tlb_table_flush_by_mmuidx(env, mmu_idx);
38
- TCGv_i32 t0 = tcg_temp_new_i32();
42
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
39
-
43
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
40
- tcg_gen_andi_i32(t0, b, 31);
44
- env_tlb(env)->d[mmu_idx].vindex = 0;
41
- tcg_gen_rotl_i32(d, a, t0);
45
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
42
- tcg_temp_free_i32(t0);
46
- sizeof(env_tlb(env)->d[0].vtable));
47
-}
43
-}
48
-
44
-
49
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
45
-static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
46
-{
47
- TCGv_i64 t0 = tcg_temp_new_i64();
48
-
49
- tcg_gen_andi_i64(t0, b, 63);
50
- tcg_gen_rotl_i64(d, a, t0);
51
- tcg_temp_free_i64(t0);
52
-}
53
-
54
-static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
55
-{
56
- const uint8_t es = get_field(s, m4);
57
- static const GVecGen3 g[4] = {
58
- { .fno = gen_helper_gvec_verllv8, },
59
- { .fno = gen_helper_gvec_verllv16, },
60
- { .fni4 = gen_rll_i32, },
61
- { .fni8 = gen_rll_i64, },
62
- };
63
-
64
- if (es > ES_64) {
65
- gen_program_exception(s, PGM_SPECIFICATION);
66
- return DISAS_NORETURN;
67
- }
68
-
69
- gen_gvec_3(get_field(s, v1), get_field(s, v2),
70
- get_field(s, v3), &g[es]);
71
- return DISAS_NEXT;
72
-}
73
-
74
-static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
75
-{
76
- const uint8_t es = get_field(s, m4);
77
- static const GVecGen2s g[4] = {
78
- { .fno = gen_helper_gvec_verll8, },
79
- { .fno = gen_helper_gvec_verll16, },
80
- { .fni4 = gen_rll_i32, },
81
- { .fni8 = gen_rll_i64, },
82
- };
83
-
84
- if (es > ES_64) {
85
- gen_program_exception(s, PGM_SPECIFICATION);
86
- return DISAS_NORETURN;
87
- }
88
- gen_gvec_2s(get_field(s, v1), get_field(s, v3), o->addr1,
89
- &g[es]);
90
- return DISAS_NEXT;
91
-}
92
-
93
static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
50
{
94
{
51
CPUArchState *env = cpu->env_ptr;
95
TCGv_i32 t = tcg_temp_new_i32();
96
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
97
case 0x70:
98
gen_gvec_fn_3(shlv, es, v1, v2, v3);
99
break;
100
+ case 0x73:
101
+ gen_gvec_fn_3(rotlv, es, v1, v2, v3);
102
+ break;
103
case 0x7a:
104
gen_gvec_fn_3(sarv, es, v1, v2, v3);
105
break;
106
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
107
case 0x30:
108
gen_gvec_fn_2i(shli, es, v1, v3, d2);
109
break;
110
+ case 0x33:
111
+ gen_gvec_fn_2i(rotli, es, v1, v3, d2);
112
+ break;
113
case 0x3a:
114
gen_gvec_fn_2i(sari, es, v1, v3, d2);
115
break;
116
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
117
case 0x30:
118
gen_gvec_fn_2s(shls, es, v1, v3, shift);
119
break;
120
+ case 0x33:
121
+ gen_gvec_fn_2s(rotls, es, v1, v3, shift);
122
+ break;
123
case 0x3a:
124
gen_gvec_fn_2s(sars, es, v1, v3, shift);
125
break;
126
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/target/s390x/vec_int_helper.c
129
+++ b/target/s390x/vec_int_helper.c
130
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
131
DEF_VPOPCT(8)
132
DEF_VPOPCT(16)
133
134
-#define DEF_VERLLV(BITS) \
135
-void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
136
- uint32_t desc) \
137
-{ \
138
- int i; \
139
- \
140
- for (i = 0; i < (128 / BITS); i++) { \
141
- const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
142
- const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
143
- \
144
- s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \
145
- } \
146
-}
147
-DEF_VERLLV(8)
148
-DEF_VERLLV(16)
149
-
150
-#define DEF_VERLL(BITS) \
151
-void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
152
- uint32_t desc) \
153
-{ \
154
- int i; \
155
- \
156
- for (i = 0; i < (128 / BITS); i++) { \
157
- const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
158
- \
159
- s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \
160
- } \
161
-}
162
-DEF_VERLL(8)
163
-DEF_VERLL(16)
164
-
165
#define DEF_VERIM(BITS) \
166
void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
167
uint32_t desc) \
168
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
169
index XXXXXXX..XXXXXXX 100644
170
--- a/target/s390x/insn-data.def
171
+++ b/target/s390x/insn-data.def
172
@@ -XXX,XX +XXX,XX @@
173
/* VECTOR POPULATION COUNT */
174
F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC)
175
/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
176
- F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC)
177
- F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC)
178
+ F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
179
+ F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
180
/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
181
F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC)
182
/* VECTOR ELEMENT SHIFT LEFT */
52
--
183
--
53
2.20.1
184
2.25.1
54
185
55
186
1
By choosing "tcg:kvm" when kvm is not enabled, we generate
1
If the output of the move is dead, then the last use is in
2
an incorrect warning: "invalid accelerator kvm".
2
the store. If we propagate the input to the store, then we
3
can remove the move opcode entirely.
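An illustrative before/after at the TCG opcode level (the temps and the offset
are invented for the example):

    before liveness_pass_2:
        mov_i32 tmp3,tmp2
        st_i32  tmp3,env,$0x10      (tmp3 is dead after the store)

    after:
        st_i32  tmp2,env,$0x10      (the mov is removed entirely)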
3
4
4
At the same time, use g_str_has_suffix rather than open-coding
5
the same operation.
6
7
Presumably the inverse is also true with --disable-tcg.
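For reference, a standalone sketch of the suffix check (the helper name is
invented; the real change is in the vl.c hunk below):

    #include <glib.h>

    /* g_str_has_suffix() replaces the open-coded strlen()/g_str_equal()
     * comparison against the "kvm" program-name suffix. */
    static const char *default_accel_for(const char *progname)
    {
        return g_str_has_suffix(progname, "kvm") ? "kvm:tcg" : "tcg:kvm";
    }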
8
9
Fixes: 28a0961757fc
10
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
6
---
15
vl.c | 21 +++++++++++++--------
7
tcg/tcg.c | 78 +++++++++++++++++++++++++++++++++++++++----------------
16
1 file changed, 13 insertions(+), 8 deletions(-)
8
1 file changed, 56 insertions(+), 22 deletions(-)
17
9
18
diff --git a/vl.c b/vl.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
19
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
20
--- a/vl.c
12
--- a/tcg/tcg.c
21
+++ b/vl.c
13
+++ b/tcg/tcg.c
22
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
14
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
23
15
}
24
if (accel == NULL) {
16
25
/* Select the default accelerator */
17
/* Outputs become available. */
26
- if (!accel_find("tcg") && !accel_find("kvm")) {
18
- for (i = 0; i < nb_oargs; i++) {
27
- error_report("No accelerator selected and"
19
- arg_ts = arg_temp(op->args[i]);
28
- " no default accelerator available");
20
+ if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
29
- exit(1);
21
+ arg_ts = arg_temp(op->args[0]);
30
- } else {
22
dir_ts = arg_ts->state_ptr;
31
- int pnlen = strlen(progname);
23
- if (!dir_ts) {
32
- if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
24
- continue;
33
+ bool have_tcg = accel_find("tcg");
25
+ if (dir_ts) {
34
+ bool have_kvm = accel_find("kvm");
26
+ op->args[0] = temp_arg(dir_ts);
27
+ changes = true;
35
+
28
+
36
+ if (have_tcg && have_kvm) {
29
+ /* The output is now live and modified. */
37
+ if (g_str_has_suffix(progname, "kvm")) {
30
+ arg_ts->state = 0;
38
/* If the program name ends with "kvm", we prefer KVM */
31
+
39
accel = "kvm:tcg";
32
+ if (NEED_SYNC_ARG(0)) {
40
} else {
33
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
41
accel = "tcg:kvm";
34
+ ? INDEX_op_st_i32
42
}
35
+ : INDEX_op_st_i64);
43
+ } else if (have_kvm) {
36
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
44
+ accel = "kvm";
37
+ TCGTemp *out_ts = dir_ts;
45
+ } else if (have_tcg) {
38
+
46
+ accel = "tcg";
39
+ if (IS_DEAD_ARG(0)) {
47
+ } else {
40
+ out_ts = arg_temp(op->args[1]);
48
+ error_report("No accelerator selected and"
41
+ arg_ts->state = TS_DEAD;
49
+ " no default accelerator available");
42
+ tcg_op_remove(s, op);
50
+ exit(1);
43
+ } else {
44
+ arg_ts->state = TS_MEM;
45
+ }
46
+
47
+ sop->args[0] = temp_arg(out_ts);
48
+ sop->args[1] = temp_arg(arg_ts->mem_base);
49
+ sop->args[2] = arg_ts->mem_offset;
50
+ } else {
51
+ tcg_debug_assert(!IS_DEAD_ARG(0));
52
+ }
53
}
54
- op->args[i] = temp_arg(dir_ts);
55
- changes = true;
56
+ } else {
57
+ for (i = 0; i < nb_oargs; i++) {
58
+ arg_ts = arg_temp(op->args[i]);
59
+ dir_ts = arg_ts->state_ptr;
60
+ if (!dir_ts) {
61
+ continue;
62
+ }
63
+ op->args[i] = temp_arg(dir_ts);
64
+ changes = true;
65
66
- /* The output is now live and modified. */
67
- arg_ts->state = 0;
68
+ /* The output is now live and modified. */
69
+ arg_ts->state = 0;
70
71
- /* Sync outputs upon their last write. */
72
- if (NEED_SYNC_ARG(i)) {
73
- TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
74
- ? INDEX_op_st_i32
75
- : INDEX_op_st_i64);
76
- TCGOp *sop = tcg_op_insert_after(s, op, sopc);
77
+ /* Sync outputs upon their last write. */
78
+ if (NEED_SYNC_ARG(i)) {
79
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
80
+ ? INDEX_op_st_i32
81
+ : INDEX_op_st_i64);
82
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
83
84
- sop->args[0] = temp_arg(dir_ts);
85
- sop->args[1] = temp_arg(arg_ts->mem_base);
86
- sop->args[2] = arg_ts->mem_offset;
87
+ sop->args[0] = temp_arg(dir_ts);
88
+ sop->args[1] = temp_arg(arg_ts->mem_base);
89
+ sop->args[2] = arg_ts->mem_offset;
90
91
- arg_ts->state = TS_MEM;
92
- }
93
- /* Drop outputs that are dead. */
94
- if (IS_DEAD_ARG(i)) {
95
- arg_ts->state = TS_DEAD;
96
+ arg_ts->state = TS_MEM;
97
+ }
98
+ /* Drop outputs that are dead. */
99
+ if (IS_DEAD_ARG(i)) {
100
+ arg_ts->state = TS_DEAD;
101
+ }
51
}
102
}
52
}
103
}
53
-
104
}
54
accel_list = g_strsplit(accel, ":", 0);
55
56
for (tmp = accel_list; *tmp; tmp++) {
57
--
105
--
58
2.20.1
106
2.25.1
59
107
60
108
1
We do not need the entire CPUArchState to compute these values.
1
From: Nick Hudson <skrll@netbsd.org>
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Fix building on NetBSD/arm by extracting the FSR value from the
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
correct siginfo_t field.
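A standalone sketch of the resulting check (on NetBSD the FSR arrives in
siginfo_t::si_trap, as described above; the helper name is invented):

    #include <stdint.h>

    /* FSR bit 11 is WnR on ARMv6+: set for a write, clear for a read */
    static inline int fsr_is_write(uint32_t fsr)
    {
        return (fsr >> 11) & 1;
    }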
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
5
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Nick Hudson <skrll@netbsd.org>
8
Message-Id: <20200516154147.24842-1-skrll@netbsd.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
accel/tcg/cputlb.c | 15 ++++++++-------
11
accel/tcg/user-exec.c | 16 +++++++++++++---
9
1 file changed, 8 insertions(+), 7 deletions(-)
12
1 file changed, 13 insertions(+), 3 deletions(-)
10
13
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
14
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
16
--- a/accel/tcg/user-exec.c
14
+++ b/accel/tcg/cputlb.c
17
+++ b/accel/tcg/user-exec.c
15
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
18
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
16
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
19
17
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
20
#if defined(__NetBSD__)
18
21
#include <ucontext.h>
19
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
22
+#include <sys/siginfo.h>
20
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
23
#endif
21
{
24
22
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
25
int cpu_signal_handler(int host_signum, void *pinfo,
23
+ return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
26
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
27
siginfo_t *info = pinfo;
28
#if defined(__NetBSD__)
29
ucontext_t *uc = puc;
30
+ siginfo_t *si = pinfo;
31
#else
32
ucontext_t *uc = puc;
33
#endif
34
unsigned long pc;
35
+ uint32_t fsr;
36
int is_write;
37
38
#if defined(__NetBSD__)
39
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
40
pc = uc->uc_mcontext.arm_pc;
41
#endif
42
43
- /* error_code is the FSR value, in which bit 11 is WnR (assuming a v6 or
44
- * later processor; on v5 we will always report this as a read).
45
+#ifdef __NetBSD__
46
+ fsr = si->si_trap;
47
+#else
48
+ fsr = uc->uc_mcontext.error_code;
49
+#endif
50
+ /*
51
+ * In the FSR, bit 11 is WnR, assuming a v6 or
52
+ * later processor. On v5 we will always report
53
+ * this as a read, which will fail later.
54
*/
55
- is_write = extract32(uc->uc_mcontext.error_code, 11, 1);
56
+ is_write = extract32(fsr, 11, 1);
57
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
24
}
58
}
25
59
26
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
27
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
28
{
29
- return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
30
+ return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
31
}
32
33
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
34
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
35
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
36
{
37
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
38
- size_t old_size = tlb_n_entries(env, mmu_idx);
39
+ size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
40
size_t rate;
41
size_t new_size = old_size;
42
int64_t now = get_clock_realtime();
43
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
44
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
45
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
46
env_tlb(env)->d[mmu_idx].vindex = 0;
47
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
48
+ memset(env_tlb(env)->f[mmu_idx].table, -1,
49
+ sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
50
memset(env_tlb(env)->d[mmu_idx].vtable, -1,
51
sizeof(env_tlb(env)->d[0].vtable));
52
}
53
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
54
qemu_spin_lock(&env_tlb(env)->c.lock);
55
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
56
unsigned int i;
57
- unsigned int n = tlb_n_entries(env, mmu_idx);
58
+ unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
59
60
for (i = 0; i < n; i++) {
61
tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
62
--
60
--
63
2.20.1
61
2.25.1
64
62
65
63
1
There are no users of this function outside cputlb.c,
1
From: Nick Hudson <skrll@netbsd.org>
2
and its interface will change in the next patch.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Fix qemu build on NetBSD/evbarm-aarch64 by providing a NetBSD specific
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
cpu_signal_handler.
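A standalone sketch of the ESR decode used by the new handler (the helper name
is invented for the example):

    #include <stdint.h>

    /* ESR.EC (bits [31:26]) of the form 0b10010x marks a data abort;
     * for data aborts, bit 6 (WnR) is set for writes. */
    static inline int esr_is_write(uint32_t esr)
    {
        return ((esr >> 27) & 0x1f) == 0x12 && ((esr >> 6) & 1);
    }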
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
5
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Nick Hudson <skrll@netbsd.org>
8
Message-Id: <20200517101529.5367-1-skrll@netbsd.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
include/exec/cpu_ldst.h | 5 -----
11
accel/tcg/user-exec.c | 27 +++++++++++++++++++++++++++
10
accel/tcg/cputlb.c | 5 +++++
12
1 file changed, 27 insertions(+)
11
2 files changed, 5 insertions(+), 5 deletions(-)
12
13
13
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
14
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu_ldst.h
16
--- a/accel/tcg/user-exec.c
16
+++ b/include/exec/cpu_ldst.h
17
+++ b/accel/tcg/user-exec.c
17
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
18
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
18
return (addr >> TARGET_PAGE_BITS) & size_mask;
19
19
}
20
#elif defined(__aarch64__)
20
21
21
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
22
+#if defined(__NetBSD__)
22
-{
23
+
23
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
24
+#include <ucontext.h>
24
-}
25
+#include <sys/siginfo.h>
25
-
26
+
26
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
27
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
27
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
28
target_ulong addr)
29
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/accel/tcg/cputlb.c
32
+++ b/accel/tcg/cputlb.c
33
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
34
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
35
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
36
37
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
38
+{
28
+{
39
+ return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
29
+ ucontext_t *uc = puc;
30
+ siginfo_t *si = pinfo;
31
+ unsigned long pc;
32
+ int is_write;
33
+ uint32_t esr;
34
+
35
+ pc = uc->uc_mcontext.__gregs[_REG_PC];
36
+ esr = si->si_trap;
37
+
38
+ /*
39
+ * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC
40
+ * is 0b10010x: then bit 6 is the WnR bit
41
+ */
42
+ is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
43
+ return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask);
40
+}
44
+}
41
+
45
+
42
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
46
+#else
43
{
47
+
44
return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
48
#ifndef ESR_MAGIC
49
/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
50
#define ESR_MAGIC 0x45535201
51
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
52
}
53
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
54
}
55
+#endif
56
57
#elif defined(__s390__)
58
45
--
59
--
46
2.20.1
60
2.25.1
47
61
48
62