The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20191013

for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328:

  cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400)

----------------------------------------------------------------
Host vector support for tcg/ppc.
Fix thread=single cpu kicking.

----------------------------------------------------------------
Alex Bennée (1):
      cpus: kick all vCPUs when running thread=single

Richard Henderson (22):
      tcg/ppc: Introduce Altivec registers
      tcg/ppc: Introduce macro VX4()
      tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()
      tcg/ppc: Create TCGPowerISA and have_isa
      tcg/ppc: Replace HAVE_ISA_2_06
      tcg/ppc: Replace HAVE_ISEL macro with a variable
      tcg/ppc: Enable tcg backend vector compilation
      tcg/ppc: Add support for load/store/logic/comparison
      tcg/ppc: Add support for vector maximum/minimum
      tcg/ppc: Add support for vector add/subtract
      tcg/ppc: Add support for vector saturated add/subtract
      tcg/ppc: Support vector shift by immediate
      tcg/ppc: Support vector multiply
      tcg/ppc: Support vector dup2
      tcg/ppc: Enable Altivec detection
      tcg/ppc: Update vector support for VSX
      tcg/ppc: Update vector support for v2.07 Altivec
      tcg/ppc: Update vector support for v2.07 VSX
      tcg/ppc: Update vector support for v2.07 FP
      tcg/ppc: Update vector support for v3.00 Altivec
      tcg/ppc: Update vector support for v3.00 load/store
      tcg/ppc: Update vector support for v3.00 dup/dupi

 tcg/ppc/tcg-target.h | 51 ++-
 tcg/ppc/tcg-target.opc.h | 13 +
 cpus.c | 24 +-
 tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 1119 insertions(+), 87 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h


The following changes since commit 390e8fc6b0e7b521c9eceb8dfe0958e141009ab9:

  Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging (2023-06-26 16:05:45 +0200)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230626

for you to fetch changes up to a0eaae08c7c6a59c185cf646b02f4167b2ac6ec0:

  accel/tcg: Renumber TLB_DISCARD_WRITE (2023-06-26 17:33:00 +0200)

----------------------------------------------------------------
accel/tcg: Replace target_ulong in some APIs
accel/tcg: Remove CONFIG_PROFILER
accel/tcg: Store some tlb flags in CPUTLBEntryFull
tcg: Issue memory barriers as required for the guest memory model
tcg: Fix temporary variable in tcg_gen_gvec_andcs

----------------------------------------------------------------
Alex Bennée (1):
      softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining

Anton Johansson (11):
      accel: Replace target_ulong in tlb_*()
      accel/tcg/translate-all.c: Widen pc and cs_base
      target: Widen pc/cs_base in cpu_get_tb_cpu_state
      accel/tcg/cputlb.c: Widen CPUTLBEntry access functions
      accel/tcg/cputlb.c: Widen addr in MMULookupPageData
      accel/tcg/cpu-exec.c: Widen pc to vaddr
      accel/tcg: Widen pc to vaddr in CPUJumpCache
      accel: Replace target_ulong with vaddr in probe_*()
      accel/tcg: Replace target_ulong with vaddr in *_mmu_lookup()
      accel/tcg: Replace target_ulong with vaddr in translator_*()
      cpu: Replace target_ulong with hwaddr in tb_invalidate_phys_addr()

Fei Wu (1):
      accel/tcg: remove CONFIG_PROFILER

Max Chou (1):
      tcg: Fix temporary variable in tcg_gen_gvec_andcs

Richard Henderson (8):
      tests/plugin: Remove duplicate insn log from libinsn.so
      target/microblaze: Define TCG_GUEST_DEFAULT_MO
      tcg: Do not elide memory barriers for !CF_PARALLEL in system mode
      tcg: Add host memory barriers to cpu_ldst.h interfaces
      accel/tcg: Remove check_tcg_memory_orders_compatible
      accel/tcg: Store some tlb flags in CPUTLBEntryFull
      accel/tcg: Move TLB_WATCHPOINT to TLB_SLOW_FLAGS_MASK
      accel/tcg: Renumber TLB_DISCARD_WRITE

 meson.build | 2 -
 qapi/machine.json | 18 --
 accel/tcg/internal.h | 40 +++-
 accel/tcg/tb-hash.h | 12 +-
 accel/tcg/tb-jmp-cache.h | 2 +-
 include/exec/cpu-all.h | 27 ++-
 include/exec/cpu-defs.h | 10 +-
 include/exec/cpu_ldst.h | 10 +-
 include/exec/exec-all.h | 95 +++++----
 include/exec/translator.h | 6 +-
 include/hw/core/cpu.h | 1 +
 include/qemu/plugin-memory.h | 2 +-
 include/qemu/timer.h | 9 -
 include/tcg/tcg.h | 26 ---
 target/alpha/cpu.h | 4 +-
 target/arm/cpu.h | 4 +-
 target/avr/cpu.h | 4 +-
 target/cris/cpu.h | 4 +-
 target/hexagon/cpu.h | 4 +-
 target/hppa/cpu.h | 5 +-
 target/i386/cpu.h | 4 +-
 target/loongarch/cpu.h | 6 +-
 target/m68k/cpu.h | 4 +-
 target/microblaze/cpu.h | 7 +-
 target/mips/cpu.h | 4 +-
 target/nios2/cpu.h | 4 +-
 target/openrisc/cpu.h | 5 +-
 target/ppc/cpu.h | 8 +-
 target/riscv/cpu.h | 4 +-
 target/rx/cpu.h | 4 +-
 target/s390x/cpu.h | 4 +-
 target/sh4/cpu.h | 4 +-
 target/sparc/cpu.h | 4 +-
 target/tricore/cpu.h | 4 +-
 target/xtensa/cpu.h | 4 +-
 accel/stubs/tcg-stub.c | 6 +-
 accel/tcg/cpu-exec.c | 43 ++--
 accel/tcg/cputlb.c | 351 +++++++++++++++++--------------
 accel/tcg/monitor.c | 31 ---
 accel/tcg/tb-maint.c | 2 +-
 accel/tcg/tcg-accel-ops.c | 10 -
 accel/tcg/tcg-all.c | 39 +---
 accel/tcg/translate-all.c | 46 +---
 accel/tcg/translator.c | 10 +-
 accel/tcg/user-exec.c | 24 ++-
 cpu.c | 2 +-
 fpu/softfloat.c | 22 +-
 softmmu/runstate.c | 9 -
 target/arm/helper.c | 4 +-
 target/ppc/helper_regs.c | 4 +-
 target/riscv/cpu_helper.c | 4 +-
 tcg/tcg-op-gvec.c | 2 +-
 tcg/tcg-op-ldst.c | 2 +-
 tcg/tcg-op.c | 14 +-
 tcg/tcg.c | 214 -------------------
 tests/plugin/insn.c | 9 +-
 tests/qtest/qmp-cmd-test.c | 3 -
 hmp-commands-info.hx | 15 --
 meson_options.txt | 2 -
 scripts/meson-buildoptions.sh | 3 -
 tests/tcg/i386/Makefile.softmmu-target | 9 -
 tests/tcg/i386/Makefile.target | 6 -
 tests/tcg/x86_64/Makefile.softmmu-target | 9 -
 63 files changed, 469 insertions(+), 781 deletions(-)
These new instructions are conditional on MSR.VEC for TX=1,
so we can consider these Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)


From: Anton Johansson <anjo@rev.ng>

Replaces target_ulong with vaddr for guest virtual addresses in tlb_*()
functions and auxiliary structs.

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-2-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h | 4 +-
 include/exec/exec-all.h | 79 ++++++++--------
 include/qemu/plugin-memory.h | 2 +-
 accel/stubs/tcg-stub.c | 2 +-
 accel/tcg/cputlb.c | 177 +++++++++++++++++------------
 accel/tcg/tb-maint.c | 2 +-
 6 files changed, 131 insertions(+), 135 deletions(-)
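For orientation, a minimal caller sketch (hypothetical, not part of the
patch; the helper name flush_guest_page is invented for illustration):
after the conversion, tlb_flush_page() takes a vaddr, which is always
64-bit, so the same wrapper compiles unchanged for 32-bit and 64-bit
guests.

    /* Hypothetical illustration only -- not code from this patch.
     * tlb_flush_page() is declared in include/exec/exec-all.h and,
     * after this change, takes a vaddr instead of a target_ulong. */
    static void flush_guest_page(CPUState *cpu, vaddr addr)
    {
        /* The flush interfaces expect a page-aligned address. */
        tlb_flush_page(cpu, addr & TARGET_PAGE_MASK);
    }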
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
19
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
11
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
21
--- a/include/exec/cpu-defs.h
13
+++ b/tcg/ppc/tcg-target.inc.c
22
+++ b/include/exec/cpu-defs.h
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
23
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
15
24
* we must flush the entire tlb. The region is matched if
16
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
25
* (addr & large_page_mask) == large_page_addr.
17
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
26
*/
18
+#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
27
- target_ulong large_page_addr;
19
28
- target_ulong large_page_mask;
20
#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
29
+ vaddr large_page_addr;
21
#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
30
+ vaddr large_page_mask;
22
#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
31
/* host time (in ns) at the beginning of the time window */
23
#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
32
int64_t window_begin_ns;
24
+#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
33
/* maximum number of entries observed in the window */
25
+#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */
34
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
26
35
index XXXXXXX..XXXXXXX 100644
27
#define RT(r) ((r)<<21)
36
--- a/include/exec/exec-all.h
28
#define RS(r) ((r)<<21)
37
+++ b/include/exec/exec-all.h
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
38
@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu);
30
return;
39
* Flush one page from the TLB of the specified CPU, for all
40
* MMU indexes.
41
*/
42
-void tlb_flush_page(CPUState *cpu, target_ulong addr);
43
+void tlb_flush_page(CPUState *cpu, vaddr addr);
44
/**
45
* tlb_flush_page_all_cpus:
46
* @cpu: src CPU of the flush
47
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
48
* Flush one page from the TLB of the specified CPU, for all
49
* MMU indexes.
50
*/
51
-void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
52
+void tlb_flush_page_all_cpus(CPUState *src, vaddr addr);
53
/**
54
* tlb_flush_page_all_cpus_synced:
55
* @cpu: src CPU of the flush
56
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
57
* the source vCPUs safe work is complete. This will depend on when
58
* the guests translation ends the TB.
59
*/
60
-void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr);
61
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
62
/**
63
* tlb_flush:
64
* @cpu: CPU whose TLB should be flushed
65
@@ -XXX,XX +XXX,XX @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu);
66
* Flush one page from the TLB of the specified CPU, for the specified
67
* MMU indexes.
68
*/
69
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
70
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
71
uint16_t idxmap);
72
/**
73
* tlb_flush_page_by_mmuidx_all_cpus:
74
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
75
* Flush one page from the TLB of all CPUs, for the specified
76
* MMU indexes.
77
*/
78
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
79
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
80
uint16_t idxmap);
81
/**
82
* tlb_flush_page_by_mmuidx_all_cpus_synced:
83
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
84
* complete once the source vCPUs safe work is complete. This will
85
* depend on when the guests translation ends the TB.
86
*/
87
-void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
88
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
89
uint16_t idxmap);
90
/**
91
* tlb_flush_by_mmuidx:
92
@@ -XXX,XX +XXX,XX @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
93
*
94
* Similar to tlb_flush_page_mask, but with a bitmap of indexes.
95
*/
96
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
97
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
98
uint16_t idxmap, unsigned bits);
99
100
/* Similarly, with broadcast and syncing. */
101
-void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
102
+void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
103
uint16_t idxmap, unsigned bits);
104
void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
105
- (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits);
106
+ (CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits);
107
108
/**
109
* tlb_flush_range_by_mmuidx
110
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
111
* For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
112
* comparing only the low @bits worth of each virtual page.
113
*/
114
-void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
115
- target_ulong len, uint16_t idxmap,
116
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
117
+ vaddr len, uint16_t idxmap,
118
unsigned bits);
119
120
/* Similarly, with broadcast and syncing. */
121
-void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
122
- target_ulong len, uint16_t idxmap,
123
+void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
124
+ vaddr len, uint16_t idxmap,
125
unsigned bits);
126
void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
127
- target_ulong addr,
128
- target_ulong len,
129
+ vaddr addr,
130
+ vaddr len,
131
uint16_t idxmap,
132
unsigned bits);
133
134
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
135
* tlb_set_page_full:
136
* @cpu: CPU context
137
* @mmu_idx: mmu index of the tlb to modify
138
- * @vaddr: virtual address of the entry to add
139
+ * @addr: virtual address of the entry to add
140
* @full: the details of the tlb entry
141
*
142
* Add an entry to @cpu tlb index @mmu_idx. All of the fields of
143
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
144
* single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
145
* used by tlb_flush_page.
146
*/
147
-void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
148
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr,
149
CPUTLBEntryFull *full);
150
151
/**
152
* tlb_set_page_with_attrs:
153
* @cpu: CPU to add this TLB entry for
154
- * @vaddr: virtual address of page to add entry for
155
+ * @addr: virtual address of page to add entry for
156
* @paddr: physical address of the page
157
* @attrs: memory transaction attributes
158
* @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits)
159
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
160
* @size: size of the page in bytes
161
*
162
* Add an entry to this CPU's TLB (a mapping from virtual address
163
- * @vaddr to physical address @paddr) with the specified memory
164
+ * @addr to physical address @paddr) with the specified memory
165
* transaction attributes. This is generally called by the target CPU
166
* specific code after it has been called through the tlb_fill()
167
* entry point and performed a successful page table walk to find
168
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
169
* single TARGET_PAGE_SIZE region is mapped; the supplied @size is only
170
* used by tlb_flush_page.
171
*/
172
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
173
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
174
hwaddr paddr, MemTxAttrs attrs,
175
- int prot, int mmu_idx, target_ulong size);
176
+ int prot, int mmu_idx, vaddr size);
177
/* tlb_set_page:
178
*
179
* This function is equivalent to calling tlb_set_page_with_attrs()
180
* with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided
181
* as a convenience for CPUs which don't use memory transaction attributes.
182
*/
183
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
184
+void tlb_set_page(CPUState *cpu, vaddr addr,
185
hwaddr paddr, int prot,
186
- int mmu_idx, target_ulong size);
187
+ int mmu_idx, vaddr size);
188
#else
189
static inline void tlb_init(CPUState *cpu)
190
{
191
@@ -XXX,XX +XXX,XX @@ static inline void tlb_init(CPUState *cpu)
192
static inline void tlb_destroy(CPUState *cpu)
193
{
194
}
195
-static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
196
+static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
197
{
198
}
199
-static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
200
+static inline void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
201
{
202
}
203
-static inline void tlb_flush_page_all_cpus_synced(CPUState *src,
204
- target_ulong addr)
205
+static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
206
{
207
}
208
static inline void tlb_flush(CPUState *cpu)
209
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
210
{
211
}
212
static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
213
- target_ulong addr, uint16_t idxmap)
214
+ vaddr addr, uint16_t idxmap)
215
{
216
}
217
218
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
219
{
220
}
221
static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
222
- target_ulong addr,
223
+ vaddr addr,
224
uint16_t idxmap)
225
{
226
}
227
static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
228
- target_ulong addr,
229
+ vaddr addr,
230
uint16_t idxmap)
231
{
232
}
233
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
234
{
235
}
236
static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
237
- target_ulong addr,
238
+ vaddr addr,
239
uint16_t idxmap,
240
unsigned bits)
241
{
242
}
243
static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu,
244
- target_ulong addr,
245
+ vaddr addr,
246
uint16_t idxmap,
247
unsigned bits)
248
{
249
}
250
static inline void
251
-tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
252
+tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
253
uint16_t idxmap, unsigned bits)
254
{
255
}
256
-static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
257
- target_ulong len, uint16_t idxmap,
258
+static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
259
+ vaddr len, uint16_t idxmap,
260
unsigned bits)
261
{
262
}
263
static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu,
264
- target_ulong addr,
265
- target_ulong len,
266
+ vaddr addr,
267
+ vaddr len,
268
uint16_t idxmap,
269
unsigned bits)
270
{
271
}
272
static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
273
- target_ulong addr,
274
- target_long len,
275
+ vaddr addr,
276
+ vaddr len,
277
uint16_t idxmap,
278
unsigned bits)
279
{
280
@@ -XXX,XX +XXX,XX @@ static inline void mmap_lock(void) {}
281
static inline void mmap_unlock(void) {}
282
283
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
284
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
285
+void tlb_set_dirty(CPUState *cpu, vaddr addr);
286
287
MemoryRegionSection *
288
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
289
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
290
index XXXXXXX..XXXXXXX 100644
291
--- a/include/qemu/plugin-memory.h
292
+++ b/include/qemu/plugin-memory.h
293
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_hwaddr {
294
* It would only fail if not called from an instrumented memory access
295
* which would be an abuse of the API.
296
*/
297
-bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
298
+bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
299
bool is_store, struct qemu_plugin_hwaddr *data);
300
301
#endif /* PLUGIN_MEMORY_H */
302
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
303
index XXXXXXX..XXXXXXX 100644
304
--- a/accel/stubs/tcg-stub.c
305
+++ b/accel/stubs/tcg-stub.c
306
@@ -XXX,XX +XXX,XX @@ void tb_flush(CPUState *cpu)
307
{
308
}
309
310
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
311
+void tlb_set_dirty(CPUState *cpu, vaddr vaddr)
312
{
313
}
314
315
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
316
index XXXXXXX..XXXXXXX 100644
317
--- a/accel/tcg/cputlb.c
318
+++ b/accel/tcg/cputlb.c
319
@@ -XXX,XX +XXX,XX @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu)
320
}
321
322
static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
323
- target_ulong page, target_ulong mask)
324
+ vaddr page, vaddr mask)
325
{
326
page &= mask;
327
mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
328
@@ -XXX,XX +XXX,XX @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
329
page == (tlb_entry->addr_code & mask));
330
}
331
332
-static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
333
- target_ulong page)
334
+static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
335
{
336
return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
337
}
338
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
339
340
/* Called with tlb_c.lock held */
341
static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
342
- target_ulong page,
343
- target_ulong mask)
344
+ vaddr page,
345
+ vaddr mask)
346
{
347
if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
348
memset(tlb_entry, -1, sizeof(*tlb_entry));
349
@@ -XXX,XX +XXX,XX @@ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
350
return false;
351
}
352
353
-static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
354
- target_ulong page)
355
+static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page)
356
{
357
return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
358
}
359
360
/* Called with tlb_c.lock held */
361
static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
362
- target_ulong page,
363
- target_ulong mask)
364
+ vaddr page,
365
+ vaddr mask)
366
{
367
CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
368
int k;
369
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
370
}
371
372
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
373
- target_ulong page)
374
+ vaddr page)
375
{
376
tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
377
}
378
379
-static void tlb_flush_page_locked(CPUArchState *env, int midx,
380
- target_ulong page)
381
+static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page)
382
{
383
- target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
384
- target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
385
+ vaddr lp_addr = env_tlb(env)->d[midx].large_page_addr;
386
+ vaddr lp_mask = env_tlb(env)->d[midx].large_page_mask;
387
388
/* Check if we need to flush due to large pages. */
389
if ((page & lp_mask) == lp_addr) {
390
- tlb_debug("forcing full flush midx %d ("
391
- TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
392
+ tlb_debug("forcing full flush midx %d (%"
393
+ VADDR_PRIx "/%" VADDR_PRIx ")\n",
394
midx, lp_addr, lp_mask);
395
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
396
} else {
397
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
398
* at @addr from the tlbs indicated by @idxmap from @cpu.
399
*/
400
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
401
- target_ulong addr,
402
+ vaddr addr,
403
uint16_t idxmap)
404
{
405
CPUArchState *env = cpu->env_ptr;
406
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
407
408
assert_cpu_is_self(cpu);
409
410
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
411
+ tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
412
413
qemu_spin_lock(&env_tlb(env)->c.lock);
414
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
415
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
416
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
417
run_on_cpu_data data)
418
{
419
- target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
420
- target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
421
+ vaddr addr_and_idxmap = data.target_ptr;
422
+ vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
423
uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
424
425
tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
426
}
427
428
typedef struct {
429
- target_ulong addr;
430
+ vaddr addr;
431
uint16_t idxmap;
432
} TLBFlushPageByMMUIdxData;
433
434
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
435
g_free(d);
436
}
437
438
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
439
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
440
{
441
- tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
442
+ tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
443
444
/* This should already be page aligned */
445
addr &= TARGET_PAGE_MASK;
446
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
447
}
448
}
449
450
-void tlb_flush_page(CPUState *cpu, target_ulong addr)
451
+void tlb_flush_page(CPUState *cpu, vaddr addr)
452
{
453
tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
454
}
455
456
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
457
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
458
uint16_t idxmap)
459
{
460
- tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
461
+ tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
462
463
/* This should already be page aligned */
464
addr &= TARGET_PAGE_MASK;
465
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
466
tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
467
}
468
469
-void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
470
+void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
471
{
472
tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
473
}
474
475
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
476
- target_ulong addr,
477
+ vaddr addr,
478
uint16_t idxmap)
479
{
480
- tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
481
+ tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
482
483
/* This should already be page aligned */
484
addr &= TARGET_PAGE_MASK;
485
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
486
}
487
}
488
489
-void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
490
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
491
{
492
tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
493
}
494
495
static void tlb_flush_range_locked(CPUArchState *env, int midx,
496
- target_ulong addr, target_ulong len,
497
+ vaddr addr, vaddr len,
498
unsigned bits)
499
{
500
CPUTLBDesc *d = &env_tlb(env)->d[midx];
501
CPUTLBDescFast *f = &env_tlb(env)->f[midx];
502
- target_ulong mask = MAKE_64BIT_MASK(0, bits);
503
+ vaddr mask = MAKE_64BIT_MASK(0, bits);
504
505
/*
506
* If @bits is smaller than the tlb size, there may be multiple entries
507
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
508
*/
509
if (mask < f->mask || len > f->mask) {
510
tlb_debug("forcing full flush midx %d ("
511
- TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
512
+ "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n",
513
midx, addr, mask, len);
514
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
515
return;
516
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
517
*/
518
if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
519
tlb_debug("forcing full flush midx %d ("
520
- TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
521
+ "%" VADDR_PRIx "/%" VADDR_PRIx ")\n",
522
midx, d->large_page_addr, d->large_page_mask);
523
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
524
return;
525
}
526
527
- for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
528
- target_ulong page = addr + i;
529
+ for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
530
+ vaddr page = addr + i;
531
CPUTLBEntry *entry = tlb_entry(env, midx, page);
532
533
if (tlb_flush_entry_mask_locked(entry, page, mask)) {
534
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
535
}
536
537
typedef struct {
538
- target_ulong addr;
539
- target_ulong len;
540
+ vaddr addr;
541
+ vaddr len;
542
uint16_t idxmap;
543
uint16_t bits;
544
} TLBFlushRangeData;
545
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
546
547
assert_cpu_is_self(cpu);
548
549
- tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
550
+ tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n",
551
d.addr, d.bits, d.len, d.idxmap);
552
553
qemu_spin_lock(&env_tlb(env)->c.lock);
554
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
555
* overlap the flushed pages, which includes the previous.
556
*/
557
d.addr -= TARGET_PAGE_SIZE;
558
- for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
559
+ for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
560
tb_jmp_cache_clear_page(cpu, d.addr);
561
d.addr += TARGET_PAGE_SIZE;
562
}
563
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
564
g_free(d);
565
}
566
567
-void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
568
- target_ulong len, uint16_t idxmap,
569
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
570
+ vaddr len, uint16_t idxmap,
571
unsigned bits)
572
{
573
TLBFlushRangeData d;
574
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
575
}
576
}
577
578
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
579
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
580
uint16_t idxmap, unsigned bits)
581
{
582
tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
583
}
584
585
void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
586
- target_ulong addr, target_ulong len,
587
+ vaddr addr, vaddr len,
588
uint16_t idxmap, unsigned bits)
589
{
590
TLBFlushRangeData d;
591
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
592
}
593
594
void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
595
- target_ulong addr,
596
- uint16_t idxmap, unsigned bits)
597
+ vaddr addr, uint16_t idxmap,
598
+ unsigned bits)
599
{
600
tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
601
idxmap, bits);
602
}
603
604
void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
605
- target_ulong addr,
606
- target_ulong len,
607
+ vaddr addr,
608
+ vaddr len,
609
uint16_t idxmap,
610
unsigned bits)
611
{
612
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
613
}
614
615
void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
616
- target_ulong addr,
617
+ vaddr addr,
618
uint16_t idxmap,
619
unsigned bits)
620
{
621
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
622
623
/* Called with tlb_c.lock held */
624
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
625
- target_ulong vaddr)
626
+ vaddr addr)
627
{
628
- if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
629
- tlb_entry->addr_write = vaddr;
630
+ if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) {
631
+ tlb_entry->addr_write = addr;
632
}
633
}
634
635
/* update the TLB corresponding to virtual page vaddr
636
so that it is no longer dirty */
637
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
638
+void tlb_set_dirty(CPUState *cpu, vaddr addr)
639
{
640
CPUArchState *env = cpu->env_ptr;
641
int mmu_idx;
642
643
assert_cpu_is_self(cpu);
644
645
- vaddr &= TARGET_PAGE_MASK;
646
+ addr &= TARGET_PAGE_MASK;
647
qemu_spin_lock(&env_tlb(env)->c.lock);
648
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
649
- tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
650
+ tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, addr), addr);
651
}
652
653
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
654
int k;
655
for (k = 0; k < CPU_VTLB_SIZE; k++) {
656
- tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
657
+ tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], addr);
31
}
658
}
32
}
659
}
33
+ if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
660
qemu_spin_unlock(&env_tlb(env)->c.lock);
34
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
661
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
35
+ return;
662
/* Our TLB does not support large pages, so remember the area covered by
36
+ }
663
large pages and trigger a full TLB flush if these are invalidated. */
664
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
665
- target_ulong vaddr, target_ulong size)
666
+ vaddr addr, uint64_t size)
667
{
668
- target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
669
- target_ulong lp_mask = ~(size - 1);
670
+ vaddr lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
671
+ vaddr lp_mask = ~(size - 1);
672
673
- if (lp_addr == (target_ulong)-1) {
674
+ if (lp_addr == (vaddr)-1) {
675
/* No previous large page. */
676
- lp_addr = vaddr;
677
+ lp_addr = addr;
678
} else {
679
/* Extend the existing region to include the new page.
680
This is a compromise between unnecessary flushes and
681
the cost of maintaining a full variable size TLB. */
682
lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
683
- while (((lp_addr ^ vaddr) & lp_mask) != 0) {
684
+ while (((lp_addr ^ addr) & lp_mask) != 0) {
685
lp_mask <<= 1;
686
}
687
}
688
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
689
* critical section.
690
*/
691
void tlb_set_page_full(CPUState *cpu, int mmu_idx,
692
- target_ulong vaddr, CPUTLBEntryFull *full)
693
+ vaddr addr, CPUTLBEntryFull *full)
694
{
695
CPUArchState *env = cpu->env_ptr;
696
CPUTLB *tlb = env_tlb(env);
697
CPUTLBDesc *desc = &tlb->d[mmu_idx];
698
MemoryRegionSection *section;
699
unsigned int index;
700
- target_ulong address;
701
- target_ulong write_address;
702
+ vaddr address;
703
+ vaddr write_address;
704
uintptr_t addend;
705
CPUTLBEntry *te, tn;
706
hwaddr iotlb, xlat, sz, paddr_page;
707
- target_ulong vaddr_page;
708
+ vaddr addr_page;
709
int asidx, wp_flags, prot;
710
bool is_ram, is_romd;
711
712
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
713
sz = TARGET_PAGE_SIZE;
714
} else {
715
sz = (hwaddr)1 << full->lg_page_size;
716
- tlb_add_large_page(env, mmu_idx, vaddr, sz);
717
+ tlb_add_large_page(env, mmu_idx, addr, sz);
718
}
719
- vaddr_page = vaddr & TARGET_PAGE_MASK;
720
+ addr_page = addr & TARGET_PAGE_MASK;
721
paddr_page = full->phys_addr & TARGET_PAGE_MASK;
722
723
prot = full->prot;
724
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
725
&xlat, &sz, full->attrs, &prot);
726
assert(sz >= TARGET_PAGE_SIZE);
727
728
- tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" HWADDR_FMT_plx
729
+ tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
730
" prot=%x idx=%d\n",
731
- vaddr, full->phys_addr, prot, mmu_idx);
732
+ addr, full->phys_addr, prot, mmu_idx);
733
734
- address = vaddr_page;
735
+ address = addr_page;
736
if (full->lg_page_size < TARGET_PAGE_BITS) {
737
/* Repeat the MMU check and TLB fill on every access. */
738
address |= TLB_INVALID_MASK;
739
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
740
}
741
}
742
743
- wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
744
+ wp_flags = cpu_watchpoint_address_matches(cpu, addr_page,
745
TARGET_PAGE_SIZE);
746
747
- index = tlb_index(env, mmu_idx, vaddr_page);
748
- te = tlb_entry(env, mmu_idx, vaddr_page);
749
+ index = tlb_index(env, mmu_idx, addr_page);
750
+ te = tlb_entry(env, mmu_idx, addr_page);
37
751
38
/*
752
/*
39
* Otherwise we must load the value from the constant pool.
753
* Hold the TLB lock for the rest of the function. We could acquire/release
40
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
754
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
41
TCGReg dst, TCGReg src)
755
tlb->c.dirty |= 1 << mmu_idx;
42
{
756
43
tcg_debug_assert(dst >= TCG_REG_V0);
757
/* Make sure there's no cached translation for the new page. */
44
- tcg_debug_assert(src >= TCG_REG_V0);
758
- tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
45
+
759
+ tlb_flush_vtlb_page_locked(env, mmu_idx, addr_page);
46
+ /* Splat from integer reg allowed via constraints for v3.00. */
47
+ if (src < TCG_REG_V0) {
48
+ tcg_debug_assert(have_isa_3_00);
49
+ switch (vece) {
50
+ case MO_64:
51
+ tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
52
+ return true;
53
+ case MO_32:
54
+ tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
55
+ return true;
56
+ default:
57
+ /* Fail, so that we fall back on either dupm or mov+dup. */
58
+ return false;
59
+ }
60
+ }
61
760
62
/*
761
/*
63
* Recall we use (or emulate) VSX integer loads, so the integer is
762
* Only evict the old entry to the victim tlb if it's for a
64
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
763
* different page; otherwise just overwrite the stale data.
65
static const TCGTargetOpDef sub2
764
*/
66
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
765
- if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
67
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
766
+ if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
68
+ static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
767
unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
69
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
768
CPUTLBEntry *tv = &desc->vtable[vidx];
70
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
769
71
static const TCGTargetOpDef v_v_v_v
770
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
72
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
771
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
73
return &v_v_v;
772
*/
74
case INDEX_op_not_vec:
773
desc->fulltlb[index] = *full;
75
case INDEX_op_neg_vec:
774
- desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
76
- case INDEX_op_dup_vec:
775
+ desc->fulltlb[index].xlat_section = iotlb - addr_page;
77
return &v_v;
776
desc->fulltlb[index].phys_addr = paddr_page;
78
+ case INDEX_op_dup_vec:
777
79
+ return have_isa_3_00 ? &v_vr : &v_v;
778
/* Now calculate the new entry */
80
case INDEX_op_ld_vec:
779
- tn.addend = addend - vaddr_page;
81
case INDEX_op_st_vec:
780
+ tn.addend = addend - addr_page;
82
case INDEX_op_dupm_vec:
781
if (prot & PAGE_READ) {
782
tn.addr_read = address;
783
if (wp_flags & BP_MEM_READ) {
784
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
785
qemu_spin_unlock(&tlb->c.lock);
786
}
787
788
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
789
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
790
hwaddr paddr, MemTxAttrs attrs, int prot,
791
- int mmu_idx, target_ulong size)
792
+ int mmu_idx, uint64_t size)
793
{
794
CPUTLBEntryFull full = {
795
.phys_addr = paddr,
796
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
797
};
798
799
assert(is_power_of_2(size));
800
- tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
801
+ tlb_set_page_full(cpu, mmu_idx, addr, &full);
802
}
803
804
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
805
+void tlb_set_page(CPUState *cpu, vaddr addr,
806
hwaddr paddr, int prot,
807
- int mmu_idx, target_ulong size)
808
+ int mmu_idx, uint64_t size)
809
{
810
- tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
811
+ tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
812
prot, mmu_idx, size);
813
}
814
815
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
816
* caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
817
* be discarded and looked up again (e.g. via tlb_entry()).
818
*/
819
-static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
820
+static void tlb_fill(CPUState *cpu, vaddr addr, int size,
821
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
822
{
823
bool ok;
824
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
825
}
826
827
static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
828
- int mmu_idx, target_ulong addr, uintptr_t retaddr,
829
+ int mmu_idx, vaddr addr, uintptr_t retaddr,
830
MMUAccessType access_type, MemOp op)
831
{
832
CPUState *cpu = env_cpu(env);
833
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
834
}
835
836
static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
837
- int mmu_idx, uint64_t val, target_ulong addr,
838
+ int mmu_idx, uint64_t val, vaddr addr,
839
uintptr_t retaddr, MemOp op)
840
{
841
CPUState *cpu = env_cpu(env);
842
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
843
/* Return true if ADDR is present in the victim tlb, and has been copied
844
back to the main tlb. */
845
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
846
- MMUAccessType access_type, target_ulong page)
847
+ MMUAccessType access_type, vaddr page)
848
{
849
size_t vidx;
850
851
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
852
* from the same thread (which a mem callback will be) this is safe.
853
*/
854
855
-bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
856
+bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
857
bool is_store, struct qemu_plugin_hwaddr *data)
858
{
859
CPUArchState *env = cpu->env_ptr;
860
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
861
uintptr_t index = tlb_index(env, mmu_idx, addr);
862
- target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
863
+ vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
864
865
if (likely(tlb_hit(tlb_addr, addr))) {
866
/* We must have an iotlb entry for MMIO */
867
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
868
index XXXXXXX..XXXXXXX 100644
869
--- a/accel/tcg/tb-maint.c
870
+++ b/accel/tcg/tb-maint.c
871
@@ -XXX,XX +XXX,XX @@ static void tb_remove_all(void)
872
/* Call with mmap_lock held. */
873
static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
874
{
875
- target_ulong addr;
876
+ vaddr addr;
877
int flags;
878
879
assert_memory_lock();
83
--
880
--
84
2.17.1
881
2.34.1
85
86
These new instructions are a mix of those like LXSD that are
conditional only on MSR.VEC and those like LXV that are
conditional on MSR.VEC for TX=1. Thus, in the end, we can
consider all of these as Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)


From: Anton Johansson <anjo@rev.ng>

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-3-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/internal.h | 6 +++---
 accel/tcg/translate-all.c | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)
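As a usage note (a hypothetical sketch, not code from this patch): with
log_pc() now returning vaddr, call sites format the value with
VADDR_PRIx rather than TARGET_FMT_lx, for example:

    /* Hypothetical logging helper -- illustration only.  log_pc() is
     * the static inline in accel/tcg/internal.h touched by this patch;
     * vaddr values are always 64-bit, hence VADDR_PRIx. */
    static void log_current_tb(CPUState *cpu, const TranslationBlock *tb)
    {
        vaddr pc = log_pc(cpu, tb);

        if (qemu_log_in_addr_range(pc)) {
            qemu_log("executing TB at 0x%" VADDR_PRIx "\n", pc);
        }
    }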
11
12
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.inc.c
14
--- a/accel/tcg/internal.h
15
+++ b/tcg/ppc/tcg-target.inc.c
15
+++ b/accel/tcg/internal.h
16
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
16
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
17
G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
18
#endif /* CONFIG_SOFTMMU */
19
#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
19
20
+#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */
20
-TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
21
+#define LXSD (OPCD(57) | 2) /* v3.00 */
21
- target_ulong cs_base, uint32_t flags,
22
+#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */
22
+TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc,
23
23
+ uint64_t cs_base, uint32_t flags,
24
#define STVX XO31(231)
24
int cflags);
25
#define STVEWX XO31(199)
25
void page_init(void);
26
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
26
void tb_htable_init(void);
27
#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
27
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
28
+#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
28
uintptr_t host_pc);
29
+#define STXSD (OPCD(61) | 2) /* v3.00 */
29
30
30
/* Return the current PC from CPU, which may be cached in TB. */
31
#define VADDSBS VX4(768)
31
-static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
32
#define VADDUBS VX4(512)
32
+static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
34
TCGReg base, tcg_target_long offset)
35
{
33
{
36
tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
34
if (tb_cflags(tb) & CF_PCREL) {
37
- bool is_store = false;
35
return cpu->cc->get_pc(cpu);
38
+ bool is_int_store = false;
36
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
39
TCGReg rs = TCG_REG_TMP1;
37
index XXXXXXX..XXXXXXX 100644
40
38
--- a/accel/tcg/translate-all.c
41
switch (opi) {
39
+++ b/accel/tcg/translate-all.c
42
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
40
@@ -XXX,XX +XXX,XX @@ void page_init(void)
43
break;
41
* Return the size of the generated code, or negative on error.
42
*/
43
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
44
- target_ulong pc, void *host_pc,
45
+ vaddr pc, void *host_pc,
46
int *max_insns, int64_t *ti)
47
{
48
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
49
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
50
51
/* Called with mmap_lock held for user mode emulation. */
52
TranslationBlock *tb_gen_code(CPUState *cpu,
53
- target_ulong pc, target_ulong cs_base,
54
+ vaddr pc, uint64_t cs_base,
55
uint32_t flags, int cflags)
56
{
57
CPUArchState *env = cpu->env_ptr;
58
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
59
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
60
61
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
62
- target_ulong pc = log_pc(cpu, tb);
63
+ vaddr pc = log_pc(cpu, tb);
64
if (qemu_log_in_addr_range(pc)) {
65
- qemu_log("cpu_io_recompile: rewound execution of TB to "
66
- TARGET_FMT_lx "\n", pc);
67
+ qemu_log("cpu_io_recompile: rewound execution of TB to %"
68
+ VADDR_PRIx "\n", pc);
44
}
69
}
45
break;
46
+ case LXSD:
47
+ case STXSD:
48
+ align = 3;
49
+ break;
50
+ case LXV:
51
+ case STXV:
52
+ align = 15;
53
+ break;
54
case STD:
55
align = 3;
56
/* FALLTHRU */
57
case STB: case STH: case STW:
58
- is_store = true;
59
+ is_int_store = true;
60
break;
61
}
70
}
62
71
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
64
if (rs == base) {
65
rs = TCG_REG_R0;
66
}
67
- tcg_debug_assert(!is_store || rs != rt);
68
+ tcg_debug_assert(!is_int_store || rs != rt);
69
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
70
tcg_out32(s, opx | TAB(rt & 31, base, rs));
71
return;
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
73
case TCG_TYPE_V64:
74
tcg_debug_assert(ret >= TCG_REG_V0);
75
if (have_vsx) {
76
- tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
77
+ tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
78
+ ret, base, offset);
79
break;
80
}
81
tcg_debug_assert((offset & 7) == 0);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
83
case TCG_TYPE_V128:
84
tcg_debug_assert(ret >= TCG_REG_V0);
85
tcg_debug_assert((offset & 15) == 0);
86
- tcg_out_mem_long(s, 0, LVX, ret, base, offset);
87
+ tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
88
+ LVX, ret, base, offset);
89
break;
90
default:
91
g_assert_not_reached();
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
93
case TCG_TYPE_V64:
94
tcg_debug_assert(arg >= TCG_REG_V0);
95
if (have_vsx) {
96
- tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
97
+ tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
98
+ STXSDX, arg, base, offset);
99
break;
100
}
101
tcg_debug_assert((offset & 7) == 0);
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
103
break;
104
case TCG_TYPE_V128:
105
tcg_debug_assert(arg >= TCG_REG_V0);
106
- tcg_out_mem_long(s, 0, STVX, arg, base, offset);
107
+ tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
108
+ STVX, arg, base, offset);
109
break;
110
default:
111
g_assert_not_reached();
112
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
113
tcg_debug_assert(out >= TCG_REG_V0);
114
switch (vece) {
115
case MO_8:
116
- tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
117
+ if (have_isa_3_00) {
118
+ tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
119
+ } else {
120
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
121
+ }
122
elt = extract32(offset, 0, 4);
123
#ifndef HOST_WORDS_BIGENDIAN
124
elt ^= 15;
125
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
126
break;
127
case MO_16:
128
tcg_debug_assert((offset & 1) == 0);
129
- tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
130
+ if (have_isa_3_00) {
131
+ tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
132
+ } else {
133
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
134
+ }
135
elt = extract32(offset, 1, 3);
136
#ifndef HOST_WORDS_BIGENDIAN
137
elt ^= 7;
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
139
tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
140
break;
141
case MO_32:
142
+ if (have_isa_3_00) {
143
+ tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
144
+ break;
145
+ }
146
tcg_debug_assert((offset & 3) == 0);
147
tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
148
elt = extract32(offset, 2, 2);
149
--
72
--
150
2.17.1
73
2.34.1
151
152
Altivec supports 32 128-bit vector registers, whose names are
by convention v0 through v31.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h | 11 ++++-
 tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++---------------
 2 files changed, 65 insertions(+), 34 deletions(-)


From: Anton Johansson <anjo@rev.ng>

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-4-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/alpha/cpu.h | 4 ++--
 target/arm/cpu.h | 4 ++--
 target/avr/cpu.h | 4 ++--
 target/cris/cpu.h | 4 ++--
 target/hexagon/cpu.h | 4 ++--
 target/hppa/cpu.h | 5 ++---
 target/i386/cpu.h | 4 ++--
 target/loongarch/cpu.h | 6 ++----
 target/m68k/cpu.h | 4 ++--
 target/microblaze/cpu.h | 4 ++--
 target/mips/cpu.h | 4 ++--
 target/nios2/cpu.h | 4 ++--
 target/openrisc/cpu.h | 5 ++---
 target/ppc/cpu.h | 8 ++++----
 target/riscv/cpu.h | 4 ++--
 target/rx/cpu.h | 4 ++--
 target/s390x/cpu.h | 4 ++--
 target/sh4/cpu.h | 4 ++--
 target/sparc/cpu.h | 4 ++--
 target/tricore/cpu.h | 4 ++--
 target/xtensa/cpu.h | 4 ++--
 accel/tcg/cpu-exec.c | 9 ++++++---
 accel/tcg/translate-all.c | 3 ++-
 target/arm/helper.c | 4 ++--
 target/ppc/helper_regs.c | 4 ++--
 target/riscv/cpu_helper.c | 4 ++--
 26 files changed, 58 insertions(+), 58 deletions(-)
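To see the shape of the new hook in one place (a hypothetical target
stub; CPUFooState is an invented placeholder, real targets appear in
the hunks below): pc is now a vaddr and cs_base a uint64_t, independent
of TARGET_LONG_BITS.

    /* Hypothetical target stub -- illustration only. */
    static inline void cpu_get_tb_cpu_state(CPUFooState *env, vaddr *pc,
                                            uint64_t *cs_base, uint32_t *flags)
    {
        *pc = env->pc;    /* guest virtual PC, widened to 64 bits */
        *cs_base = 0;     /* most targets leave cs_base unused */
        *flags = 0;       /* target-specific TB flags */
    }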
10
35
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
36
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
12
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
38
--- a/target/alpha/cpu.h
14
+++ b/tcg/ppc/tcg-target.h
39
+++ b/target/alpha/cpu.h
15
@@ -XXX,XX +XXX,XX @@
40
@@ -XXX,XX +XXX,XX @@ void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
16
# define TCG_TARGET_REG_BITS 32
41
MemTxResult response, uintptr_t retaddr);
17
#endif
42
#endif
18
43
19
-#define TCG_TARGET_NB_REGS 32
44
-static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, target_ulong *pc,
20
+#define TCG_TARGET_NB_REGS 64
45
- target_ulong *cs_base, uint32_t *pflags)
21
#define TCG_TARGET_INSN_UNIT_SIZE 4
46
+static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, vaddr *pc,
22
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
47
+ uint64_t *cs_base, uint32_t *pflags)
23
48
{
24
@@ -XXX,XX +XXX,XX @@ typedef enum {
49
*pc = env->pc;
25
TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
50
*cs_base = 0;
26
TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
51
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
27
52
index XXXXXXX..XXXXXXX 100644
28
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53
--- a/target/arm/cpu.h
29
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54
+++ b/target/arm/cpu.h
30
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
55
@@ -XXX,XX +XXX,XX @@ static inline bool arm_cpu_bswap_data(CPUARMState *env)
31
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
56
}
32
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
33
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
34
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
35
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
36
+
37
TCG_REG_CALL_STACK = TCG_REG_R1,
38
TCG_AREG0 = TCG_REG_R27
39
} TCGReg;
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
43
+++ b/tcg/ppc/tcg-target.inc.c
44
@@ -XXX,XX +XXX,XX @@
45
# define TCG_REG_TMP1 TCG_REG_R12
46
#endif
57
#endif
47
58
48
+#define TCG_VEC_TMP1 TCG_REG_V0
59
-void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
49
+#define TCG_VEC_TMP2 TCG_REG_V1
60
- target_ulong *cs_base, uint32_t *flags);
50
+
61
+void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
51
#define TCG_REG_TB TCG_REG_R31
62
+ uint64_t *cs_base, uint32_t *flags);
52
#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
63
53
64
enum {
54
@@ -XXX,XX +XXX,XX @@ bool have_isa_3_00;
65
QEMU_PSCI_CONDUIT_DISABLED = 0,
66
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/avr/cpu.h
69
+++ b/target/avr/cpu.h
70
@@ -XXX,XX +XXX,XX @@ enum {
71
TB_FLAGS_SKIP = 2,
72
};
73
74
-static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc,
75
- target_ulong *cs_base, uint32_t *pflags)
76
+static inline void cpu_get_tb_cpu_state(CPUAVRState *env, vaddr *pc,
77
+ uint64_t *cs_base, uint32_t *pflags)
78
{
79
uint32_t flags = 0;
80
81
diff --git a/target/cris/cpu.h b/target/cris/cpu.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/cris/cpu.h
84
+++ b/target/cris/cpu.h
85
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch)
86
87
#include "exec/cpu-all.h"
88
89
-static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc,
90
- target_ulong *cs_base, uint32_t *flags)
91
+static inline void cpu_get_tb_cpu_state(CPUCRISState *env, vaddr *pc,
92
+ uint64_t *cs_base, uint32_t *flags)
93
{
94
*pc = env->pc;
95
*cs_base = 0;
96
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
97
index XXXXXXX..XXXXXXX 100644
98
--- a/target/hexagon/cpu.h
99
+++ b/target/hexagon/cpu.h
100
@@ -XXX,XX +XXX,XX @@ struct ArchCPU {
101
102
FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
103
104
-static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc,
105
- target_ulong *cs_base, uint32_t *flags)
106
+static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
107
+ uint64_t *cs_base, uint32_t *flags)
108
{
109
uint32_t hex_flags = 0;
110
*pc = env->gpr[HEX_REG_PC];
111
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
112
index XXXXXXX..XXXXXXX 100644
113
--- a/target/hppa/cpu.h
114
+++ b/target/hppa/cpu.h
115
@@ -XXX,XX +XXX,XX @@ static inline target_ulong hppa_form_gva(CPUHPPAState *env, uint64_t spc,
116
#define TB_FLAG_PRIV_SHIFT 8
117
#define TB_FLAG_UNALIGN 0x400
118
119
-static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, target_ulong *pc,
120
- target_ulong *cs_base,
121
- uint32_t *pflags)
122
+static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc,
123
+ uint64_t *cs_base, uint32_t *pflags)
124
{
125
uint32_t flags = env->psw_n * PSW_N;
126
127
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
128
index XXXXXXX..XXXXXXX 100644
129
--- a/target/i386/cpu.h
130
+++ b/target/i386/cpu.h
131
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index_kernel(CPUX86State *env)
132
#include "hw/i386/apic.h"
55
#endif
133
#endif
56
134
135
-static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc,
136
- target_ulong *cs_base, uint32_t *flags)
137
+static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc,
138
+ uint64_t *cs_base, uint32_t *flags)
139
{
140
*cs_base = env->segs[R_CS].base;
141
*pc = *cs_base + env->eip;
142
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
143
index XXXXXXX..XXXXXXX 100644
144
--- a/target/loongarch/cpu.h
145
+++ b/target/loongarch/cpu.h
146
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch)
147
#define HW_FLAGS_EUEN_FPE 0x04
148
#define HW_FLAGS_EUEN_SXE 0x08
149
150
-static inline void cpu_get_tb_cpu_state(CPULoongArchState *env,
151
- target_ulong *pc,
152
- target_ulong *cs_base,
153
- uint32_t *flags)
154
+static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
155
+ uint64_t *cs_base, uint32_t *flags)
156
{
157
*pc = env->pc;
158
*cs_base = 0;
159
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
160
index XXXXXXX..XXXXXXX 100644
161
--- a/target/m68k/cpu.h
162
+++ b/target/m68k/cpu.h
163
@@ -XXX,XX +XXX,XX @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
164
#define TB_FLAGS_TRACE 16
165
#define TB_FLAGS_TRACE_BIT (1 << TB_FLAGS_TRACE)
166
167
-static inline void cpu_get_tb_cpu_state(CPUM68KState *env, target_ulong *pc,
168
- target_ulong *cs_base, uint32_t *flags)
169
+static inline void cpu_get_tb_cpu_state(CPUM68KState *env, vaddr *pc,
170
+ uint64_t *cs_base, uint32_t *flags)
171
{
172
*pc = env->pc;
173
*cs_base = 0;
174
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
175
index XXXXXXX..XXXXXXX 100644
176
--- a/target/microblaze/cpu.h
177
+++ b/target/microblaze/cpu.h
178
@@ -XXX,XX +XXX,XX @@ void mb_tcg_init(void);
179
/* Ensure there is no overlap between the two masks. */
180
QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK);
181
182
-static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc,
183
- target_ulong *cs_base, uint32_t *flags)
184
+static inline void cpu_get_tb_cpu_state(CPUMBState *env, vaddr *pc,
185
+ uint64_t *cs_base, uint32_t *flags)
186
{
187
*pc = env->pc;
188
*flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK);
189
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
190
index XXXXXXX..XXXXXXX 100644
191
--- a/target/mips/cpu.h
192
+++ b/target/mips/cpu.h
193
@@ -XXX,XX +XXX,XX @@ void itc_reconfigure(struct MIPSITUState *tag);
194
/* helper.c */
195
target_ulong exception_resume_pc(CPUMIPSState *env);
196
197
-static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc,
198
- target_ulong *cs_base, uint32_t *flags)
199
+static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, vaddr *pc,
200
+ uint64_t *cs_base, uint32_t *flags)
201
{
202
*pc = env->active_tc.PC;
203
*cs_base = 0;
204
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
205
index XXXXXXX..XXXXXXX 100644
206
--- a/target/nios2/cpu.h
207
+++ b/target/nios2/cpu.h
208
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAGS, CRS0, 0, 1) /* Set if CRS == 0. */
209
FIELD(TBFLAGS, U, 1, 1) /* Overlaps CR_STATUS_U */
210
FIELD(TBFLAGS, R0_0, 2, 1) /* Set if R0 == 0. */
211
212
-static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc,
213
- target_ulong *cs_base, uint32_t *flags)
214
+static inline void cpu_get_tb_cpu_state(CPUNios2State *env, vaddr *pc,
215
+ uint64_t *cs_base, uint32_t *flags)
216
{
217
unsigned crs = FIELD_EX32(env->ctrl[CR_STATUS], CR_STATUS, CRS);
218
219
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
220
index XXXXXXX..XXXXXXX 100644
221
--- a/target/openrisc/cpu.h
222
+++ b/target/openrisc/cpu.h
223
@@ -XXX,XX +XXX,XX @@ static inline void cpu_set_gpr(CPUOpenRISCState *env, int i, uint32_t val)
224
env->shadow_gpr[0][i] = val;
225
}
226
227
-static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
228
- target_ulong *pc,
229
- target_ulong *cs_base, uint32_t *flags)
230
+static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, vaddr *pc,
231
+ uint64_t *cs_base, uint32_t *flags)
232
{
233
*pc = env->pc;
234
*cs_base = 0;
235
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
236
index XXXXXXX..XXXXXXX 100644
237
--- a/target/ppc/cpu.h
238
+++ b/target/ppc/cpu.h
239
@@ -XXX,XX +XXX,XX @@ void cpu_write_xer(CPUPPCState *env, target_ulong xer);
240
#define is_book3s_arch2x(ctx) (!!((ctx)->insns_flags & PPC_SEGMENT_64B))
241
57
#ifdef CONFIG_DEBUG_TCG
242
#ifdef CONFIG_DEBUG_TCG
58
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
243
-void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
59
- "r0",
244
- target_ulong *cs_base, uint32_t *flags);
60
- "r1",
245
+void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc,
61
- "r2",
246
+ uint64_t *cs_base, uint32_t *flags);
62
- "r3",
247
#else
63
- "r4",
248
-static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
64
- "r5",
249
- target_ulong *cs_base, uint32_t *flags)
65
- "r6",
250
+static inline void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc,
66
- "r7",
251
+ uint64_t *cs_base, uint32_t *flags)
67
- "r8",
252
{
68
- "r9",
253
*pc = env->nip;
69
- "r10",
254
*cs_base = 0;
70
- "r11",
255
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
71
- "r12",
256
index XXXXXXX..XXXXXXX 100644
72
- "r13",
257
--- a/target/riscv/cpu.h
73
- "r14",
258
+++ b/target/riscv/cpu.h
74
- "r15",
259
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype)
75
- "r16",
260
return cpu->cfg.vlen >> (sew + 3 - lmul);
76
- "r17",
261
}
77
- "r18",
262
78
- "r19",
263
-void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
79
- "r20",
264
- target_ulong *cs_base, uint32_t *pflags);
80
- "r21",
265
+void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
81
- "r22",
266
+ uint64_t *cs_base, uint32_t *pflags);
82
- "r23",
267
83
- "r24",
268
void riscv_cpu_update_mask(CPURISCVState *env);
84
- "r25",
269
85
- "r26",
270
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
86
- "r27",
271
index XXXXXXX..XXXXXXX 100644
87
- "r28",
272
--- a/target/rx/cpu.h
88
- "r29",
273
+++ b/target/rx/cpu.h
89
- "r30",
274
@@ -XXX,XX +XXX,XX @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
90
- "r31"
275
#define RX_CPU_IRQ 0
91
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
276
#define RX_CPU_FIR 1
92
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
277
93
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
278
-static inline void cpu_get_tb_cpu_state(CPURXState *env, target_ulong *pc,
94
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
279
- target_ulong *cs_base, uint32_t *flags)
95
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
280
+static inline void cpu_get_tb_cpu_state(CPURXState *env, vaddr *pc,
96
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
281
+ uint64_t *cs_base, uint32_t *flags)
97
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
282
{
98
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
283
*pc = env->pc;
99
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
284
*cs_base = 0;
100
};
285
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
286
index XXXXXXX..XXXXXXX 100644
287
--- a/target/s390x/cpu.h
288
+++ b/target/s390x/cpu.h
289
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch)
101
#endif
290
#endif
102
291
}
103
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
292
104
TCG_REG_R5,
293
-static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc,
105
TCG_REG_R4,
294
- target_ulong *cs_base, uint32_t *flags)
106
TCG_REG_R3,
295
+static inline void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
107
+
296
+ uint64_t *cs_base, uint32_t *flags)
108
+ /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
297
{
109
+ TCG_REG_V2, /* call clobbered, vectors */
298
if (env->psw.addr & 1) {
110
+ TCG_REG_V3,
299
/*
111
+ TCG_REG_V4,
300
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
112
+ TCG_REG_V5,
301
index XXXXXXX..XXXXXXX 100644
113
+ TCG_REG_V6,
302
--- a/target/sh4/cpu.h
114
+ TCG_REG_V7,
303
+++ b/target/sh4/cpu.h
115
+ TCG_REG_V8,
304
@@ -XXX,XX +XXX,XX @@ static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr)
116
+ TCG_REG_V9,
305
env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T));
117
+ TCG_REG_V10,
306
}
118
+ TCG_REG_V11,
307
119
+ TCG_REG_V12,
308
-static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
120
+ TCG_REG_V13,
309
- target_ulong *cs_base, uint32_t *flags)
121
+ TCG_REG_V14,
310
+static inline void cpu_get_tb_cpu_state(CPUSH4State *env, vaddr *pc,
122
+ TCG_REG_V15,
311
+ uint64_t *cs_base, uint32_t *flags)
123
+ TCG_REG_V16,
312
{
124
+ TCG_REG_V17,
313
*pc = env->pc;
125
+ TCG_REG_V18,
314
/* For a gUSA region, notice the end of the region. */
126
+ TCG_REG_V19,
315
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
127
};
316
index XXXXXXX..XXXXXXX 100644
128
317
--- a/target/sparc/cpu.h
129
static const int tcg_target_call_iarg_regs[] = {
318
+++ b/target/sparc/cpu.h
130
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
319
@@ -XXX,XX +XXX,XX @@ trap_state* cpu_tsptr(CPUSPARCState* env);
131
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
320
#define TB_FLAG_HYPER (1 << 7)
132
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
321
#define TB_FLAG_ASI_SHIFT 24
133
322
134
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
323
-static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, target_ulong *pc,
135
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
324
- target_ulong *cs_base, uint32_t *pflags)
136
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
325
+static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc,
137
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
326
+ uint64_t *cs_base, uint32_t *pflags)
138
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
327
{
139
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
328
uint32_t flags;
140
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
329
*pc = env->pc;
141
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
330
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
142
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
331
index XXXXXXX..XXXXXXX 100644
143
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
332
--- a/target/tricore/cpu.h
144
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
333
+++ b/target/tricore/cpu.h
145
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
334
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)
146
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
335
void cpu_state_reset(CPUTriCoreState *s);
147
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
336
void tricore_tcg_init(void);
148
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
337
149
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
338
-static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, target_ulong *pc,
150
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
339
- target_ulong *cs_base, uint32_t *flags)
151
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
340
+static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
152
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
341
+ uint64_t *cs_base, uint32_t *flags)
153
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
342
{
154
+
343
uint32_t new_flags = 0;
155
s->reserved_regs = 0;
344
*pc = env->PC;
156
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
345
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
157
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
346
index XXXXXXX..XXXXXXX 100644
158
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
347
--- a/target/xtensa/cpu.h
159
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
348
+++ b/target/xtensa/cpu.h
349
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch)
350
351
#include "exec/cpu-all.h"
352
353
-static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
354
- target_ulong *cs_base, uint32_t *flags)
355
+static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc,
356
+ uint64_t *cs_base, uint32_t *flags)
357
{
358
*pc = env->pc;
359
*cs_base = 0;
360
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
361
index XXXXXXX..XXXXXXX 100644
362
--- a/accel/tcg/cpu-exec.c
363
+++ b/accel/tcg/cpu-exec.c
364
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
365
{
366
CPUState *cpu = env_cpu(env);
367
TranslationBlock *tb;
368
- target_ulong cs_base, pc;
369
+ vaddr pc;
370
+ uint64_t cs_base;
371
uint32_t flags, cflags;
372
373
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
374
@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
375
{
376
CPUArchState *env = cpu->env_ptr;
377
TranslationBlock *tb;
378
- target_ulong cs_base, pc;
379
+ vaddr pc;
380
+ uint64_t cs_base;
381
uint32_t flags, cflags;
382
int tb_exit;
383
384
@@ -XXX,XX +XXX,XX @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
385
386
while (!cpu_handle_interrupt(cpu, &last_tb)) {
387
TranslationBlock *tb;
388
- target_ulong cs_base, pc;
389
+ vaddr pc;
390
+ uint64_t cs_base;
391
uint32_t flags, cflags;
392
393
cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);
394
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
395
index XXXXXXX..XXXXXXX 100644
396
--- a/accel/tcg/translate-all.c
397
+++ b/accel/tcg/translate-all.c
398
@@ -XXX,XX +XXX,XX @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
399
/* The exception probably happened in a helper. The CPU state should
400
have been saved before calling it. Fetch the PC from there. */
401
CPUArchState *env = cpu->env_ptr;
402
- target_ulong pc, cs_base;
403
+ vaddr pc;
404
+ uint64_t cs_base;
405
tb_page_addr_t addr;
406
uint32_t flags;
407
408
diff --git a/target/arm/helper.c b/target/arm/helper.c
409
index XXXXXXX..XXXXXXX 100644
410
--- a/target/arm/helper.c
411
+++ b/target/arm/helper.c
412
@@ -XXX,XX +XXX,XX @@ static bool mve_no_pred(CPUARMState *env)
413
return true;
414
}
415
416
-void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
417
- target_ulong *cs_base, uint32_t *pflags)
418
+void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
419
+ uint64_t *cs_base, uint32_t *pflags)
420
{
421
CPUARMTBFlags flags;
422
423
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
424
index XXXXXXX..XXXXXXX 100644
425
--- a/target/ppc/helper_regs.c
426
+++ b/target/ppc/helper_regs.c
427
@@ -XXX,XX +XXX,XX @@ void hreg_update_pmu_hflags(CPUPPCState *env)
428
}
429
430
#ifdef CONFIG_DEBUG_TCG
431
-void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
432
- target_ulong *cs_base, uint32_t *flags)
433
+void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc,
434
+ uint64_t *cs_base, uint32_t *flags)
435
{
436
uint32_t hflags_current = env->hflags;
437
uint32_t hflags_rebuilt;
438
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
439
index XXXXXXX..XXXXXXX 100644
440
--- a/target/riscv/cpu_helper.c
441
+++ b/target/riscv/cpu_helper.c
442
@@ -XXX,XX +XXX,XX @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
160
#endif
443
#endif
161
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
444
}
162
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
445
163
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
446
-void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
164
if (USE_REG_TB) {
447
- target_ulong *cs_base, uint32_t *pflags)
165
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
448
+void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
166
}
449
+ uint64_t *cs_base, uint32_t *pflags)
450
{
451
CPUState *cs = env_cpu(env);
452
RISCVCPU *cpu = RISCV_CPU(cs);
167
--
453
--
168
2.17.1
454
2.34.1
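
To make the prototype change above concrete, here is a minimal sketch of a caller after the widening. Everything below except the pc/cs_base types is an assumption: toy_env and the local cpu_get_tb_cpu_state stand in for QEMU's real CPU state and helper, purely to show the new out-parameter types.

#include <stdint.h>

typedef uint64_t vaddr;                    /* assumed stand-in for QEMU's vaddr */

struct toy_env {                           /* hypothetical, not a real CPU state */
    vaddr pc;
};

static void cpu_get_tb_cpu_state(struct toy_env *env, vaddr *pc,
                                 uint64_t *cs_base, uint32_t *pflags)
{
    /* Mirrors the trivial targets in the hunks above. */
    *pc = env->pc;
    *cs_base = 0;
    *pflags = 0;
}

int main(void)
{
    struct toy_env env = { .pc = 0xffffffff80001000ull };
    vaddr pc;
    uint64_t cs_base;
    uint32_t flags;

    cpu_get_tb_cpu_state(&env, &pc, &cs_base, &flags);
    return pc == env.pc ? 0 : 1;           /* the 64-bit pc is preserved regardless of target_ulong width */
}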
diff view generated by jsdifflib
Deleted patch
Introduce macro VX4() used for encoding Altivec instructions.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.inc.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
+#define VX4(opc) (OPCD(4)|(opc))

#define B OPCD( 18)
#define BC OPCD( 16)
--
2.17.1
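
As a rough illustration of what VX4() is for, the sketch below assembles one Altivec instruction word by hand. Only VX4() comes from the patch above; OPCD(), VRT(), VRA(), VRB() and the VADDUWM opcode value are assumptions modelled on the neighbouring encoding macros in tcg/ppc/tcg-target.inc.c.

#include <stdio.h>
#include <stdint.h>

#define OPCD(opc)   ((uint32_t)(opc) << 26)   /* assumed: primary opcode in the top 6 bits */
#define VX4(opc)    (OPCD(4) | (opc))         /* as introduced by the patch above */

#define VRT(r)      ((uint32_t)(r) << 21)     /* assumed, mirroring RT(r) */
#define VRA(r)      ((uint32_t)(r) << 16)     /* assumed, mirroring RA(r) */
#define VRB(r)      ((uint32_t)(r) << 11)     /* assumed, mirroring RB(r) */

#define VADDUWM     VX4(128)                  /* vadduwm, value taken from a later patch in the series */

int main(void)
{
    /* Encode "vadduwm v2, v3, v4" as a 32-bit instruction word. */
    uint32_t insn = VADDUWM | VRT(2) | VRA(3) | VRB(4);

    printf("0x%08x\n", insn);                 /* prints 0x10432080 with these definitions */
    return 0;
}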
diff view generated by jsdifflib
This is only used for 32-bit hosts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.inc.c | 9 +++++++++
1 file changed, 9 insertions(+)

From: Anton Johansson <anjo@rev.ng>

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-5-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu_ldst.h | 10 +++++-----
accel/tcg/cputlb.c | 8 ++++----
2 files changed, 9 insertions(+), 9 deletions(-)
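
As a reading aid for the dup2_vec hunk below, here is a scalar model of the same VMRGHW plus double VSLDOI sequence. The Vec type and both helpers are illustrative stand-ins, not QEMU code; lane order is shown left to right as in the /* a0 = HLHL */ comments.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint32_t w[4]; } Vec;   /* w[0] is the leftmost 32-bit lane */

static Vec vmrghw(Vec a, Vec b)          /* merge high-order words */
{
    return (Vec){ { a.w[0], b.w[0], a.w[1], b.w[1] } };
}

static Vec vsldoi(Vec a, Vec b, int shb) /* shift left double by octet immediate */
{
    uint8_t buf[32];
    Vec r;

    memcpy(buf, a.w, 16);
    memcpy(buf + 16, b.w, 16);
    memcpy(r.w, buf + shb, 16);          /* word-aligned shb keeps whole lanes intact */
    return r;
}

int main(void)
{
    /* a1 = xLxx, a2 = xHxx, as in the comment in tcg_out_vec_op(). */
    Vec a1 = { { 0, 0x4c4c4c4c, 0, 0 } };    /* L in word 1 */
    Vec a2 = { { 0, 0x48484848, 0, 0 } };    /* H in word 1 */

    Vec a0 = vmrghw(a2, a1);                 /* a0 = xxHL */
    Vec tmp = vsldoi(a0, a0, 8);             /* tmp = HLxx */
    a0 = vsldoi(a0, tmp, 8);                 /* a0 = HLHL */

    for (int i = 0; i < 4; i++) {
        printf("%08x ", a0.w[i]);            /* 48484848 4c4c4c4c 48484848 4c4c4c4c */
    }
    printf("\n");
    return 0;
}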
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
14
--- a/include/exec/cpu_ldst.h
12
+++ b/tcg/ppc/tcg-target.inc.c
15
+++ b/include/exec/cpu_ldst.h
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
16
@@ -XXX,XX +XXX,XX @@ static inline void clear_helper_retaddr(void)
14
}
17
15
break;
18
#include "tcg/oversized-guest.h"
16
19
17
+ case INDEX_op_dup2_vec:
20
-static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
18
+ assert(TCG_TARGET_REG_BITS == 32);
21
- MMUAccessType access_type)
19
+ /* With inputs a1 = xLxx, a2 = xHxx */
22
+static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
20
+ tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
23
+ MMUAccessType access_type)
21
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */
24
{
22
+ tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */
25
/* Do not rearrange the CPUTLBEntry structure members. */
23
+ return;
26
QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
24
+
27
@@ -XXX,XX +XXX,XX @@ static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
25
case INDEX_op_ppc_mrgh_vec:
28
#endif
26
insn = mrgh_op[vece];
29
}
27
break;
30
28
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
31
-static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
29
case INDEX_op_ppc_mulou_vec:
32
+static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
30
case INDEX_op_ppc_pkum_vec:
33
{
31
case INDEX_op_ppc_rotl_vec:
34
return tlb_read_idx(entry, MMU_DATA_STORE);
32
+ case INDEX_op_dup2_vec:
35
}
33
return &v_v_v;
36
34
case INDEX_op_not_vec:
37
/* Find the TLB index corresponding to the mmu_idx + address pair. */
35
case INDEX_op_dup_vec:
38
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
39
- target_ulong addr)
40
+ vaddr addr)
41
{
42
uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
43
44
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
45
46
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
47
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
48
- target_ulong addr)
49
+ vaddr addr)
50
{
51
return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)];
52
}
53
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/accel/tcg/cputlb.c
56
+++ b/accel/tcg/cputlb.c
57
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
58
assert_cpu_is_self(env_cpu(env));
59
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
60
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
61
- target_ulong cmp = tlb_read_idx(vtlb, access_type);
62
+ uint64_t cmp = tlb_read_idx(vtlb, access_type);
63
64
if (cmp == page) {
65
/* Found entry in victim tlb, swap tlb and iotlb. */
66
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
67
{
68
uintptr_t index = tlb_index(env, mmu_idx, addr);
69
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
70
- target_ulong tlb_addr = tlb_read_idx(entry, access_type);
71
+ uint64_t tlb_addr = tlb_read_idx(entry, access_type);
72
target_ulong page_addr = addr & TARGET_PAGE_MASK;
73
int flags = TLB_FLAGS_MASK;
74
75
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
76
CPUArchState *env = cpu->env_ptr;
77
CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
78
uintptr_t index = tlb_index(env, mmu_idx, addr);
79
- vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
80
+ uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
81
82
if (likely(tlb_hit(tlb_addr, addr))) {
83
/* We must have an iotlb entry for MMIO */
84
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
85
target_ulong addr = data->addr;
86
uintptr_t index = tlb_index(env, mmu_idx, addr);
87
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
88
- target_ulong tlb_addr = tlb_read_idx(entry, access_type);
89
+ uint64_t tlb_addr = tlb_read_idx(entry, access_type);
90
bool maybe_resized = false;
91
92
/* If the TLB entry is for a different page, reload and try again. */
36
--
93
--
37
2.17.1
94
2.34.1
38
39
diff view generated by jsdifflib
Add various bits and pieces related mostly to load and store
operations. In that context, logic, compare, and splat Altivec
instructions are used, and therefore support for emitting
them is included in this patch too.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 6 +-
tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++---
2 files changed, 442 insertions(+), 36 deletions(-)

From: Anton Johansson <anjo@rev.ng>

Functions accessing MMULookupPageData are also updated.

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-6-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
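
The splat-immediate path added here (tcg_out_dupi_vec) only emits VSPLTISB/VSPLTISH/VSPLTISW when the constant is one small signed value replicated across every lane. Below is a hedged sketch of that test; dup_const() is a local stand-in for TCG's helper of the same name, and the 5-bit immediate range follows the patch's low >= -16 && low < 16 check.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

static uint64_t dup_const(unsigned lane_bits, int64_t low)
{
    /* Replicate the low lane_bits of 'low' across all 64 bits. */
    uint64_t mask = (lane_bits == 64) ? ~0ull : (1ull << lane_bits) - 1;
    uint64_t lane = (uint64_t)low & mask;
    uint64_t val = 0;

    for (unsigned i = 0; i < 64; i += lane_bits) {
        val |= lane << i;
    }
    return val;
}

static bool can_use_vspltis(int64_t val, unsigned lane_bits)
{
    int64_t low = (int8_t)val;              /* sign-extended low byte, as in the patch */

    /* VSPLTIS* immediates are 5-bit signed: -16..15. */
    return low >= -16 && low < 16 && (uint64_t)val == dup_const(lane_bits, low);
}

int main(void)
{
    printf("%d\n", can_use_vspltis(0x0707070707070707LL, 8));             /* 1 */
    printf("%d\n", can_use_vspltis((int64_t)0xfffefffefffefffeULL, 16));  /* 1: splat of -2 */
    printf("%d\n", can_use_vspltis(0x0102030405060708LL, 8));             /* 0 */
    return 0;
}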
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
13
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
15
--- a/accel/tcg/cputlb.c
16
+++ b/tcg/ppc/tcg-target.h
16
+++ b/accel/tcg/cputlb.c
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
17
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
18
#define TCG_TARGET_HAS_v128 have_altivec
18
typedef struct MMULookupPageData {
19
#define TCG_TARGET_HAS_v256 0
19
CPUTLBEntryFull *full;
20
20
void *haddr;
21
-#define TCG_TARGET_HAS_andc_vec 0
21
- target_ulong addr;
22
+#define TCG_TARGET_HAS_andc_vec 1
22
+ vaddr addr;
23
#define TCG_TARGET_HAS_orc_vec 0
23
int flags;
24
-#define TCG_TARGET_HAS_not_vec 0
24
int size;
25
+#define TCG_TARGET_HAS_not_vec 1
25
} MMULookupPageData;
26
#define TCG_TARGET_HAS_neg_vec 0
26
@@ -XXX,XX +XXX,XX @@ typedef struct MMULookupLocals {
27
#define TCG_TARGET_HAS_abs_vec 0
27
static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
28
#define TCG_TARGET_HAS_shi_vec 0
28
int mmu_idx, MMUAccessType access_type, uintptr_t ra)
29
#define TCG_TARGET_HAS_shs_vec 0
30
#define TCG_TARGET_HAS_shv_vec 0
31
-#define TCG_TARGET_HAS_cmp_vec 0
32
+#define TCG_TARGET_HAS_cmp_vec 1
33
#define TCG_TARGET_HAS_mul_vec 0
34
#define TCG_TARGET_HAS_sat_vec 0
35
#define TCG_TARGET_HAS_minmax_vec 0
36
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/ppc/tcg-target.inc.c
39
+++ b/tcg/ppc/tcg-target.inc.c
40
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
41
ct->ct |= TCG_CT_REG;
42
ct->u.regs = 0xffffffff;
43
break;
44
+ case 'v':
45
+ ct->ct |= TCG_CT_REG;
46
+ ct->u.regs = 0xffffffff00000000ull;
47
+ break;
48
case 'L': /* qemu_ld constraint */
49
ct->ct |= TCG_CT_REG;
50
ct->u.regs = 0xffffffff;
51
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
52
53
#define NOP ORI /* ori 0,0,0 */
54
55
+#define LVX XO31(103)
56
+#define LVEBX XO31(7)
57
+#define LVEHX XO31(39)
58
+#define LVEWX XO31(71)
59
+
60
+#define STVX XO31(231)
61
+#define STVEWX XO31(199)
62
+
63
+#define VCMPEQUB VX4(6)
64
+#define VCMPEQUH VX4(70)
65
+#define VCMPEQUW VX4(134)
66
+#define VCMPGTSB VX4(774)
67
+#define VCMPGTSH VX4(838)
68
+#define VCMPGTSW VX4(902)
69
+#define VCMPGTUB VX4(518)
70
+#define VCMPGTUH VX4(582)
71
+#define VCMPGTUW VX4(646)
72
+
73
+#define VAND VX4(1028)
74
+#define VANDC VX4(1092)
75
+#define VNOR VX4(1284)
76
+#define VOR VX4(1156)
77
+#define VXOR VX4(1220)
78
+
79
+#define VSPLTB VX4(524)
80
+#define VSPLTH VX4(588)
81
+#define VSPLTW VX4(652)
82
+#define VSPLTISB VX4(780)
83
+#define VSPLTISH VX4(844)
84
+#define VSPLTISW VX4(908)
85
+
86
+#define VSLDOI VX4(44)
87
+
88
#define RT(r) ((r)<<21)
89
#define RS(r) ((r)<<21)
90
#define RA(r) ((r)<<16)
91
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
92
intptr_t value, intptr_t addend)
93
{
29
{
94
tcg_insn_unit *target;
30
- target_ulong addr = data->addr;
95
+ int16_t lo;
31
+ vaddr addr = data->addr;
96
+ int32_t hi;
32
uintptr_t index = tlb_index(env, mmu_idx, addr);
97
33
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
98
value += addend;
34
uint64_t tlb_addr = tlb_read_idx(entry, access_type);
99
target = (tcg_insn_unit *)value;
35
@@ -XXX,XX +XXX,XX @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
100
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
36
MMUAccessType access_type, uintptr_t ra)
101
}
102
*code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
103
break;
104
+ case R_PPC_ADDR32:
105
+ /*
106
+ * We are abusing this relocation type. Again, this points to
107
+ * a pair of insns, lis + load. This is an absolute address
108
+ * relocation for PPC32 so the lis cannot be removed.
109
+ */
110
+ lo = value;
111
+ hi = value - lo;
112
+ if (hi + lo != value) {
113
+ return false;
114
+ }
115
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
116
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
117
+ break;
118
default:
119
g_assert_not_reached();
120
}
121
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
122
123
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
124
{
37
{
125
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
38
CPUTLBEntryFull *full = data->full;
126
- if (ret != arg) {
39
- target_ulong addr = data->addr;
127
- tcg_out32(s, OR | SAB(arg, ret, arg));
40
+ vaddr addr = data->addr;
128
+ if (ret == arg) {
41
int flags = data->flags;
129
+ return true;
42
int size = data->size;
130
+ }
43
131
+ switch (type) {
44
@@ -XXX,XX +XXX,XX @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
132
+ case TCG_TYPE_I64:
45
* Resolve the translation for the page(s) beginning at @addr, for MemOp.size
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
46
* bytes. Return true if the lookup crosses a page boundary.
134
+ /* fallthru */
47
*/
135
+ case TCG_TYPE_I32:
48
-static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
136
+ if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
49
+static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
137
+ tcg_out32(s, OR | SAB(arg, ret, arg));
50
uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
138
+ break;
51
{
139
+ } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
52
unsigned a_bits;
140
+ /* Altivec does not support vector/integer moves. */
53
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
141
+ return false;
54
MMUAccessType type, uintptr_t ra)
142
+ }
55
{
143
+ /* fallthru */
56
CPUTLBEntryFull *full = p->full;
144
+ case TCG_TYPE_V64:
57
- target_ulong addr = p->addr;
145
+ case TCG_TYPE_V128:
58
+ vaddr addr = p->addr;
146
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
59
int i, size = p->size;
147
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
60
148
+ break;
61
QEMU_IOTHREAD_LOCK_GUARD();
149
+ default:
62
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
150
+ g_assert_not_reached();
63
return ret;
151
}
152
return true;
153
}
64
}
154
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
65
155
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
66
-static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
156
tcg_target_long val)
67
+static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
68
uintptr_t ra, MMUAccessType access_type)
157
{
69
{
158
- g_assert_not_reached();
70
MMULookupLocals l;
159
+ uint32_t load_insn;
71
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
160
+ int rel, low;
72
return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
161
+ intptr_t add;
162
+
163
+ low = (int8_t)val;
164
+ if (low >= -16 && low < 16) {
165
+ if (val == (tcg_target_long)dup_const(MO_8, low)) {
166
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
167
+ return;
168
+ }
169
+ if (val == (tcg_target_long)dup_const(MO_16, low)) {
170
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
171
+ return;
172
+ }
173
+ if (val == (tcg_target_long)dup_const(MO_32, low)) {
174
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
175
+ return;
176
+ }
177
+ }
178
+
179
+ /*
180
+ * Otherwise we must load the value from the constant pool.
181
+ */
182
+ if (USE_REG_TB) {
183
+ rel = R_PPC_ADDR16;
184
+ add = -(intptr_t)s->code_gen_ptr;
185
+ } else {
186
+ rel = R_PPC_ADDR32;
187
+ add = 0;
188
+ }
189
+
190
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
191
+ if (TCG_TARGET_REG_BITS == 64) {
192
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
193
+ } else {
194
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
195
+ }
196
+
197
+ if (USE_REG_TB) {
198
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
199
+ load_insn |= RA(TCG_REG_TB);
200
+ } else {
201
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
202
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
203
+ }
204
+ tcg_out32(s, load_insn);
205
}
73
}
206
74
207
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
75
-static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
208
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
76
+static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
209
align = 3;
77
uintptr_t ra, MMUAccessType access_type)
210
/* FALLTHRU */
78
{
211
default:
79
MMULookupLocals l;
212
- if (rt != TCG_REG_R0) {
80
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
213
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
81
return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
214
rs = rt;
215
break;
216
}
217
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
218
}
219
220
/* For unaligned, or very large offsets, use the indexed form. */
221
- if (offset & align || offset != (int32_t)offset) {
222
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
223
if (rs == base) {
224
rs = TCG_REG_R0;
225
}
226
tcg_debug_assert(!is_store || rs != rt);
227
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
228
- tcg_out32(s, opx | TAB(rt, base, rs));
229
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
230
return;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
234
base = rs;
235
}
236
if (opi != ADDI || base != rt || l0 != 0) {
237
- tcg_out32(s, opi | TAI(rt, base, l0));
238
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
239
}
240
}
82
}
241
83
242
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
84
-static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
243
- TCGReg arg1, intptr_t arg2)
85
+static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
244
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
86
uintptr_t ra, MMUAccessType access_type)
245
+ TCGReg va, TCGReg vb, int shb)
246
{
87
{
247
- int opi, opx;
88
MMULookupLocals l;
248
-
89
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
249
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
90
return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
250
- if (type == TCG_TYPE_I32) {
251
- opi = LWZ, opx = LWZX;
252
- } else {
253
- opi = LD, opx = LDX;
254
- }
255
- tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
256
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
257
}
91
}
258
92
259
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
93
-static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
260
- TCGReg arg1, intptr_t arg2)
94
+static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
261
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
95
uintptr_t ra, MMUAccessType access_type)
262
+ TCGReg base, intptr_t offset)
263
{
96
{
264
- int opi, opx;
97
MMULookupLocals l;
265
+ int shift;
98
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
266
99
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
267
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
268
- if (type == TCG_TYPE_I32) {
269
- opi = STW, opx = STWX;
270
- } else {
271
- opi = STD, opx = STDX;
272
+ switch (type) {
273
+ case TCG_TYPE_I32:
274
+ if (ret < TCG_REG_V0) {
275
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
276
+ break;
277
+ }
278
+ tcg_debug_assert((offset & 3) == 0);
279
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
280
+ shift = (offset - 4) & 0xc;
281
+ if (shift) {
282
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
283
+ }
284
+ break;
285
+ case TCG_TYPE_I64:
286
+ if (ret < TCG_REG_V0) {
287
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
288
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
289
+ break;
290
+ }
291
+ /* fallthru */
292
+ case TCG_TYPE_V64:
293
+ tcg_debug_assert(ret >= TCG_REG_V0);
294
+ tcg_debug_assert((offset & 7) == 0);
295
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
296
+ if (offset & 8) {
297
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
298
+ }
299
+ break;
300
+ case TCG_TYPE_V128:
301
+ tcg_debug_assert(ret >= TCG_REG_V0);
302
+ tcg_debug_assert((offset & 15) == 0);
303
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset);
304
+ break;
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
+}
309
+
310
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
311
+ TCGReg base, intptr_t offset)
312
+{
313
+ int shift;
314
+
315
+ switch (type) {
316
+ case TCG_TYPE_I32:
317
+ if (arg < TCG_REG_V0) {
318
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
319
+ break;
320
+ }
321
+ tcg_debug_assert((offset & 3) == 0);
322
+ shift = (offset - 4) & 0xc;
323
+ if (shift) {
324
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
325
+ arg = TCG_VEC_TMP1;
326
+ }
327
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
328
+ break;
329
+ case TCG_TYPE_I64:
330
+ if (arg < TCG_REG_V0) {
331
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
332
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
333
+ break;
334
+ }
335
+ /* fallthru */
336
+ case TCG_TYPE_V64:
337
+ tcg_debug_assert(arg >= TCG_REG_V0);
338
+ tcg_debug_assert((offset & 7) == 0);
339
+ if (offset & 8) {
340
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
341
+ arg = TCG_VEC_TMP1;
342
+ }
343
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
344
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
345
+ break;
346
+ case TCG_TYPE_V128:
347
+ tcg_debug_assert(arg >= TCG_REG_V0);
348
+ tcg_out_mem_long(s, 0, STVX, arg, base, offset);
349
+ break;
350
+ default:
351
+ g_assert_not_reached();
352
}
353
- tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
354
}
100
}
355
101
356
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
102
-static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
357
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
103
+static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
358
104
MemOpIdx oi, uintptr_t ra)
359
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
360
{
105
{
361
- g_assert_not_reached();
106
MMULookupLocals l;
362
+ switch (opc) {
107
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
363
+ case INDEX_op_and_vec:
108
uint64_t val_le, int mmu_idx, uintptr_t ra)
364
+ case INDEX_op_or_vec:
109
{
365
+ case INDEX_op_xor_vec:
110
CPUTLBEntryFull *full = p->full;
366
+ case INDEX_op_andc_vec:
111
- target_ulong addr = p->addr;
367
+ case INDEX_op_not_vec:
112
+ vaddr addr = p->addr;
368
+ return 1;
113
int i, size = p->size;
369
+ case INDEX_op_cmp_vec:
114
370
+ return vece <= MO_32 ? -1 : 0;
115
QEMU_IOTHREAD_LOCK_GUARD();
371
+ default:
116
@@ -XXX,XX +XXX,XX @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
372
+ return 0;
117
do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
373
+ }
374
}
118
}
375
119
376
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
120
-static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
377
TCGReg dst, TCGReg src)
121
+static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val,
122
MemOpIdx oi, uintptr_t ra)
378
{
123
{
379
- g_assert_not_reached();
124
MMULookupLocals l;
380
+ tcg_debug_assert(dst >= TCG_REG_V0);
125
@@ -XXX,XX +XXX,XX @@ void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
381
+ tcg_debug_assert(src >= TCG_REG_V0);
126
do_st2_mmu(env, addr, val, oi, retaddr);
382
+
383
+ /*
384
+ * Recall we use (or emulate) VSX integer loads, so the integer is
385
+ * right justified within the left (zero-index) double-word.
386
+ */
387
+ switch (vece) {
388
+ case MO_8:
389
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
390
+ break;
391
+ case MO_16:
392
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
393
+ break;
394
+ case MO_32:
395
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
396
+ break;
397
+ case MO_64:
398
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
399
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
400
+ break;
401
+ default:
402
+ g_assert_not_reached();
403
+ }
404
+ return true;
405
}
127
}
406
128
407
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
129
-static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
408
TCGReg out, TCGReg base, intptr_t offset)
130
+static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val,
131
MemOpIdx oi, uintptr_t ra)
409
{
132
{
410
- g_assert_not_reached();
133
MMULookupLocals l;
411
+ int elt;
134
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
412
+
135
do_st4_mmu(env, addr, val, oi, retaddr);
413
+ tcg_debug_assert(out >= TCG_REG_V0);
414
+ switch (vece) {
415
+ case MO_8:
416
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
417
+ elt = extract32(offset, 0, 4);
418
+#ifndef HOST_WORDS_BIGENDIAN
419
+ elt ^= 15;
420
+#endif
421
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
422
+ break;
423
+ case MO_16:
424
+ tcg_debug_assert((offset & 1) == 0);
425
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
426
+ elt = extract32(offset, 1, 3);
427
+#ifndef HOST_WORDS_BIGENDIAN
428
+ elt ^= 7;
429
+#endif
430
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
431
+ break;
432
+ case MO_32:
433
+ tcg_debug_assert((offset & 3) == 0);
434
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
435
+ elt = extract32(offset, 2, 2);
436
+#ifndef HOST_WORDS_BIGENDIAN
437
+ elt ^= 3;
438
+#endif
439
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
440
+ break;
441
+ case MO_64:
442
+ tcg_debug_assert((offset & 7) == 0);
443
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
444
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
445
+ elt = extract32(offset, 3, 1);
446
+#ifndef HOST_WORDS_BIGENDIAN
447
+ elt = !elt;
448
+#endif
449
+ if (elt) {
450
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
451
+ } else {
452
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
453
+ }
454
+ break;
455
+ default:
456
+ g_assert_not_reached();
457
+ }
458
+ return true;
459
}
136
}
460
137
461
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
138
-static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
462
unsigned vecl, unsigned vece,
139
+static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val,
463
const TCGArg *args, const int *const_args)
140
MemOpIdx oi, uintptr_t ra)
464
{
141
{
465
- g_assert_not_reached();
142
MMULookupLocals l;
466
+ static const uint32_t
143
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
467
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
144
do_st8_mmu(env, addr, val, oi, retaddr);
468
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
469
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
470
+
471
+ TCGType type = vecl + TCG_TYPE_V64;
472
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
473
+ uint32_t insn;
474
+
475
+ switch (opc) {
476
+ case INDEX_op_ld_vec:
477
+ tcg_out_ld(s, type, a0, a1, a2);
478
+ return;
479
+ case INDEX_op_st_vec:
480
+ tcg_out_st(s, type, a0, a1, a2);
481
+ return;
482
+ case INDEX_op_dupm_vec:
483
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
484
+ return;
485
+
486
+ case INDEX_op_and_vec:
487
+ insn = VAND;
488
+ break;
489
+ case INDEX_op_or_vec:
490
+ insn = VOR;
491
+ break;
492
+ case INDEX_op_xor_vec:
493
+ insn = VXOR;
494
+ break;
495
+ case INDEX_op_andc_vec:
496
+ insn = VANDC;
497
+ break;
498
+ case INDEX_op_not_vec:
499
+ insn = VNOR;
500
+ a2 = a1;
501
+ break;
502
+
503
+ case INDEX_op_cmp_vec:
504
+ switch (args[3]) {
505
+ case TCG_COND_EQ:
506
+ insn = eq_op[vece];
507
+ break;
508
+ case TCG_COND_GT:
509
+ insn = gts_op[vece];
510
+ break;
511
+ case TCG_COND_GTU:
512
+ insn = gtu_op[vece];
513
+ break;
514
+ default:
515
+ g_assert_not_reached();
516
+ }
517
+ break;
518
+
519
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
520
+ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
521
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
522
+ default:
523
+ g_assert_not_reached();
524
+ }
525
+
526
+ tcg_debug_assert(insn != 0);
527
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
528
+}
529
+
530
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
531
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
532
+{
533
+ bool need_swap = false, need_inv = false;
534
+
535
+ tcg_debug_assert(vece <= MO_32);
536
+
537
+ switch (cond) {
538
+ case TCG_COND_EQ:
539
+ case TCG_COND_GT:
540
+ case TCG_COND_GTU:
541
+ break;
542
+ case TCG_COND_NE:
543
+ case TCG_COND_LE:
544
+ case TCG_COND_LEU:
545
+ need_inv = true;
546
+ break;
547
+ case TCG_COND_LT:
548
+ case TCG_COND_LTU:
549
+ need_swap = true;
550
+ break;
551
+ case TCG_COND_GE:
552
+ case TCG_COND_GEU:
553
+ need_swap = need_inv = true;
554
+ break;
555
+ default:
556
+ g_assert_not_reached();
557
+ }
558
+
559
+ if (need_inv) {
560
+ cond = tcg_invert_cond(cond);
561
+ }
562
+ if (need_swap) {
563
+ TCGv_vec t1;
564
+ t1 = v1, v1 = v2, v2 = t1;
565
+ cond = tcg_swap_cond(cond);
566
+ }
567
+
568
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
569
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
570
+
571
+ if (need_inv) {
572
+ tcg_gen_not_vec(vece, v0, v0);
573
+ }
574
}
145
}
575
146
576
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
147
-static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
577
TCGArg a0, ...)
148
+static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
149
MemOpIdx oi, uintptr_t ra)
578
{
150
{
579
- g_assert_not_reached();
151
MMULookupLocals l;
580
+ va_list va;
581
+ TCGv_vec v0, v1, v2;
582
+
583
+ va_start(va, a0);
584
+ v0 = temp_tcgv_vec(arg_temp(a0));
585
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
586
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
587
+
588
+ switch (opc) {
589
+ case INDEX_op_cmp_vec:
590
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
591
+ break;
592
+ default:
593
+ g_assert_not_reached();
594
+ }
595
+ va_end(va);
596
}
597
598
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
599
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
600
= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
601
static const TCGTargetOpDef sub2
602
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
603
+ static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
604
+ static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
605
+ static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
606
607
switch (op) {
608
case INDEX_op_goto_ptr:
609
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
610
return (TCG_TARGET_REG_BITS == 64 ? &S_S
611
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
612
613
+ case INDEX_op_and_vec:
614
+ case INDEX_op_or_vec:
615
+ case INDEX_op_xor_vec:
616
+ case INDEX_op_andc_vec:
617
+ case INDEX_op_orc_vec:
618
+ case INDEX_op_cmp_vec:
619
+ return &v_v_v;
620
+ case INDEX_op_not_vec:
621
+ case INDEX_op_dup_vec:
622
+ return &v_v;
623
+ case INDEX_op_ld_vec:
624
+ case INDEX_op_st_vec:
625
+ case INDEX_op_dupm_vec:
626
+ return &v_r;
627
+
628
default:
629
return NULL;
630
}
631
--
152
--
632
2.17.1
153
2.34.1
633
634
diff view generated by jsdifflib
1
Add support for vector saturated add/subtract using Altivec
1
From: Anton Johansson <anjo@rev.ng>
2
instructions:
3
VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and
4
VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS.
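
For readers unfamiliar with the saturating forms listed above, the sketch below models the lane-wise behaviour of VADDSBS on plain C arrays; it is only an illustration of the semantics, not code from this patch.

#include <stdint.h>
#include <stdio.h>

static int8_t sadd8_sat(int8_t a, int8_t b)
{
    /* Add and clamp to [-128, 127] instead of wrapping. */
    int sum = (int)a + (int)b;

    if (sum > INT8_MAX) {
        return INT8_MAX;
    }
    if (sum < INT8_MIN) {
        return INT8_MIN;
    }
    return (int8_t)sum;
}

int main(void)
{
    int8_t va[16], vb[16], vt[16];

    for (int i = 0; i < 16; i++) {
        va[i] = 100;
        vb[i] = (int8_t)(i * 8);
    }
    /* vaddsbs vt, va, vb: 16 independent saturating byte adds. */
    for (int i = 0; i < 16; i++) {
        vt[i] = sadd8_sat(va[i], vb[i]);
    }
    for (int i = 0; i < 16; i++) {
        printf("%d ", vt[i]);   /* lanes 4..15 clamp to 127 */
    }
    printf("\n");
    return 0;
}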
5
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230621135633.1649-7-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
---
7
---
9
tcg/ppc/tcg-target.h | 2 +-
8
accel/tcg/cpu-exec.c | 34 +++++++++++++++++-----------------
10
tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++
9
1 file changed, 17 insertions(+), 17 deletions(-)
11
2 files changed, 37 insertions(+), 1 deletion(-)
12
10
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
13
--- a/accel/tcg/cpu-exec.c
16
+++ b/tcg/ppc/tcg-target.h
14
+++ b/accel/tcg/cpu-exec.c
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
15
@@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu)
18
#define TCG_TARGET_HAS_shv_vec 0
16
}
19
#define TCG_TARGET_HAS_cmp_vec 1
17
20
#define TCG_TARGET_HAS_mul_vec 0
18
struct tb_desc {
21
-#define TCG_TARGET_HAS_sat_vec 0
19
- target_ulong pc;
22
+#define TCG_TARGET_HAS_sat_vec 1
20
- target_ulong cs_base;
23
#define TCG_TARGET_HAS_minmax_vec 1
21
+ vaddr pc;
24
#define TCG_TARGET_HAS_bitsel_vec 0
22
+ uint64_t cs_base;
25
#define TCG_TARGET_HAS_cmpsel_vec 0
23
CPUArchState *env;
26
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
24
tb_page_addr_t page_addr0;
27
index XXXXXXX..XXXXXXX 100644
25
uint32_t flags;
28
--- a/tcg/ppc/tcg-target.inc.c
26
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
29
+++ b/tcg/ppc/tcg-target.inc.c
27
return true;
30
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
28
} else {
31
#define STVX XO31(231)
29
tb_page_addr_t phys_page1;
32
#define STVEWX XO31(199)
30
- target_ulong virt_page1;
33
31
+ vaddr virt_page1;
34
+#define VADDSBS VX4(768)
32
35
+#define VADDUBS VX4(512)
33
/*
36
#define VADDUBM VX4(0)
34
* We know that the first page matched, and an otherwise valid TB
37
+#define VADDSHS VX4(832)
35
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
38
+#define VADDUHS VX4(576)
36
return false;
39
#define VADDUHM VX4(64)
37
}
40
+#define VADDSWS VX4(896)
38
41
+#define VADDUWS VX4(640)
39
-static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
42
#define VADDUWM VX4(128)
40
- target_ulong cs_base, uint32_t flags,
43
41
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
44
+#define VSUBSBS VX4(1792)
42
+ uint64_t cs_base, uint32_t flags,
45
+#define VSUBUBS VX4(1536)
43
uint32_t cflags)
46
#define VSUBUBM VX4(1024)
44
{
47
+#define VSUBSHS VX4(1856)
45
tb_page_addr_t phys_pc;
48
+#define VSUBUHS VX4(1600)
46
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
49
#define VSUBUHM VX4(1088)
47
}
50
+#define VSUBSWS VX4(1920)
48
51
+#define VSUBUWS VX4(1664)
49
/* Might cause an exception, so have a longjmp destination ready */
52
#define VSUBUWM VX4(1152)
50
-static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
53
51
- target_ulong cs_base,
54
#define VMAXSB VX4(258)
52
- uint32_t flags, uint32_t cflags)
55
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
53
+static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
56
case INDEX_op_smin_vec:
54
+ uint64_t cs_base, uint32_t flags,
57
case INDEX_op_umax_vec:
55
+ uint32_t cflags)
58
case INDEX_op_umin_vec:
56
{
59
+ case INDEX_op_ssadd_vec:
57
TranslationBlock *tb;
60
+ case INDEX_op_sssub_vec:
58
CPUJumpCache *jc;
61
+ case INDEX_op_usadd_vec:
59
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
62
+ case INDEX_op_ussub_vec:
60
return tb;
63
return vece <= MO_32;
61
}
64
case INDEX_op_cmp_vec:
62
65
return vece <= MO_32 ? -1 : 0;
63
-static void log_cpu_exec(target_ulong pc, CPUState *cpu,
66
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
64
+static void log_cpu_exec(vaddr pc, CPUState *cpu,
67
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
65
const TranslationBlock *tb)
68
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
66
{
69
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
67
if (qemu_log_in_addr_range(pc)) {
70
+ ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
68
qemu_log_mask(CPU_LOG_EXEC,
71
+ usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
69
"Trace %d: %p [%08" PRIx64
72
+ sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
70
- "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
73
+ ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
71
+ "/%" VADDR_PRIx "/%08x/%08x] %s\n",
74
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
72
cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
75
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
73
tb->flags, tb->cflags, lookup_symbol(pc));
76
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
74
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
75
@@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
78
case INDEX_op_sub_vec:
76
}
79
insn = sub_op[vece];
77
}
80
break;
78
81
+ case INDEX_op_ssadd_vec:
79
-static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
82
+ insn = ssadd_op[vece];
80
+static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
83
+ break;
81
uint32_t *cflags)
84
+ case INDEX_op_sssub_vec:
82
{
85
+ insn = sssub_op[vece];
83
CPUBreakpoint *bp;
86
+ break;
84
@@ -XXX,XX +XXX,XX @@ static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
87
+ case INDEX_op_usadd_vec:
85
return false;
88
+ insn = usadd_op[vece];
86
}
89
+ break;
87
90
+ case INDEX_op_ussub_vec:
88
-static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
91
+ insn = ussub_op[vece];
89
+static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
92
+ break;
90
uint32_t *cflags)
93
case INDEX_op_smin_vec:
91
{
94
insn = smin_op[vece];
92
return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
95
break;
93
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
96
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
94
cc->set_pc(cpu, last_tb->pc);
97
case INDEX_op_andc_vec:
95
}
98
case INDEX_op_orc_vec:
96
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
99
case INDEX_op_cmp_vec:
97
- target_ulong pc = log_pc(cpu, last_tb);
100
+ case INDEX_op_ssadd_vec:
98
+ vaddr pc = log_pc(cpu, last_tb);
101
+ case INDEX_op_sssub_vec:
99
if (qemu_log_in_addr_range(pc)) {
102
+ case INDEX_op_usadd_vec:
100
- qemu_log("Stopped execution of TB chain before %p ["
103
+ case INDEX_op_ussub_vec:
101
- TARGET_FMT_lx "] %s\n",
104
case INDEX_op_smax_vec:
102
+ qemu_log("Stopped execution of TB chain before %p [%"
105
case INDEX_op_smin_vec:
103
+ VADDR_PRIx "] %s\n",
106
case INDEX_op_umax_vec:
104
last_tb->tc.ptr, pc, lookup_symbol(pc));
105
}
106
}
107
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
108
}
109
110
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
111
- target_ulong pc,
112
- TranslationBlock **last_tb, int *tb_exit)
113
+ vaddr pc, TranslationBlock **last_tb,
114
+ int *tb_exit)
115
{
116
int32_t insns_left;
117
107
--
118
--
108
2.17.1
119
2.34.1
109
110
diff view generated by jsdifflib
1
These new instructions are conditional only on MSR.VEC and
1
From: Anton Johansson <anjo@rev.ng>
2
are thus part of the Altivec instruction set, and not VSX.
3
This includes negation and compare not equal.
4
2
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
3
Related functions dealing with the jump cache are also updated.
4
5
Signed-off-by: Anton Johansson <anjo@rev.ng>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-Id: <20230621135633.1649-8-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
9
---
8
tcg/ppc/tcg-target.h | 2 +-
10
accel/tcg/tb-hash.h | 12 ++++++------
9
tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++
11
accel/tcg/tb-jmp-cache.h | 2 +-
10
2 files changed, 24 insertions(+), 1 deletion(-)
12
accel/tcg/cputlb.c | 2 +-
13
3 files changed, 8 insertions(+), 8 deletions(-)
11
14
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
15
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
17
--- a/accel/tcg/tb-hash.h
15
+++ b/tcg/ppc/tcg-target.h
18
+++ b/accel/tcg/tb-hash.h
16
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
19
@@ -XXX,XX +XXX,XX @@
17
#define TCG_TARGET_HAS_andc_vec 1
20
#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
18
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
21
#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
19
#define TCG_TARGET_HAS_not_vec 1
22
20
-#define TCG_TARGET_HAS_neg_vec 0
23
-static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
21
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
24
+static inline unsigned int tb_jmp_cache_hash_page(vaddr pc)
22
#define TCG_TARGET_HAS_abs_vec 0
25
{
23
#define TCG_TARGET_HAS_shi_vec 0
26
- target_ulong tmp;
24
#define TCG_TARGET_HAS_shs_vec 0
27
+ vaddr tmp;
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
28
tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
29
return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK;
30
}
31
32
-static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
33
+static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
34
{
35
- target_ulong tmp;
36
+ vaddr tmp;
37
tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
38
return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK)
39
| (tmp & TB_JMP_ADDR_MASK));
40
@@ -XXX,XX +XXX,XX @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
41
#else
42
43
/* In user-mode we can get better hashing because we do not have a TLB */
44
-static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
45
+static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
46
{
47
return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
48
}
49
@@ -XXX,XX +XXX,XX @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
50
#endif /* CONFIG_SOFTMMU */
51
52
static inline
53
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
54
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, vaddr pc,
55
uint32_t flags, uint64_t flags2, uint32_t cf_mask)
56
{
57
return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask);
58
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
26
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/ppc/tcg-target.inc.c
60
--- a/accel/tcg/tb-jmp-cache.h
28
+++ b/tcg/ppc/tcg-target.inc.c
61
+++ b/accel/tcg/tb-jmp-cache.h
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
62
@@ -XXX,XX +XXX,XX @@ struct CPUJumpCache {
30
#define VSUBUWM VX4(1152)
63
struct rcu_head rcu;
31
#define VSUBUDM VX4(1216) /* v2.07 */
64
struct {
32
65
TranslationBlock *tb;
33
+#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */
66
- target_ulong pc;
34
+#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */
67
+ vaddr pc;
35
+
68
} array[TB_JMP_CACHE_SIZE];
36
#define VMAXSB VX4(258)
69
};
37
#define VMAXSH VX4(322)
70
38
#define VMAXSW VX4(386)
71
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
39
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
72
index XXXXXXX..XXXXXXX 100644
40
#define VCMPGTUH VX4(582)
73
--- a/accel/tcg/cputlb.c
41
#define VCMPGTUW VX4(646)
74
+++ b/accel/tcg/cputlb.c
42
#define VCMPGTUD VX4(711) /* v2.07 */
75
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
43
+#define VCMPNEB VX4(7) /* v3.00 */
76
desc->window_max_entries = max_entries;
44
+#define VCMPNEH VX4(71) /* v3.00 */
77
}
45
+#define VCMPNEW VX4(135) /* v3.00 */
78
46
79
-static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
47
#define VSLB VX4(260)
80
+static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr)
48
#define VSLH VX4(324)
81
{
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
82
CPUJumpCache *jc = cpu->tb_jmp_cache;
50
case INDEX_op_shri_vec:
83
int i, i0;
51
case INDEX_op_sari_vec:
52
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
53
+ case INDEX_op_neg_vec:
54
+ return vece >= MO_32 && have_isa_3_00;
55
case INDEX_op_mul_vec:
56
switch (vece) {
57
case MO_8:
58
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
59
static const uint32_t
60
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
61
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
62
+ neg_op[4] = { 0, 0, VNEGW, VNEGD },
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
64
+ ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
65
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
66
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
67
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
69
case INDEX_op_sub_vec:
70
insn = sub_op[vece];
71
break;
72
+ case INDEX_op_neg_vec:
73
+ insn = neg_op[vece];
74
+ a2 = a1;
75
+ a1 = 0;
76
+ break;
77
case INDEX_op_mul_vec:
78
tcg_debug_assert(vece == MO_32 && have_isa_2_07);
79
insn = VMULUWM;
80
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
81
case TCG_COND_EQ:
82
insn = eq_op[vece];
83
break;
84
+ case TCG_COND_NE:
85
+ insn = ne_op[vece];
86
+ break;
87
case TCG_COND_GT:
88
insn = gts_op[vece];
89
break;
90
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
91
case TCG_COND_GTU:
92
break;
93
case TCG_COND_NE:
94
+ if (have_isa_3_00 && vece <= MO_32) {
95
+ break;
96
+ }
97
+ /* fall through */
98
case TCG_COND_LE:
99
case TCG_COND_LEU:
100
need_inv = true;
101
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
102
case INDEX_op_dup2_vec:
103
return &v_v_v;
104
case INDEX_op_not_vec:
105
+ case INDEX_op_neg_vec:
106
case INDEX_op_dup_vec:
107
return &v_v;
108
case INDEX_op_ld_vec:
109
--
84
--
110
2.17.1
85
2.34.1
111
112
1
For Altivec, this is always an expansion.
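What the expansion has to produce, written as a scalar reference model; illustrative only, the actual lowering via even/odd multiplies, merges and a pack is in the tcg-target.inc.c hunks below:

    /* Reference semantics of INDEX_op_mul_vec at MO_8 on a 16-byte
     * vector: each output byte is the low 8 bits of the product of the
     * corresponding input bytes. */
    static void mul_vec_ref_mo8(uint8_t d[16],
                                const uint8_t a[16], const uint8_t b[16])
    {
        for (int i = 0; i < 16; i++) {
            d[i] = (uint8_t)(a[i] * b[i]);
        }
    }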
1
From: Anton Johansson <anjo@rev.ng>
2
2
3
Functions for probing memory accesses (and functions that call these)
4
are updated to take a vaddr for guest virtual addresses over
5
target_ulong.
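A hedged usage sketch of the converted API; the helper name and access size are made up, only the probe_access() signature follows this patch:

    /* Take any store fault for an 8-byte access up front, before the
     * caller starts modifying guest state. */
    void helper_probe_store8(CPUArchState *env, vaddr addr, uint32_t mmu_idx)
    {
        probe_access(env, addr, 8, MMU_DATA_STORE, mmu_idx, GETPC());
    }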
6
7
Signed-off-by: Anton Johansson <anjo@rev.ng>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20230621135633.1649-9-anjo@rev.ng>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
---
11
---
6
tcg/ppc/tcg-target.h | 2 +-
12
include/exec/exec-all.h | 14 +++++++-------
7
tcg/ppc/tcg-target.opc.h | 8 +++
13
accel/stubs/tcg-stub.c | 4 ++--
8
tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++-
14
accel/tcg/cputlb.c | 12 ++++++------
9
3 files changed, 121 insertions(+), 2 deletions(-)
15
accel/tcg/user-exec.c | 8 ++++----
16
4 files changed, 19 insertions(+), 19 deletions(-)
10
17
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
18
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
12
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
20
--- a/include/exec/exec-all.h
14
+++ b/tcg/ppc/tcg-target.h
21
+++ b/include/exec/exec-all.h
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
22
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
16
#define TCG_TARGET_HAS_shs_vec 0
23
* Finally, return the host address for a page that is backed by RAM,
17
#define TCG_TARGET_HAS_shv_vec 1
24
* or NULL if the page requires I/O.
18
#define TCG_TARGET_HAS_cmp_vec 1
25
*/
19
-#define TCG_TARGET_HAS_mul_vec 0
26
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
20
+#define TCG_TARGET_HAS_mul_vec 1
27
+void *probe_access(CPUArchState *env, vaddr addr, int size,
21
#define TCG_TARGET_HAS_sat_vec 1
28
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr);
22
#define TCG_TARGET_HAS_minmax_vec 1
29
23
#define TCG_TARGET_HAS_bitsel_vec 0
30
-static inline void *probe_write(CPUArchState *env, target_ulong addr, int size,
24
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
31
+static inline void *probe_write(CPUArchState *env, vaddr addr, int size,
32
int mmu_idx, uintptr_t retaddr)
33
{
34
return probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
35
}
36
37
-static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
38
+static inline void *probe_read(CPUArchState *env, vaddr addr, int size,
39
int mmu_idx, uintptr_t retaddr)
40
{
41
return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
42
@@ -XXX,XX +XXX,XX @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
43
* Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
44
* For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
45
*/
46
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
47
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
48
MMUAccessType access_type, int mmu_idx,
49
bool nonfault, void **phost, uintptr_t retaddr);
50
51
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
52
* and must be consumed or copied immediately, before any further
53
* access or changes to TLB @mmu_idx.
54
*/
55
-int probe_access_full(CPUArchState *env, target_ulong addr, int size,
56
+int probe_access_full(CPUArchState *env, vaddr addr, int size,
57
MMUAccessType access_type, int mmu_idx,
58
bool nonfault, void **phost,
59
CPUTLBEntryFull **pfull, uintptr_t retaddr);
60
@@ -XXX,XX +XXX,XX @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
61
*
62
* Note: this function can trigger an exception.
63
*/
64
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
65
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
66
void **hostp);
67
68
/**
69
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
70
* Note: this function can trigger an exception.
71
*/
72
static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
73
- target_ulong addr)
74
+ vaddr addr)
75
{
76
return get_page_addr_code_hostp(env, addr, NULL);
77
}
78
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
25
index XXXXXXX..XXXXXXX 100644
79
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.opc.h
80
--- a/accel/stubs/tcg-stub.c
27
+++ b/tcg/ppc/tcg-target.opc.h
81
+++ b/accel/stubs/tcg-stub.c
28
@@ -XXX,XX +XXX,XX @@
82
@@ -XXX,XX +XXX,XX @@ void tcg_flush_jmp_cache(CPUState *cpu)
29
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
83
{
30
* consider these to be UNSPEC with names.
84
}
31
*/
85
32
+
86
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
33
+DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
87
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
34
+DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
88
MMUAccessType access_type, int mmu_idx,
35
+DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
89
bool nonfault, void **phost, uintptr_t retaddr)
36
+DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
90
{
37
+DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
91
g_assert_not_reached();
38
+DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
92
}
39
+DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
93
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
94
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
95
+void *probe_access(CPUArchState *env, vaddr addr, int size,
96
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
97
{
98
/* Handled by hardware accelerator. */
99
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
41
index XXXXXXX..XXXXXXX 100644
100
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
101
--- a/accel/tcg/cputlb.c
43
+++ b/tcg/ppc/tcg-target.inc.c
102
+++ b/accel/tcg/cputlb.c
44
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
103
@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
45
#define VSRAB VX4(772)
46
#define VSRAH VX4(836)
47
#define VSRAW VX4(900)
48
+#define VRLB VX4(4)
49
+#define VRLH VX4(68)
50
+#define VRLW VX4(132)
51
+
52
+#define VMULEUB VX4(520)
53
+#define VMULEUH VX4(584)
54
+#define VMULOUB VX4(8)
55
+#define VMULOUH VX4(72)
56
+#define VMSUMUHM VX4(38)
57
+
58
+#define VMRGHB VX4(12)
59
+#define VMRGHH VX4(76)
60
+#define VMRGHW VX4(140)
61
+#define VMRGLB VX4(268)
62
+#define VMRGLH VX4(332)
63
+#define VMRGLW VX4(396)
64
+
65
+#define VPKUHUM VX4(14)
66
+#define VPKUWUM VX4(78)
67
68
#define VAND VX4(1028)
69
#define VANDC VX4(1092)
70
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
71
case INDEX_op_sarv_vec:
72
return vece <= MO_32;
73
case INDEX_op_cmp_vec:
74
+ case INDEX_op_mul_vec:
75
case INDEX_op_shli_vec:
76
case INDEX_op_shri_vec:
77
case INDEX_op_sari_vec:
78
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
79
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
80
shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
81
shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
82
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
83
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
84
+ mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
85
+ mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
86
+ muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
87
+ mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
88
+ pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
89
+ rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
90
91
TCGType type = vecl + TCG_TYPE_V64;
92
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
93
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
94
}
95
break;
96
97
+ case INDEX_op_ppc_mrgh_vec:
98
+ insn = mrgh_op[vece];
99
+ break;
100
+ case INDEX_op_ppc_mrgl_vec:
101
+ insn = mrgl_op[vece];
102
+ break;
103
+ case INDEX_op_ppc_muleu_vec:
104
+ insn = muleu_op[vece];
105
+ break;
106
+ case INDEX_op_ppc_mulou_vec:
107
+ insn = mulou_op[vece];
108
+ break;
109
+ case INDEX_op_ppc_pkum_vec:
110
+ insn = pkum_op[vece];
111
+ break;
112
+ case INDEX_op_ppc_rotl_vec:
113
+ insn = rotl_op[vece];
114
+ break;
115
+ case INDEX_op_ppc_msum_vec:
116
+ tcg_debug_assert(vece == MO_16);
117
+ tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
118
+ return;
119
+
120
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
121
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
122
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
123
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
124
}
104
}
125
}
105
}
126
106
127
+static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
107
-static int probe_access_internal(CPUArchState *env, target_ulong addr,
128
+ TCGv_vec v1, TCGv_vec v2)
108
+static int probe_access_internal(CPUArchState *env, vaddr addr,
129
+{
109
int fault_size, MMUAccessType access_type,
130
+ TCGv_vec t1 = tcg_temp_new_vec(type);
110
int mmu_idx, bool nonfault,
131
+ TCGv_vec t2 = tcg_temp_new_vec(type);
111
void **phost, CPUTLBEntryFull **pfull,
132
+ TCGv_vec t3, t4;
112
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
133
+
113
uintptr_t index = tlb_index(env, mmu_idx, addr);
134
+ switch (vece) {
114
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
135
+ case MO_8:
115
uint64_t tlb_addr = tlb_read_idx(entry, access_type);
136
+ case MO_16:
116
- target_ulong page_addr = addr & TARGET_PAGE_MASK;
137
+ vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
117
+ vaddr page_addr = addr & TARGET_PAGE_MASK;
138
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
118
int flags = TLB_FLAGS_MASK;
139
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
119
140
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
120
if (!tlb_hit_page(tlb_addr, page_addr)) {
141
+ vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
121
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
142
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
122
return flags;
143
+ vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
123
}
144
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
124
145
+ vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
125
-int probe_access_full(CPUArchState *env, target_ulong addr, int size,
146
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1));
126
+int probe_access_full(CPUArchState *env, vaddr addr, int size,
147
+    break;
127
MMUAccessType access_type, int mmu_idx,
148
+
128
bool nonfault, void **phost, CPUTLBEntryFull **pfull,
149
+ case MO_32:
129
uintptr_t retaddr)
150
+ t3 = tcg_temp_new_vec(type);
130
@@ -XXX,XX +XXX,XX @@ int probe_access_full(CPUArchState *env, target_ulong addr, int size,
151
+ t4 = tcg_temp_new_vec(type);
131
return flags;
152
+ tcg_gen_dupi_vec(MO_8, t4, -16);
132
}
153
+ vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
133
154
+ tcgv_vec_arg(v2), tcgv_vec_arg(t4));
134
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
155
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
135
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
156
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
136
MMUAccessType access_type, int mmu_idx,
157
+ tcg_gen_dupi_vec(MO_8, t3, 0);
137
bool nonfault, void **phost, uintptr_t retaddr)
158
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
159
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
160
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
161
+ tcgv_vec_arg(t3), tcgv_vec_arg(t4));
162
+ tcg_gen_add_vec(MO_32, v0, t2, t3);
163
+ tcg_temp_free_vec(t3);
164
+ tcg_temp_free_vec(t4);
165
+ break;
166
+
167
+ default:
168
+ g_assert_not_reached();
169
+ }
170
+ tcg_temp_free_vec(t1);
171
+ tcg_temp_free_vec(t2);
172
+}
173
+
174
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
175
TCGArg a0, ...)
176
{
138
{
177
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
139
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
178
v2 = temp_tcgv_vec(arg_temp(a2));
140
return flags;
179
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
141
}
180
break;
142
181
+ case INDEX_op_mul_vec:
143
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
182
+ v2 = temp_tcgv_vec(arg_temp(a2));
144
+void *probe_access(CPUArchState *env, vaddr addr, int size,
183
+ expand_vec_mul(type, vece, v0, v1, v2);
145
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
184
+ break;
146
{
185
default:
147
CPUTLBEntryFull *full;
186
g_assert_not_reached();
148
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
187
}
149
* NOTE: This function will trigger an exception if the page is
188
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
150
* not executable.
189
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
151
*/
190
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
152
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
191
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
153
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
192
+ static const TCGTargetOpDef v_v_v_v
154
void **hostp)
193
+ = { .args_ct_str = { "v", "v", "v", "v" } };
155
{
194
156
CPUTLBEntryFull *full;
195
switch (op) {
157
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
196
case INDEX_op_goto_ptr:
158
index XXXXXXX..XXXXXXX 100644
197
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
159
--- a/accel/tcg/user-exec.c
198
160
+++ b/accel/tcg/user-exec.c
199
case INDEX_op_add_vec:
161
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
200
case INDEX_op_sub_vec:
162
return current_tb_invalidated ? 2 : 1;
201
+ case INDEX_op_mul_vec:
163
}
202
case INDEX_op_and_vec:
164
203
case INDEX_op_or_vec:
165
-static int probe_access_internal(CPUArchState *env, target_ulong addr,
204
case INDEX_op_xor_vec:
166
+static int probe_access_internal(CPUArchState *env, vaddr addr,
205
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
167
int fault_size, MMUAccessType access_type,
206
case INDEX_op_shlv_vec:
168
bool nonfault, uintptr_t ra)
207
case INDEX_op_shrv_vec:
169
{
208
case INDEX_op_sarv_vec:
170
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
209
+ case INDEX_op_ppc_mrgh_vec:
171
cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
210
+ case INDEX_op_ppc_mrgl_vec:
172
}
211
+ case INDEX_op_ppc_muleu_vec:
173
212
+ case INDEX_op_ppc_mulou_vec:
174
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
213
+ case INDEX_op_ppc_pkum_vec:
175
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
214
+ case INDEX_op_ppc_rotl_vec:
176
MMUAccessType access_type, int mmu_idx,
215
return &v_v_v;
177
bool nonfault, void **phost, uintptr_t ra)
216
case INDEX_op_not_vec:
178
{
217
case INDEX_op_dup_vec:
179
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
218
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
180
return flags;
219
case INDEX_op_st_vec:
181
}
220
case INDEX_op_dupm_vec:
182
221
return &v_r;
183
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
222
+ case INDEX_op_ppc_msum_vec:
184
+void *probe_access(CPUArchState *env, vaddr addr, int size,
223
+ return &v_v_v_v;
185
MMUAccessType access_type, int mmu_idx, uintptr_t ra)
224
186
{
225
default:
187
int flags;
226
return NULL;
188
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
189
return size ? g2h(env_cpu(env), addr) : NULL;
190
}
191
192
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
193
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
194
void **hostp)
195
{
196
int flags;
227
--
197
--
228
2.17.1
198
2.34.1
229
230
1
These new instructions are conditional on MSR.FP when TX=0 and
1
From: Anton Johansson <anjo@rev.ng>
2
MSR.VEC when TX=1. Since we only care about the Altivec registers,
3
and force TX=1, we can consider these to be Altivec instructions.
4
Since Altivec is true for any use of vector types, we only need to
5
test have_isa_2_07.
6
2
7
This includes moves to and from the integer registers.
3
Update atomic_mmu_lookup() and cpu_mmu_lookup() to take the guest
4
virtual address as a vaddr instead of a target_ulong.
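The reason these prototypes can drop their target_ulong dependence is that vaddr is a fixed-width type; roughly, paraphrased from QEMU's common headers:

    /* Wide enough for any target's virtual address, independent of
     * TARGET_LONG_BITS. */
    typedef uint64_t vaddr;
    #define VADDR_PRIx PRIx64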
8
5
9
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Anton Johansson <anjo@rev.ng>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230621135633.1649-10-anjo@rev.ng>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
10
---
12
tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------
11
accel/tcg/cputlb.c | 6 +++---
13
1 file changed, 26 insertions(+), 6 deletions(-)
12
accel/tcg/user-exec.c | 6 +++---
13
2 files changed, 6 insertions(+), 6 deletions(-)
14
14
15
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
15
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/ppc/tcg-target.inc.c
17
--- a/accel/tcg/cputlb.c
18
+++ b/tcg/ppc/tcg-target.inc.c
18
+++ b/accel/tcg/cputlb.c
19
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
19
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
20
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
20
* Probe for an atomic operation. Do not allow unaligned operations,
21
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
21
* or io operations to proceed. Return the host address.
22
22
*/
23
+#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
23
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
24
+#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
24
- MemOpIdx oi, int size, uintptr_t retaddr)
25
+#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
25
+static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
26
+#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
26
+ int size, uintptr_t retaddr)
27
+
27
{
28
#define RT(r) ((r)<<21)
28
uintptr_t mmu_idx = get_mmuidx(oi);
29
#define RS(r) ((r)<<21)
29
MemOp mop = get_memop(oi);
30
#define RA(r) ((r)<<16)
30
int a_bits = get_alignment_bits(mop);
31
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
31
uintptr_t index;
32
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
32
CPUTLBEntry *tlbe;
33
/* fallthru */
33
- target_ulong tlb_addr;
34
case TCG_TYPE_I32:
34
+ vaddr tlb_addr;
35
- if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
35
void *hostaddr;
36
- tcg_out32(s, OR | SAB(arg, ret, arg));
36
CPUTLBEntryFull *full;
37
- break;
37
38
- } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
38
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
39
- /* Altivec does not support vector/integer moves. */
39
index XXXXXXX..XXXXXXX 100644
40
- return false;
40
--- a/accel/tcg/user-exec.c
41
+ if (ret < TCG_REG_V0) {
41
+++ b/accel/tcg/user-exec.c
42
+ if (arg < TCG_REG_V0) {
42
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
43
+ tcg_out32(s, OR | SAB(arg, ret, arg));
43
44
+ break;
44
/* The softmmu versions of these helpers are in cputlb.c. */
45
+ } else if (have_isa_2_07) {
45
46
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
46
-static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
47
+ | VRT(arg) | RA(ret));
47
+static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr,
48
+ break;
48
MemOp mop, uintptr_t ra, MMUAccessType type)
49
+ } else {
49
{
50
+ /* Altivec does not support vector->integer moves. */
50
int a_bits = get_alignment_bits(mop);
51
+ return false;
51
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
52
+ }
52
/*
53
+ } else if (arg < TCG_REG_V0) {
53
* Do not allow unaligned operations to proceed. Return the host address.
54
+ if (have_isa_2_07) {
54
*/
55
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
55
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
56
+ | VRT(ret) | RA(arg));
56
- MemOpIdx oi, int size, uintptr_t retaddr)
57
+ break;
57
+static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
58
+ } else {
58
+ int size, uintptr_t retaddr)
59
+ /* Altivec does not support integer->vector moves. */
59
{
60
+ return false;
60
MemOp mop = get_memop(oi);
61
+ }
61
int a_bits = get_alignment_bits(mop);
62
}
63
/* fallthru */
64
case TCG_TYPE_V64:
65
--
62
--
66
2.17.1
63
2.34.1
67
68
diff view generated by jsdifflib
1
Introduce all of the flags required to enable tcg backend vector support,
1
From: Anton Johansson <anjo@rev.ng>
2
and a runtime flag to indicate the host supports Altivec instructions.
3
2
4
For now, do not actually set have_isa_altivec to true, because we have not
3
Use vaddr for guest virtual address in translator_use_goto_tb() and
5
yet added all of the code to actually generate all of the required insns.
4
translator_loop().
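A hedged sketch of the call site this signature change affects: the usual pattern a front end uses when deciding whether it may chain directly to the next TB (every name other than the translator and tcg_gen_* APIs is a placeholder):

    static void gen_goto_tb(DisasContext *dc, int n, vaddr dest)
    {
        if (translator_use_goto_tb(&dc->base, dest)) {
            tcg_gen_goto_tb(n);                /* direct chain allowed */
            gen_update_pc(dc, dest);           /* placeholder helper */
            tcg_gen_exit_tb(dc->base.tb, n);
        } else {
            gen_update_pc(dc, dest);           /* placeholder helper */
            tcg_gen_lookup_and_goto_ptr();     /* indirect lookup instead */
        }
    }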
6
However, we must define these flags in order to disable ifndefs that create
7
stub versions of the functions added here.
8
5
9
The change to tcg_out_movi works around a buglet in tcg.c wherein if we
6
Signed-off-by: Anton Johansson <anjo@rev.ng>
10
do not define tcg_out_dupi_vec we get a declared but not defined Werror,
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
but if we only declare it we get a defined but not used Werror. We need
8
Message-Id: <20230621135633.1649-11-anjo@rev.ng>
12
to make this change to tcg_out_movi eventually anyway, so it's no biggie.
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
include/exec/translator.h | 6 +++---
12
accel/tcg/translator.c | 10 +++++-----
13
2 files changed, 8 insertions(+), 8 deletions(-)
13
14
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
diff --git a/include/exec/translator.h b/include/exec/translator.h
15
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
16
---
17
tcg/ppc/tcg-target.h | 25 ++++++++++++++++
18
tcg/ppc/tcg-target.opc.h | 5 ++++
19
tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++--
20
3 files changed, 89 insertions(+), 3 deletions(-)
21
create mode 100644 tcg/ppc/tcg-target.opc.h
22
23
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
24
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.h
17
--- a/include/exec/translator.h
26
+++ b/tcg/ppc/tcg-target.h
18
+++ b/include/exec/translator.h
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
19
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
28
} TCGPowerISA;
20
* - When too many instructions have been translated.
29
21
*/
30
extern TCGPowerISA have_isa;
22
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
31
+extern bool have_altivec;
23
- target_ulong pc, void *host_pc,
32
24
- const TranslatorOps *ops, DisasContextBase *db);
33
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
25
+ vaddr pc, void *host_pc, const TranslatorOps *ops,
34
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
26
+ DisasContextBase *db);
35
@@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa;
27
36
#define TCG_TARGET_HAS_mulsh_i64 1
28
/**
37
#endif
29
* translator_use_goto_tb
38
30
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
39
+/*
31
* Return true if goto_tb is allowed between the current TB
40
+ * While technically Altivec could support V64, it has no 64-bit store
32
* and the destination PC.
41
+ * instruction and substituting two 32-bit stores makes the generated
33
*/
42
+ * code quite large.
34
-bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
43
+ */
35
+bool translator_use_goto_tb(DisasContextBase *db, vaddr dest);
44
+#define TCG_TARGET_HAS_v64 0
36
45
+#define TCG_TARGET_HAS_v128 have_altivec
37
/**
46
+#define TCG_TARGET_HAS_v256 0
38
* translator_io_start
47
+
39
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
48
+#define TCG_TARGET_HAS_andc_vec 0
49
+#define TCG_TARGET_HAS_orc_vec 0
50
+#define TCG_TARGET_HAS_not_vec 0
51
+#define TCG_TARGET_HAS_neg_vec 0
52
+#define TCG_TARGET_HAS_abs_vec 0
53
+#define TCG_TARGET_HAS_shi_vec 0
54
+#define TCG_TARGET_HAS_shs_vec 0
55
+#define TCG_TARGET_HAS_shv_vec 0
56
+#define TCG_TARGET_HAS_cmp_vec 0
57
+#define TCG_TARGET_HAS_mul_vec 0
58
+#define TCG_TARGET_HAS_sat_vec 0
59
+#define TCG_TARGET_HAS_minmax_vec 0
60
+#define TCG_TARGET_HAS_bitsel_vec 0
61
+#define TCG_TARGET_HAS_cmpsel_vec 0
62
+
63
void flush_icache_range(uintptr_t start, uintptr_t stop);
64
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
65
66
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
67
new file mode 100644
68
index XXXXXXX..XXXXXXX
69
--- /dev/null
70
+++ b/tcg/ppc/tcg-target.opc.h
71
@@ -XXX,XX +XXX,XX @@
72
+/*
73
+ * Target-specific opcodes for host vector expansion. These will be
74
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
75
+ * consider these to be UNSPEC with names.
76
+ */
77
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
78
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/ppc/tcg-target.inc.c
41
--- a/accel/tcg/translator.c
80
+++ b/tcg/ppc/tcg-target.inc.c
42
+++ b/accel/tcg/translator.c
81
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
43
@@ -XXX,XX +XXX,XX @@ static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags,
82
83
TCGPowerISA have_isa;
84
static bool have_isel;
85
+bool have_altivec;
86
87
#ifndef CONFIG_SOFTMMU
88
#define TCG_GUEST_BASE_REG 30
89
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
90
}
44
}
91
}
45
}
92
46
93
-static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
47
-bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
94
- tcg_target_long arg)
48
+bool translator_use_goto_tb(DisasContextBase *db, vaddr dest)
95
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
96
+ tcg_target_long val)
97
{
49
{
98
- tcg_out_movi_int(s, type, ret, arg, false);
50
/* Suppress goto_tb if requested. */
99
+ g_assert_not_reached();
51
if (tb_cflags(db->tb) & CF_NO_GOTO_TB) {
100
+}
52
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
101
+
102
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
103
+ tcg_target_long arg)
104
+{
105
+ switch (type) {
106
+ case TCG_TYPE_I32:
107
+ case TCG_TYPE_I64:
108
+ tcg_debug_assert(ret < TCG_REG_V0);
109
+ tcg_out_movi_int(s, type, ret, arg, false);
110
+ break;
111
+
112
+ case TCG_TYPE_V64:
113
+ case TCG_TYPE_V128:
114
+ tcg_debug_assert(ret >= TCG_REG_V0);
115
+ tcg_out_dupi_vec(s, type, ret, arg);
116
+ break;
117
+
118
+ default:
119
+ g_assert_not_reached();
120
+ }
121
}
53
}
122
54
123
static bool mask_operand(uint32_t c, int *mb, int *me)
55
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
56
- target_ulong pc, void *host_pc,
125
}
57
- const TranslatorOps *ops, DisasContextBase *db)
58
+ vaddr pc, void *host_pc, const TranslatorOps *ops,
59
+ DisasContextBase *db)
60
{
61
uint32_t cflags = tb_cflags(tb);
62
TCGOp *icount_start_insn;
63
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
126
}
64
}
127
65
128
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
66
static void *translator_access(CPUArchState *env, DisasContextBase *db,
129
+{
67
- target_ulong pc, size_t len)
130
+ g_assert_not_reached();
68
+ vaddr pc, size_t len)
131
+}
132
+
133
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134
+ TCGReg dst, TCGReg src)
135
+{
136
+ g_assert_not_reached();
137
+}
138
+
139
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
140
+ TCGReg out, TCGReg base, intptr_t offset)
141
+{
142
+ g_assert_not_reached();
143
+}
144
+
145
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
146
+ unsigned vecl, unsigned vece,
147
+ const TCGArg *args, const int *const_args)
148
+{
149
+ g_assert_not_reached();
150
+}
151
+
152
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
153
+ TCGArg a0, ...)
154
+{
155
+ g_assert_not_reached();
156
+}
157
+
158
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
159
{
69
{
160
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
70
void *host;
161
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
71
- target_ulong base, end;
162
72
+ vaddr base, end;
163
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
73
TranslationBlock *tb;
164
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
74
165
+ if (have_altivec) {
75
tb = db->tb;
166
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
167
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
168
+ }
169
170
tcg_target_call_clobber_regs = 0;
171
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
172
--
76
--
173
2.17.1
77
2.34.1
174
175
1
These new instructions are conditional only on MSR.VSX and
1
From: Anton Johansson <anjo@rev.ng>
2
are thus part of the VSX instruction set, and not Altivec.
3
This includes double-word loads and stores.
4
2
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230621135633.1649-13-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/ppc/tcg-target.inc.c | 11 +++++++++++
8
include/exec/exec-all.h | 2 +-
9
1 file changed, 11 insertions(+)
9
cpu.c | 2 +-
10
2 files changed, 2 insertions(+), 2 deletions(-)
10
11
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
14
--- a/include/exec/exec-all.h
14
+++ b/tcg/ppc/tcg-target.inc.c
15
+++ b/include/exec/exec-all.h
15
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
16
@@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu);
16
#define LVEWX XO31(71)
17
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
18
/* TranslationBlock invalidate API */
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
19
#if defined(CONFIG_USER_ONLY)
19
+#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
20
-void tb_invalidate_phys_addr(target_ulong addr);
20
21
+void tb_invalidate_phys_addr(hwaddr addr);
21
#define STVX XO31(231)
22
#else
22
#define STVEWX XO31(199)
23
void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
23
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
24
#endif
24
+#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
25
diff --git a/cpu.c b/cpu.c
25
26
index XXXXXXX..XXXXXXX 100644
26
#define VADDSBS VX4(768)
27
--- a/cpu.c
27
#define VADDUBS VX4(512)
28
+++ b/cpu.c
28
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
29
@@ -XXX,XX +XXX,XX @@ void list_cpus(void)
29
tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
30
}
30
break;
31
31
}
32
#if defined(CONFIG_USER_ONLY)
32
+ if (have_isa_2_07 && have_vsx) {
33
-void tb_invalidate_phys_addr(target_ulong addr)
33
+ tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
34
+void tb_invalidate_phys_addr(hwaddr addr)
34
+ break;
35
{
35
+ }
36
mmap_lock();
36
tcg_debug_assert((offset & 3) == 0);
37
tb_invalidate_phys_page(addr);
37
tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
38
shift = (offset - 4) & 0xc;
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
40
tcg_out_mem_long(s, STW, STWX, arg, base, offset);
41
break;
42
}
43
+ if (have_isa_2_07 && have_vsx) {
44
+ tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
45
+ break;
46
+ }
47
+ assert((offset & 3) == 0);
48
tcg_debug_assert((offset & 3) == 0);
49
shift = (offset - 4) & 0xc;
50
if (shift) {
51
--
38
--
52
2.17.1
39
2.34.1
53
54
1
From: Alex Bennée <alex.bennee@linaro.org>
1
From: Alex Bennée <alex.bennee@linaro.org>
2
2
3
qemu_cpu_kick is used for a number of reasons including to indicate
3
Balaton discovered that asserts for the extract/deposit calls had a
4
there is work to be done. However when thread=single the old
4
significant impact on a lame benchmark on qemu-ppc. Replicating with:
5
qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one
6
which can lead to a hang in the case that:
7
5
8
a) the kick is from outside the vCPUs (e.g. iothread)
6
./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame \
9
b) the timers are paused (i.e. iothread calling run_on_cpu)
7
-h pts-trondheim-3.wav pts-trondheim-3.mp3
10
8
11
To avoid this let's split qemu_cpu_kick_rr into two functions. One for
9
showed up the pack/unpack routines not eliding the assert checks as it
12
the timer which continues to advance to the next timeslice and another
10
should have done causing them to prominently figure in the profile:
13
for all other kicks.
14
11
15
Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org>
12
 11.44%  qemu-ppc64  qemu-ppc64               [.] unpack_raw64.isra.0
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
13
 11.03%  qemu-ppc64  qemu-ppc64               [.] parts64_uncanon_normal
14
  8.26%  qemu-ppc64  qemu-ppc64               [.] helper_compute_fprf_float64
15
  6.75%  qemu-ppc64  qemu-ppc64               [.] do_float_check_status
16
  5.34%  qemu-ppc64  qemu-ppc64               [.] parts64_muladd
17
  4.75%  qemu-ppc64  qemu-ppc64               [.] pack_raw64.isra.0
18
  4.38%  qemu-ppc64  qemu-ppc64               [.] parts64_canonicalize
19
  3.62%  qemu-ppc64  qemu-ppc64               [.] float64r32_round_pack_canonical
20
21
After this patch the same test runs 31 seconds faster with a profile
22
where the generated code dominates more:
23
24
+ 14.12% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619420
25
+ 13.30% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000616850
26
+ 12.58% 12.19% qemu-ppc64 qemu-ppc64 [.] parts64_uncanon_normal
27
+ 10.62% 0.00% qemu-ppc64 [unknown] [.] 0x000000400061bf70
28
+ 9.91% 9.73% qemu-ppc64 qemu-ppc64 [.] helper_compute_fprf_float64
29
+ 7.84% 7.82% qemu-ppc64 qemu-ppc64 [.] do_float_check_status
30
+ 6.47% 5.78% qemu-ppc64 qemu-ppc64 [.] parts64_canonicalize.constprop.0
31
+ 6.46% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000620130
32
+ 6.42% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619400
33
+ 6.17% 6.04% qemu-ppc64 qemu-ppc64 [.] parts64_muladd
34
+ 5.85% 0.00% qemu-ppc64 [unknown] [.] 0x00000040006167e0
35
+ 5.74% 0.00% qemu-ppc64 [unknown] [.] 0x0000b693fcffffd3
36
+ 5.45% 4.78% qemu-ppc64 qemu-ppc64 [.] float64r32_round_pack_canonical
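For context on the fix, a sketch rather than the patch itself: QEMU_FLATTEN maps to the compiler's flatten attribute, which inlines calls made inside the annotated function, so the pack/unpack helpers stop bouncing through the partially-inlined *.isra.0 clones that dominate the first profile:

    /* Paraphrased from include/qemu/compiler.h: */
    #define QEMU_FLATTEN __attribute__((flatten))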
37
38
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
39
Message-Id: <ec9cfe5a-d5f2-466d-34dc-c35817e7e010@linaro.org>
40
[AJB: Patchified rth's suggestion]
41
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
42
Cc: BALATON Zoltan <balaton@eik.bme.hu>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
43
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
44
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
45
Tested-by: BALATON Zoltan <balaton@eik.bme.hu>
46
Message-Id: <20230523131107.3680641-1-alex.bennee@linaro.org>
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
47
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
---
48
---
21
cpus.c | 24 ++++++++++++++++++------
49
fpu/softfloat.c | 22 +++++++++++-----------
22
1 file changed, 18 insertions(+), 6 deletions(-)
50
1 file changed, 11 insertions(+), 11 deletions(-)
23
51
24
diff --git a/cpus.c b/cpus.c
52
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
25
index XXXXXXX..XXXXXXX 100644
53
index XXXXXXX..XXXXXXX 100644
26
--- a/cpus.c
54
--- a/fpu/softfloat.c
27
+++ b/cpus.c
55
+++ b/fpu/softfloat.c
28
@@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void)
56
@@ -XXX,XX +XXX,XX @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
29
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
57
};
30
}
58
}
31
59
32
-/* Kick the currently round-robin scheduled vCPU */
60
-static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
33
-static void qemu_cpu_kick_rr_cpu(void)
61
+static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
34
+/* Kick the currently round-robin scheduled vCPU to next */
35
+static void qemu_cpu_kick_rr_next_cpu(void)
36
{
62
{
37
CPUState *cpu;
63
unpack_raw64(p, &float16_params, f);
38
do {
39
@@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void)
40
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
41
}
64
}
42
65
43
+/* Kick all RR vCPUs */
66
-static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
44
+static void qemu_cpu_kick_rr_cpus(void)
67
+static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
45
+{
46
+ CPUState *cpu;
47
+
48
+ CPU_FOREACH(cpu) {
49
+ cpu_exit(cpu);
50
+ };
51
+}
52
+
53
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
54
{
68
{
69
unpack_raw64(p, &bfloat16_params, f);
55
}
70
}
56
@@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
71
57
static void kick_tcg_thread(void *opaque)
72
-static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
73
+static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
58
{
74
{
59
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
75
unpack_raw64(p, &float32_params, f);
60
- qemu_cpu_kick_rr_cpu();
61
+ qemu_cpu_kick_rr_next_cpu();
62
}
76
}
63
77
64
static void start_tcg_kick_timer(void)
78
-static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
65
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu)
79
+static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
66
{
80
{
67
qemu_cond_broadcast(cpu->halt_cond);
81
unpack_raw64(p, &float64_params, f);
68
if (tcg_enabled()) {
82
}
69
- cpu_exit(cpu);
83
70
- /* NOP unless doing single-thread RR */
84
-static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
71
- qemu_cpu_kick_rr_cpu();
85
+static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
72
+ if (qemu_tcg_mttcg_enabled()) {
86
{
73
+ cpu_exit(cpu);
87
*p = (FloatParts128) {
74
+ } else {
88
.cls = float_class_unclassified,
75
+ qemu_cpu_kick_rr_cpus();
89
@@ -XXX,XX +XXX,XX @@ static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
76
+ }
90
};
77
} else {
91
}
78
if (hax_enabled()) {
92
79
/*
93
-static void float128_unpack_raw(FloatParts128 *p, float128 f)
94
+static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
95
{
96
const int f_size = float128_params.frac_size - 64;
97
const int e_size = float128_params.exp_size;
98
@@ -XXX,XX +XXX,XX @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
99
return ret;
100
}
101
102
-static inline float16 float16_pack_raw(const FloatParts64 *p)
103
+static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
104
{
105
return make_float16(pack_raw64(p, &float16_params));
106
}
107
108
-static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
109
+static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
110
{
111
return pack_raw64(p, &bfloat16_params);
112
}
113
114
-static inline float32 float32_pack_raw(const FloatParts64 *p)
115
+static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
116
{
117
return make_float32(pack_raw64(p, &float32_params));
118
}
119
120
-static inline float64 float64_pack_raw(const FloatParts64 *p)
121
+static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
122
{
123
return make_float64(pack_raw64(p, &float64_params));
124
}
125
126
-static float128 float128_pack_raw(const FloatParts128 *p)
127
+static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
128
{
129
const int f_size = float128_params.frac_size - 64;
130
const int e_size = float128_params.exp_size;
80
--
131
--
81
2.17.1
132
2.34.1
82
133
83
134
1
Add support for vector add/subtract using Altivec instructions:
1
This is a perfectly natural occurrence for x86 "rep movb",
2
VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM.
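As a reading aid, the trailing "M" in these mnemonics means "modulo": the lanes simply wrap, unlike the saturating VADDSBS/VADDUBS family used for the ssadd/usadd ops elsewhere in this series. Per byte lane the operation is just:

    d[i] = (uint8_t)(a[i] + b[i]);   /* vaddubm: wraps, never saturates */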
2
where the "rep" prefix forms a counted loop of the one insn.
3
4
During the tests/tcg/multiarch/memory test, this logging is
5
triggered over 350000 times. Within the context of cross-i386-tci
6
build, which is already slow by nature, the logging is sufficient
7
to push the test into timeout.
3
8
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
10
---
7
tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++
11
tests/plugin/insn.c | 9 +--------
8
1 file changed, 20 insertions(+)
12
tests/tcg/i386/Makefile.softmmu-target | 9 ---------
13
tests/tcg/i386/Makefile.target | 6 ------
14
tests/tcg/x86_64/Makefile.softmmu-target | 9 ---------
15
4 files changed, 1 insertion(+), 32 deletions(-)
9
16
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
17
diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c
11
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
19
--- a/tests/plugin/insn.c
13
+++ b/tcg/ppc/tcg-target.inc.c
20
+++ b/tests/plugin/insn.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
21
@@ -XXX,XX +XXX,XX @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
15
#define STVX XO31(231)
22
#define MAX_CPUS 8 /* lets not go nuts */
16
#define STVEWX XO31(199)
23
17
24
typedef struct {
18
+#define VADDUBM VX4(0)
25
- uint64_t last_pc;
19
+#define VADDUHM VX4(64)
26
uint64_t insn_count;
20
+#define VADDUWM VX4(128)
27
} InstructionCount;
28
29
@@ -XXX,XX +XXX,XX @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
30
{
31
unsigned int i = cpu_index % MAX_CPUS;
32
InstructionCount *c = &counts[i];
33
- uint64_t this_pc = GPOINTER_TO_UINT(udata);
34
- if (this_pc == c->last_pc) {
35
- g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%"
36
- PRIx64 "\n", this_pc);
37
- qemu_plugin_outs(out);
38
- }
39
- c->last_pc = this_pc;
21
+
40
+
22
+#define VSUBUBM VX4(1024)
41
c->insn_count++;
23
+#define VSUBUHM VX4(1088)
42
}
24
+#define VSUBUWM VX4(1152)
43
25
+
44
diff --git a/tests/tcg/i386/Makefile.softmmu-target b/tests/tcg/i386/Makefile.softmmu-target
26
#define VMAXSB VX4(258)
45
index XXXXXXX..XXXXXXX 100644
27
#define VMAXSH VX4(322)
46
--- a/tests/tcg/i386/Makefile.softmmu-target
28
#define VMAXSW VX4(386)
47
+++ b/tests/tcg/i386/Makefile.softmmu-target
29
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
48
@@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=$(MULTIARCH_RUNS)
30
case INDEX_op_andc_vec:
49
31
case INDEX_op_not_vec:
50
memory: CFLAGS+=-DCHECK_UNALIGNED=1
32
return 1;
51
33
+ case INDEX_op_add_vec:
52
-# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
34
+ case INDEX_op_sub_vec:
53
-run-plugin-%-with-libinsn.so:
35
case INDEX_op_smax_vec:
54
-    $(call run-test, $@, \
36
case INDEX_op_smin_vec:
55
-     $(QEMU) -monitor none -display none \
37
case INDEX_op_umax_vec:
56
-         -chardev file$(COMMA)path=$@.out$(COMMA)id=output \
38
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
57
- -plugin ../../plugin/libinsn.so$(COMMA)inline=on \
39
const TCGArg *args, const int *const_args)
58
-          -d plugin -D $*-with-libinsn.so.pout \
40
{
59
-         $(QEMU_OPTS) $*)
41
static const uint32_t
60
-
42
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
61
# Running
43
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
62
QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel
44
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
63
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
45
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
64
index XXXXXXX..XXXXXXX 100644
46
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
65
--- a/tests/tcg/i386/Makefile.target
47
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
66
+++ b/tests/tcg/i386/Makefile.target
48
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
67
@@ -XXX,XX +XXX,XX @@ else
49
return;
68
SKIP_I386_TESTS+=test-i386-fprem
50
69
endif
51
+ case INDEX_op_add_vec:
70
52
+ insn = add_op[vece];
71
-# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
53
+ break;
72
-run-plugin-%-with-libinsn.so:
54
+ case INDEX_op_sub_vec:
73
-    $(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
55
+ insn = sub_op[vece];
74
-     -plugin ../../plugin/libinsn.so$(COMMA)inline=on \
56
+ break;
75
-     -d plugin -D $*-with-libinsn.so.pout $*)
57
case INDEX_op_smin_vec:
76
-
58
insn = smin_op[vece];
77
# Update TESTS
59
break;
78
I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
60
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
79
TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
61
return (TCG_TARGET_REG_BITS == 64 ? &S_S
80
diff --git a/tests/tcg/x86_64/Makefile.softmmu-target b/tests/tcg/x86_64/Makefile.softmmu-target
62
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
81
index XXXXXXX..XXXXXXX 100644
63
82
--- a/tests/tcg/x86_64/Makefile.softmmu-target
64
+ case INDEX_op_add_vec:
83
+++ b/tests/tcg/x86_64/Makefile.softmmu-target
65
+ case INDEX_op_sub_vec:
84
@@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=$(MULTIARCH_RUNS)
66
case INDEX_op_and_vec:
85
67
case INDEX_op_or_vec:
86
memory: CFLAGS+=-DCHECK_UNALIGNED=1
68
case INDEX_op_xor_vec:
87
88
-# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
89
-run-plugin-%-with-libinsn.so:
90
-    $(call run-test, $@, \
91
-     $(QEMU) -monitor none -display none \
92
-         -chardev file$(COMMA)path=$@.out$(COMMA)id=output \
93
- -plugin ../../plugin/libinsn.so$(COMMA)inline=on \
94
-          -d plugin -D $*-with-libinsn.so.pout \
95
-         $(QEMU_OPTS) $*)
96
-
97
# Running
98
QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel
69
--
99
--
70
2.17.1
100
2.34.1
71
72
1
Previously we've been hard-coding knowledge that Power7 has ISEL, but
1
From: Fei Wu <fei2.wu@intel.com>
2
it was an optional instruction before that. Use the AT_HWCAP2 bit,
3
when present, to properly determine support.
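A minimal sketch of the runtime probe described here, assuming the Linux PPC_FEATURE2_HAS_ISEL hwcap name; the actual hunk is not shown in this excerpt:

    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);

    if (hwcap2 & PPC_FEATURE2_HAS_ISEL) {
        have_isel = true;
    }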
4
2
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
3
TBStats will be introduced to replace CONFIG_PROFILER totally, here
4
remove all CONFIG_PROFILER related stuff first.
5
6
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
7
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
8
Signed-off-by: Fei Wu <fei2.wu@intel.com>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-Id: <20230607122411.3394702-2-fei2.wu@intel.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
12
---
8
tcg/ppc/tcg-target.inc.c | 17 ++++++++++++-----
13
meson.build | 2 -
9
1 file changed, 12 insertions(+), 5 deletions(-)
14
qapi/machine.json | 18 ---
15
include/qemu/timer.h | 9 --
16
include/tcg/tcg.h | 26 -----
17
accel/tcg/monitor.c | 31 -----
18
accel/tcg/tcg-accel-ops.c | 10 --
19
accel/tcg/translate-all.c | 33 ------
20
softmmu/runstate.c | 9 --
21
tcg/tcg.c | 214 ----------------------------------
22
tests/qtest/qmp-cmd-test.c | 3 -
23
hmp-commands-info.hx | 15 ---
24
meson_options.txt | 2 -
25
scripts/meson-buildoptions.sh | 3 -
26
13 files changed, 375 deletions(-)
10
27
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
28
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
30
--- a/meson.build
14
+++ b/tcg/ppc/tcg-target.inc.c
31
+++ b/meson.build
32
@@ -XXX,XX +XXX,XX @@ if numa.found()
33
dependencies: numa))
34
endif
35
config_host_data.set('CONFIG_OPENGL', opengl.found())
36
-config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
37
config_host_data.set('CONFIG_RBD', rbd.found())
38
config_host_data.set('CONFIG_RDMA', rdma.found())
39
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
40
@@ -XXX,XX +XXX,XX @@ if 'objc' in all_languages
41
summary_info += {'QEMU_OBJCFLAGS': ' '.join(qemu_common_flags)}
42
endif
43
summary_info += {'QEMU_LDFLAGS': ' '.join(qemu_ldflags)}
44
-summary_info += {'profiler': get_option('profiler')}
45
summary_info += {'link-time optimization (LTO)': get_option('b_lto')}
46
summary_info += {'PIE': get_option('b_pie')}
47
summary_info += {'static build': get_option('prefer_static')}
48
diff --git a/qapi/machine.json b/qapi/machine.json
49
index XXXXXXX..XXXXXXX 100644
50
--- a/qapi/machine.json
51
+++ b/qapi/machine.json
15
@@ -XXX,XX +XXX,XX @@
52
@@ -XXX,XX +XXX,XX @@
16
static tcg_insn_unit *tb_ret_addr;
53
'if': 'CONFIG_TCG',
17
54
'features': [ 'unstable' ] }
18
TCGPowerISA have_isa;
55
19
-
56
-##
20
-#define HAVE_ISEL have_isa_2_06
57
-# @x-query-profile:
21
+static bool have_isel;
58
-#
22
59
-# Query TCG profiling information
23
#ifndef CONFIG_SOFTMMU
60
-#
24
#define TCG_GUEST_BASE_REG 30
61
-# Features:
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
62
-#
26
/* If we have ISEL, we can implement everything with 3 or 4 insns.
63
-# @unstable: This command is meant for debugging.
27
All other cases below are also at least 3 insns, so speed up the
64
-#
28
code generator by not considering them and always using ISEL. */
65
-# Returns: profile information
29
- if (HAVE_ISEL) {
66
-#
30
+ if (have_isel) {
67
-# Since: 6.2
31
int isel, tab;
68
-##
32
69
-{ 'command': 'x-query-profile',
33
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
70
- 'returns': 'HumanReadableText',
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
71
- 'if': 'CONFIG_TCG',
35
72
- 'features': [ 'unstable' ] }
36
tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
73
-
37
74
##
38
- if (HAVE_ISEL) {
75
# @x-query-ramblock:
39
+ if (have_isel) {
76
#
40
int isel = tcg_to_isel[cond];
77
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
41
78
index XXXXXXX..XXXXXXX 100644
42
/* Swap the V operands if the operation indicates inversion. */
79
--- a/include/qemu/timer.h
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
80
+++ b/include/qemu/timer.h
44
} else {
81
@@ -XXX,XX +XXX,XX @@ static inline int64_t cpu_get_host_ticks(void)
45
tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
82
}
46
/* Note that the only other valid constant for a2 is 0. */
83
#endif
47
- if (HAVE_ISEL) {
84
48
+ if (have_isel) {
85
-#ifdef CONFIG_PROFILER
49
tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
86
-static inline int64_t profile_getclock(void)
50
tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
87
-{
51
} else if (!const_a2 && a0 == a2) {
88
- return get_clock();
52
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
89
-}
90
-
91
-extern int64_t dev_time;
92
-#endif
93
-
94
#endif
95
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
96
index XXXXXXX..XXXXXXX 100644
97
--- a/include/tcg/tcg.h
98
+++ b/include/tcg/tcg.h
99
@@ -XXX,XX +XXX,XX @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
100
return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
101
}
102
103
-typedef struct TCGProfile {
104
- int64_t cpu_exec_time;
105
- int64_t tb_count1;
106
- int64_t tb_count;
107
- int64_t op_count; /* total insn count */
108
- int op_count_max; /* max insn per TB */
109
- int temp_count_max;
110
- int64_t temp_count;
111
- int64_t del_op_count;
112
- int64_t code_in_len;
113
- int64_t code_out_len;
114
- int64_t search_out_len;
115
- int64_t interm_time;
116
- int64_t code_time;
117
- int64_t la_time;
118
- int64_t opt_time;
119
- int64_t restore_count;
120
- int64_t restore_time;
121
- int64_t table_op_count[NB_OPS];
122
-} TCGProfile;
123
-
124
struct TCGContext {
125
uint8_t *pool_cur, *pool_end;
126
TCGPool *pool_first, *pool_current, *pool_first_large;
127
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
128
tcg_insn_unit *code_buf; /* pointer for start of tb */
129
tcg_insn_unit *code_ptr; /* pointer for running end of tb */
130
131
-#ifdef CONFIG_PROFILER
132
- TCGProfile prof;
133
-#endif
134
-
135
#ifdef CONFIG_DEBUG_TCG
136
int goto_tb_issue_mask;
137
const TCGOpcode *vecop_list;
138
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
139
return temp_tcgv_ptr(t);
140
}
141
142
-int64_t tcg_cpu_exec_time(void);
143
void tcg_dump_info(GString *buf);
144
void tcg_dump_op_count(GString *buf);
145
146
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/accel/tcg/monitor.c
149
+++ b/accel/tcg/monitor.c
150
@@ -XXX,XX +XXX,XX @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
151
return human_readable_text_from_str(buf);
152
}
153
154
-#ifdef CONFIG_PROFILER
155
-
156
-int64_t dev_time;
157
-
158
-HumanReadableText *qmp_x_query_profile(Error **errp)
159
-{
160
- g_autoptr(GString) buf = g_string_new("");
161
- static int64_t last_cpu_exec_time;
162
- int64_t cpu_exec_time;
163
- int64_t delta;
164
-
165
- cpu_exec_time = tcg_cpu_exec_time();
166
- delta = cpu_exec_time - last_cpu_exec_time;
167
-
168
- g_string_append_printf(buf, "async time %" PRId64 " (%0.3f)\n",
169
- dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
170
- g_string_append_printf(buf, "qemu time %" PRId64 " (%0.3f)\n",
171
- delta, delta / (double)NANOSECONDS_PER_SECOND);
172
- last_cpu_exec_time = cpu_exec_time;
173
- dev_time = 0;
174
-
175
- return human_readable_text_from_str(buf);
176
-}
177
-#else
178
-HumanReadableText *qmp_x_query_profile(Error **errp)
179
-{
180
- error_setg(errp, "Internal profiler not compiled");
181
- return NULL;
182
-}
183
-#endif
184
-
185
static void hmp_tcg_register(void)
186
{
187
monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
188
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
189
index XXXXXXX..XXXXXXX 100644
190
--- a/accel/tcg/tcg-accel-ops.c
191
+++ b/accel/tcg/tcg-accel-ops.c
192
@@ -XXX,XX +XXX,XX @@ void tcg_cpus_destroy(CPUState *cpu)
193
int tcg_cpus_exec(CPUState *cpu)
194
{
195
int ret;
196
-#ifdef CONFIG_PROFILER
197
- int64_t ti;
198
-#endif
199
assert(tcg_enabled());
200
-#ifdef CONFIG_PROFILER
201
- ti = profile_getclock();
202
-#endif
203
cpu_exec_start(cpu);
204
ret = cpu_exec(cpu);
205
cpu_exec_end(cpu);
206
-#ifdef CONFIG_PROFILER
207
- qatomic_set(&tcg_ctx->prof.cpu_exec_time,
208
- tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
209
-#endif
210
return ret;
211
}
212
213
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
214
index XXXXXXX..XXXXXXX 100644
215
--- a/accel/tcg/translate-all.c
216
+++ b/accel/tcg/translate-all.c
217
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
218
uintptr_t host_pc)
219
{
220
uint64_t data[TARGET_INSN_START_WORDS];
221
-#ifdef CONFIG_PROFILER
222
- TCGProfile *prof = &tcg_ctx->prof;
223
- int64_t ti = profile_getclock();
224
-#endif
225
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
226
227
if (insns_left < 0) {
228
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
229
}
230
231
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
232
-
233
-#ifdef CONFIG_PROFILER
234
- qatomic_set(&prof->restore_time,
235
- prof->restore_time + profile_getclock() - ti);
236
- qatomic_set(&prof->restore_count, prof->restore_count + 1);
237
-#endif
238
}
239
240
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
241
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
242
tcg_ctx->cpu = NULL;
243
*max_insns = tb->icount;
244
245
-#ifdef CONFIG_PROFILER
246
- qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
247
- qatomic_set(&tcg_ctx->prof.interm_time,
248
- tcg_ctx->prof.interm_time + profile_getclock() - *ti);
249
- *ti = profile_getclock();
250
-#endif
251
-
252
return tcg_gen_code(tcg_ctx, tb, pc);
253
}
254
255
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
256
tb_page_addr_t phys_pc;
257
tcg_insn_unit *gen_code_buf;
258
int gen_code_size, search_size, max_insns;
259
-#ifdef CONFIG_PROFILER
260
- TCGProfile *prof = &tcg_ctx->prof;
261
-#endif
262
int64_t ti;
263
void *host_pc;
264
265
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
266
267
tb_overflow:
268
269
-#ifdef CONFIG_PROFILER
270
- /* includes aborted translations because of exceptions */
271
- qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
272
- ti = profile_getclock();
273
-#endif
274
-
275
trace_translate_block(tb, pc, tb->tc.ptr);
276
277
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
278
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
279
*/
280
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
281
282
-#ifdef CONFIG_PROFILER
283
- qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
284
- qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
285
- qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
286
- qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
287
-#endif
288
-
289
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
290
qemu_log_in_addr_range(pc)) {
291
FILE *logfile = qemu_log_trylock();
292
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
293
index XXXXXXX..XXXXXXX 100644
294
--- a/softmmu/runstate.c
295
+++ b/softmmu/runstate.c
296
@@ -XXX,XX +XXX,XX @@ static bool main_loop_should_exit(int *status)
297
int qemu_main_loop(void)
298
{
299
int status = EXIT_SUCCESS;
300
-#ifdef CONFIG_PROFILER
301
- int64_t ti;
302
-#endif
303
304
while (!main_loop_should_exit(&status)) {
305
-#ifdef CONFIG_PROFILER
306
- ti = profile_getclock();
307
-#endif
308
main_loop_wait(false);
309
-#ifdef CONFIG_PROFILER
310
- dev_time += profile_getclock() - ti;
311
-#endif
312
}
313
314
return status;
315
diff --git a/tcg/tcg.c b/tcg/tcg.c
316
index XXXXXXX..XXXXXXX 100644
317
--- a/tcg/tcg.c
318
+++ b/tcg/tcg.c
319
@@ -XXX,XX +XXX,XX @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
320
QTAILQ_REMOVE(&s->ops, op, link);
321
QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
322
s->nb_ops--;
323
-
324
-#ifdef CONFIG_PROFILER
325
- qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
326
-#endif
327
}
328
329
void tcg_remove_ops_after(TCGOp *op)
330
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
331
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
332
}
333
334
-#ifdef CONFIG_PROFILER
335
-
336
-/* avoid copy/paste errors */
337
-#define PROF_ADD(to, from, field) \
338
- do { \
339
- (to)->field += qatomic_read(&((from)->field)); \
340
- } while (0)
341
-
342
-#define PROF_MAX(to, from, field) \
343
- do { \
344
- typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
345
- if (val__ > (to)->field) { \
346
- (to)->field = val__; \
347
- } \
348
- } while (0)
349
-
350
-/* Pass in a zero'ed @prof */
351
-static inline
352
-void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
353
-{
354
- unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
355
- unsigned int i;
356
-
357
- for (i = 0; i < n_ctxs; i++) {
358
- TCGContext *s = qatomic_read(&tcg_ctxs[i]);
359
- const TCGProfile *orig = &s->prof;
360
-
361
- if (counters) {
362
- PROF_ADD(prof, orig, cpu_exec_time);
363
- PROF_ADD(prof, orig, tb_count1);
364
- PROF_ADD(prof, orig, tb_count);
365
- PROF_ADD(prof, orig, op_count);
366
- PROF_MAX(prof, orig, op_count_max);
367
- PROF_ADD(prof, orig, temp_count);
368
- PROF_MAX(prof, orig, temp_count_max);
369
- PROF_ADD(prof, orig, del_op_count);
370
- PROF_ADD(prof, orig, code_in_len);
371
- PROF_ADD(prof, orig, code_out_len);
372
- PROF_ADD(prof, orig, search_out_len);
373
- PROF_ADD(prof, orig, interm_time);
374
- PROF_ADD(prof, orig, code_time);
375
- PROF_ADD(prof, orig, la_time);
376
- PROF_ADD(prof, orig, opt_time);
377
- PROF_ADD(prof, orig, restore_count);
378
- PROF_ADD(prof, orig, restore_time);
379
- }
380
- if (table) {
381
- int i;
382
-
383
- for (i = 0; i < NB_OPS; i++) {
384
- PROF_ADD(prof, orig, table_op_count[i]);
385
- }
386
- }
387
- }
388
-}
389
-
390
-#undef PROF_ADD
391
-#undef PROF_MAX
392
-
393
-static void tcg_profile_snapshot_counters(TCGProfile *prof)
394
-{
395
- tcg_profile_snapshot(prof, true, false);
396
-}
397
-
398
-static void tcg_profile_snapshot_table(TCGProfile *prof)
399
-{
400
- tcg_profile_snapshot(prof, false, true);
401
-}
402
-
403
-void tcg_dump_op_count(GString *buf)
404
-{
405
- TCGProfile prof = {};
406
- int i;
407
-
408
- tcg_profile_snapshot_table(&prof);
409
- for (i = 0; i < NB_OPS; i++) {
410
- g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
411
- prof.table_op_count[i]);
412
- }
413
-}
414
-
415
-int64_t tcg_cpu_exec_time(void)
416
-{
417
- unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
418
- unsigned int i;
419
- int64_t ret = 0;
420
-
421
- for (i = 0; i < n_ctxs; i++) {
422
- const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
423
- const TCGProfile *prof = &s->prof;
424
-
425
- ret += qatomic_read(&prof->cpu_exec_time);
426
- }
427
- return ret;
428
-}
429
-#else
430
void tcg_dump_op_count(GString *buf)
431
{
432
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
433
}
434
435
-int64_t tcg_cpu_exec_time(void)
436
-{
437
- error_report("%s: TCG profiler not compiled", __func__);
438
- exit(EXIT_FAILURE);
439
-}
440
-#endif
441
-
442
-
443
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
444
{
445
-#ifdef CONFIG_PROFILER
446
- TCGProfile *prof = &s->prof;
447
-#endif
448
int i, start_words, num_insns;
449
TCGOp *op;
450
451
-#ifdef CONFIG_PROFILER
452
- {
453
- int n = 0;
454
-
455
- QTAILQ_FOREACH(op, &s->ops, link) {
456
- n++;
457
- }
458
- qatomic_set(&prof->op_count, prof->op_count + n);
459
- if (n > prof->op_count_max) {
460
- qatomic_set(&prof->op_count_max, n);
461
- }
462
-
463
- n = s->nb_temps;
464
- qatomic_set(&prof->temp_count, prof->temp_count + n);
465
- if (n > prof->temp_count_max) {
466
- qatomic_set(&prof->temp_count_max, n);
467
- }
468
- }
469
-#endif
470
-
471
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
472
&& qemu_log_in_addr_range(pc_start))) {
473
FILE *logfile = qemu_log_trylock();
474
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
53
}
475
}
54
#endif
476
#endif
55
477
56
+#ifdef PPC_FEATURE2_HAS_ISEL
478
-#ifdef CONFIG_PROFILER
57
+ /* Prefer explicit instruction from the kernel. */
479
- qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
58
+ have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
480
-#endif
59
+#else
481
-
60
+ /* Fall back to knowing Power7 (2.06) has ISEL. */
482
tcg_optimize(s);
61
+ have_isel = have_isa_2_06;
483
62
+#endif
484
-#ifdef CONFIG_PROFILER
63
+
485
- qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
64
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
486
- qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
65
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
487
-#endif
66
488
-
489
reachable_code_pass(s);
490
liveness_pass_0(s);
491
liveness_pass_1(s);
492
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
493
}
494
}
495
496
-#ifdef CONFIG_PROFILER
497
- qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
498
-#endif
499
-
500
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
501
&& qemu_log_in_addr_range(pc_start))) {
502
FILE *logfile = qemu_log_trylock();
503
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
504
QTAILQ_FOREACH(op, &s->ops, link) {
505
TCGOpcode opc = op->opc;
506
507
-#ifdef CONFIG_PROFILER
508
- qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
509
-#endif
510
-
511
switch (opc) {
512
case INDEX_op_mov_i32:
513
case INDEX_op_mov_i64:
514
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
515
return tcg_current_code_size(s);
516
}
517
518
-#ifdef CONFIG_PROFILER
519
-void tcg_dump_info(GString *buf)
520
-{
521
- TCGProfile prof = {};
522
- const TCGProfile *s;
523
- int64_t tb_count;
524
- int64_t tb_div_count;
525
- int64_t tot;
526
-
527
- tcg_profile_snapshot_counters(&prof);
528
- s = &prof;
529
- tb_count = s->tb_count;
530
- tb_div_count = tb_count ? tb_count : 1;
531
- tot = s->interm_time + s->code_time;
532
-
533
- g_string_append_printf(buf, "JIT cycles %" PRId64
534
- " (%0.3f s at 2.4 GHz)\n",
535
- tot, tot / 2.4e9);
536
- g_string_append_printf(buf, "translated TBs %" PRId64
537
- " (aborted=%" PRId64 " %0.1f%%)\n",
538
- tb_count, s->tb_count1 - tb_count,
539
- (double)(s->tb_count1 - s->tb_count)
540
- / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
541
- g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
542
- (double)s->op_count / tb_div_count, s->op_count_max);
543
- g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
544
- (double)s->del_op_count / tb_div_count);
545
- g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
546
- (double)s->temp_count / tb_div_count,
547
- s->temp_count_max);
548
- g_string_append_printf(buf, "avg host code/TB %0.1f\n",
549
- (double)s->code_out_len / tb_div_count);
550
- g_string_append_printf(buf, "avg search data/TB %0.1f\n",
551
- (double)s->search_out_len / tb_div_count);
552
-
553
- g_string_append_printf(buf, "cycles/op %0.1f\n",
554
- s->op_count ? (double)tot / s->op_count : 0);
555
- g_string_append_printf(buf, "cycles/in byte %0.1f\n",
556
- s->code_in_len ? (double)tot / s->code_in_len : 0);
557
- g_string_append_printf(buf, "cycles/out byte %0.1f\n",
558
- s->code_out_len ? (double)tot / s->code_out_len : 0);
559
- g_string_append_printf(buf, "cycles/search byte %0.1f\n",
560
- s->search_out_len ?
561
- (double)tot / s->search_out_len : 0);
562
- if (tot == 0) {
563
- tot = 1;
564
- }
565
- g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
566
- (double)s->interm_time / tot * 100.0);
567
- g_string_append_printf(buf, " gen_code time %0.1f%%\n",
568
- (double)s->code_time / tot * 100.0);
569
- g_string_append_printf(buf, "optim./code time %0.1f%%\n",
570
- (double)s->opt_time / (s->code_time ?
571
- s->code_time : 1)
572
- * 100.0);
573
- g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
574
- (double)s->la_time / (s->code_time ?
575
- s->code_time : 1) * 100.0);
576
- g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
577
- s->restore_count);
578
- g_string_append_printf(buf, " avg cycles %0.1f\n",
579
- s->restore_count ?
580
- (double)s->restore_time / s->restore_count : 0);
581
-}
582
-#else
583
void tcg_dump_info(GString *buf)
584
{
585
g_string_append_printf(buf, "[TCG profiler not compiled]\n");
586
}
587
-#endif
588
589
#ifdef ELF_HOST_MACHINE
590
/* In order to use this feature, the backend needs to do three things:
591
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
592
index XXXXXXX..XXXXXXX 100644
593
--- a/tests/qtest/qmp-cmd-test.c
594
+++ b/tests/qtest/qmp-cmd-test.c
595
@@ -XXX,XX +XXX,XX @@ static int query_error_class(const char *cmd)
596
{ "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
597
{ "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
598
{ "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
599
-#ifndef CONFIG_PROFILER
600
- { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
601
-#endif
602
/* Only valid with a USB bus added */
603
{ "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
604
/* Only valid with accel=tcg */
605
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
606
index XXXXXXX..XXXXXXX 100644
607
--- a/hmp-commands-info.hx
608
+++ b/hmp-commands-info.hx
609
@@ -XXX,XX +XXX,XX @@ SRST
610
Show host USB devices.
611
ERST
612
613
-#if defined(CONFIG_TCG)
614
- {
615
- .name = "profile",
616
- .args_type = "",
617
- .params = "",
618
- .help = "show profiling information",
619
- .cmd_info_hrt = qmp_x_query_profile,
620
- },
621
-#endif
622
-
623
-SRST
624
- ``info profile``
625
- Show profiling information.
626
-ERST
627
-
628
{
629
.name = "capture",
630
.args_type = "",
631
diff --git a/meson_options.txt b/meson_options.txt
632
index XXXXXXX..XXXXXXX 100644
633
--- a/meson_options.txt
634
+++ b/meson_options.txt
635
@@ -XXX,XX +XXX,XX @@ option('qom_cast_debug', type: 'boolean', value: true,
636
option('gprof', type: 'boolean', value: false,
637
description: 'QEMU profiling with gprof',
638
deprecated: true)
639
-option('profiler', type: 'boolean', value: false,
640
- description: 'profiler support')
641
option('slirp_smbd', type : 'feature', value : 'auto',
642
description: 'use smbd (at path --smbd=*) in slirp networking')
643
644
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
645
index XXXXXXX..XXXXXXX 100644
646
--- a/scripts/meson-buildoptions.sh
647
+++ b/scripts/meson-buildoptions.sh
648
@@ -XXX,XX +XXX,XX @@ meson_options_help() {
649
printf "%s\n" ' jemalloc/system/tcmalloc)'
650
printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for'
651
printf "%s\n" ' upgrades'
652
- printf "%s\n" ' --enable-profiler profiler support'
653
printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and'
654
printf "%s\n" ' getrandom()'
655
printf "%s\n" ' --enable-safe-stack SafeStack Stack Smash Protection (requires'
656
@@ -XXX,XX +XXX,XX @@ _meson_option_parse() {
657
--with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
658
--enable-png) printf "%s" -Dpng=enabled ;;
659
--disable-png) printf "%s" -Dpng=disabled ;;
660
- --enable-profiler) printf "%s" -Dprofiler=true ;;
661
- --disable-profiler) printf "%s" -Dprofiler=false ;;
662
--enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
663
--disable-pvrdma) printf "%s" -Dpvrdma=disabled ;;
664
--enable-qcow1) printf "%s" -Dqcow1=enabled ;;
67
--
665
--
68
2.17.1
666
2.34.1
69
667
70
668
diff view generated by jsdifflib
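
As background for the ISEL detection change above: on Linux the kernel advertises optional ISA features through the auxiliary vector, and getauxval(AT_HWCAP2) returns the feature bitmask at run time. The following is a minimal stand-alone sketch of the same probe, assuming a Linux host; it is illustrative only, not QEMU code, and hedges on whether the platform headers define the feature bit.

    /* Sketch: query AT_HWCAP2 and test a feature bit, falling back to an
     * ISA-level assumption when the platform headers do not define it.
     * PPC_FEATURE2_HAS_ISEL comes from the powerpc kernel headers; on
     * other hosts the #else branch is compiled instead. */
    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/auxv.h>          /* getauxval(), AT_HWCAP2 (Linux) */

    static bool detect_isel(bool have_isa_2_06)
    {
    #ifdef PPC_FEATURE2_HAS_ISEL
        return (getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_ISEL) != 0;
    #else
        return have_isa_2_06;      /* Power7 (ISA 2.06) is known to have isel */
    #endif
    }

    int main(void)
    {
        printf("isel available: %s\n", detect_isel(false) ? "yes" : "no");
        return 0;
    }
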
1
These new instructions are conditional only on MSR.VEC and
1
From: Max Chou <max.chou@sifive.com>
2
are thus part of the Altivec instruction set, and not VSX.
3
This includes lots of double-word arithmetic and a few extra
4
logical operations.
5
2
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
3
The 5th parameter of tcg_gen_gvec_2s should be replaced by the
4
temporary variable tmp in the tcg_gen_gvec_andcs function.
5
6
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
7
Signed-off-by: Max Chou <max.chou@sifive.com>
8
Message-Id: <20230622161646.32005-9-max.chou@sifive.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
tcg/ppc/tcg-target.h | 4 +-
11
tcg/tcg-op-gvec.c | 2 +-
10
tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++----------
12
1 file changed, 1 insertion(+), 1 deletion(-)
11
2 files changed, 67 insertions(+), 22 deletions(-)
12
13
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
14
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
16
--- a/tcg/tcg-op-gvec.c
16
+++ b/tcg/ppc/tcg-target.h
17
+++ b/tcg/tcg-op-gvec.c
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
18
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
18
typedef enum {
19
19
tcg_isa_base,
20
TCGv_i64 tmp = tcg_temp_ebb_new_i64();
20
tcg_isa_2_06,
21
tcg_gen_dup_i64(vece, tmp, c);
21
+ tcg_isa_2_07,
22
- tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g);
22
tcg_isa_3_00,
23
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g);
23
} TCGPowerISA;
24
tcg_temp_free_i64(tmp);
24
25
}
25
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
26
26
extern bool have_vsx;
27
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
29
+#define have_isa_2_07 (have_isa >= tcg_isa_2_07)
30
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
34
#define TCG_TARGET_HAS_v256 0
35
36
#define TCG_TARGET_HAS_andc_vec 1
37
-#define TCG_TARGET_HAS_orc_vec 0
38
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
39
#define TCG_TARGET_HAS_not_vec 1
40
#define TCG_TARGET_HAS_neg_vec 0
41
#define TCG_TARGET_HAS_abs_vec 0
42
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/ppc/tcg-target.inc.c
45
+++ b/tcg/ppc/tcg-target.inc.c
46
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
47
#define VADDSWS VX4(896)
48
#define VADDUWS VX4(640)
49
#define VADDUWM VX4(128)
50
+#define VADDUDM VX4(192) /* v2.07 */
51
52
#define VSUBSBS VX4(1792)
53
#define VSUBUBS VX4(1536)
54
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
55
#define VSUBSWS VX4(1920)
56
#define VSUBUWS VX4(1664)
57
#define VSUBUWM VX4(1152)
58
+#define VSUBUDM VX4(1216) /* v2.07 */
59
60
#define VMAXSB VX4(258)
61
#define VMAXSH VX4(322)
62
#define VMAXSW VX4(386)
63
+#define VMAXSD VX4(450) /* v2.07 */
64
#define VMAXUB VX4(2)
65
#define VMAXUH VX4(66)
66
#define VMAXUW VX4(130)
67
+#define VMAXUD VX4(194) /* v2.07 */
68
#define VMINSB VX4(770)
69
#define VMINSH VX4(834)
70
#define VMINSW VX4(898)
71
+#define VMINSD VX4(962) /* v2.07 */
72
#define VMINUB VX4(514)
73
#define VMINUH VX4(578)
74
#define VMINUW VX4(642)
75
+#define VMINUD VX4(706) /* v2.07 */
76
77
#define VCMPEQUB VX4(6)
78
#define VCMPEQUH VX4(70)
79
#define VCMPEQUW VX4(134)
80
+#define VCMPEQUD VX4(199) /* v2.07 */
81
#define VCMPGTSB VX4(774)
82
#define VCMPGTSH VX4(838)
83
#define VCMPGTSW VX4(902)
84
+#define VCMPGTSD VX4(967) /* v2.07 */
85
#define VCMPGTUB VX4(518)
86
#define VCMPGTUH VX4(582)
87
#define VCMPGTUW VX4(646)
88
+#define VCMPGTUD VX4(711) /* v2.07 */
89
90
#define VSLB VX4(260)
91
#define VSLH VX4(324)
92
#define VSLW VX4(388)
93
+#define VSLD VX4(1476) /* v2.07 */
94
#define VSRB VX4(516)
95
#define VSRH VX4(580)
96
#define VSRW VX4(644)
97
+#define VSRD VX4(1732) /* v2.07 */
98
#define VSRAB VX4(772)
99
#define VSRAH VX4(836)
100
#define VSRAW VX4(900)
101
+#define VSRAD VX4(964) /* v2.07 */
102
#define VRLB VX4(4)
103
#define VRLH VX4(68)
104
#define VRLW VX4(132)
105
+#define VRLD VX4(196) /* v2.07 */
106
107
#define VMULEUB VX4(520)
108
#define VMULEUH VX4(584)
109
+#define VMULEUW VX4(648) /* v2.07 */
110
#define VMULOUB VX4(8)
111
#define VMULOUH VX4(72)
112
+#define VMULOUW VX4(136) /* v2.07 */
113
+#define VMULUWM VX4(137) /* v2.07 */
114
#define VMSUMUHM VX4(38)
115
116
#define VMRGHB VX4(12)
117
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
118
#define VNOR VX4(1284)
119
#define VOR VX4(1156)
120
#define VXOR VX4(1220)
121
+#define VEQV VX4(1668) /* v2.07 */
122
+#define VNAND VX4(1412) /* v2.07 */
123
+#define VORC VX4(1348) /* v2.07 */
124
125
#define VSPLTB VX4(524)
126
#define VSPLTH VX4(588)
127
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
128
case INDEX_op_andc_vec:
129
case INDEX_op_not_vec:
130
return 1;
131
+ case INDEX_op_orc_vec:
132
+ return have_isa_2_07;
133
case INDEX_op_add_vec:
134
case INDEX_op_sub_vec:
135
case INDEX_op_smax_vec:
136
case INDEX_op_smin_vec:
137
case INDEX_op_umax_vec:
138
case INDEX_op_umin_vec:
139
+ case INDEX_op_shlv_vec:
140
+ case INDEX_op_shrv_vec:
141
+ case INDEX_op_sarv_vec:
142
+ return vece <= MO_32 || have_isa_2_07;
143
case INDEX_op_ssadd_vec:
144
case INDEX_op_sssub_vec:
145
case INDEX_op_usadd_vec:
146
case INDEX_op_ussub_vec:
147
- case INDEX_op_shlv_vec:
148
- case INDEX_op_shrv_vec:
149
- case INDEX_op_sarv_vec:
150
return vece <= MO_32;
151
case INDEX_op_cmp_vec:
152
- case INDEX_op_mul_vec:
153
case INDEX_op_shli_vec:
154
case INDEX_op_shri_vec:
155
case INDEX_op_sari_vec:
156
- return vece <= MO_32 ? -1 : 0;
157
+ return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
158
+ case INDEX_op_mul_vec:
159
+ switch (vece) {
160
+ case MO_8:
161
+ case MO_16:
162
+ return -1;
163
+ case MO_32:
164
+ return have_isa_2_07 ? 1 : -1;
165
+ }
166
+ return 0;
167
case INDEX_op_bitsel_vec:
168
return have_vsx;
169
default:
170
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
171
const TCGArg *args, const int *const_args)
172
{
173
static const uint32_t
174
- add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
175
- sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
176
- eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
177
- gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
178
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
179
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
180
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
181
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
182
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
183
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
184
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
185
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
186
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
187
ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
188
- umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
189
- smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
190
- umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
191
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
192
- shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
193
- shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
194
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
195
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
196
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
197
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
198
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
199
+ shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
200
+ shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
201
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
202
mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
203
mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
204
- muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
205
- mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
206
+ muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
207
+ mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
208
pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
209
- rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
210
+ rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
211
212
TCGType type = vecl + TCG_TYPE_V64;
213
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
215
case INDEX_op_sub_vec:
216
insn = sub_op[vece];
217
break;
218
+ case INDEX_op_mul_vec:
219
+ tcg_debug_assert(vece == MO_32 && have_isa_2_07);
220
+ insn = VMULUWM;
221
+ break;
222
case INDEX_op_ssadd_vec:
223
insn = ssadd_op[vece];
224
break;
225
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
226
insn = VNOR;
227
a2 = a1;
228
break;
229
+ case INDEX_op_orc_vec:
230
+ insn = VORC;
231
+ break;
232
233
case INDEX_op_cmp_vec:
234
switch (args[3]) {
235
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
236
{
237
bool need_swap = false, need_inv = false;
238
239
- tcg_debug_assert(vece <= MO_32);
240
+ tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
241
242
switch (cond) {
243
case TCG_COND_EQ:
244
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
245
    break;
246
247
case MO_32:
248
+ tcg_debug_assert(!have_isa_2_07);
249
t3 = tcg_temp_new_vec(type);
250
t4 = tcg_temp_new_vec(type);
251
tcg_gen_dupi_vec(MO_8, t4, -16);
252
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
253
if (hwcap & PPC_FEATURE_ARCH_2_06) {
254
have_isa = tcg_isa_2_06;
255
}
256
+#ifdef PPC_FEATURE2_ARCH_2_07
257
+ if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
258
+ have_isa = tcg_isa_2_07;
259
+ }
260
+#endif
261
#ifdef PPC_FEATURE2_ARCH_3_00
262
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
263
have_isa = tcg_isa_3_00;
264
--
27
--
265
2.17.1
28
2.34.1
266
267
diff view generated by jsdifflib
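
For context on the tcg_gen_gvec_andcs fix above: the helper first duplicates the scalar operand across the element lanes (the tcg_gen_dup_i64 step) and must then hand that splatted temporary, not the original scalar, to the expander. Below is a stand-alone C model of the intended semantics for 16-bit lanes packed in 64-bit words; it is illustrative only, not QEMU code.

    /* Model of "vector andc with scalar" on 16-bit lanes: replicate the
     * scalar to fill a 64-bit word, then compute a & ~splat per word.
     * Passing the raw scalar instead of the replicated value is exactly
     * the bug being fixed. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t dup16(uint16_t c)
    {
        uint64_t v = c;
        return v | v << 16 | v << 32 | v << 48;
    }

    static void gvec_andcs16_model(uint64_t *d, const uint64_t *a,
                                   uint16_t c, size_t n)
    {
        uint64_t splat = dup16(c);              /* the "dup" step */
        for (size_t i = 0; i < n; i++) {
            d[i] = a[i] & ~splat;               /* and-complement per word */
        }
    }

    int main(void)
    {
        uint64_t a = 0xffffffffffffffffull, d;
        gvec_andcs16_model(&d, &a, 0x00f0, 1);
        printf("%016llx\n", (unsigned long long)d);   /* ff0fff0fff0fff0f */
        return 0;
    }
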
1
Now that we have implemented the required tcg operations,
1
The microblaze architecture does not reorder instructions.
2
we can enable detection of host vector support.
2
While there is an MBAR wait-for-data-access instruction,
3
this concerns synchronizing with DMA.
3
4
4
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32)
5
This should have been defined when enabling MTTCG.
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Edgar E. Iglesias <edgar@zeroasic.com>
9
Fixes: d449561b130 ("configure: microblaze: Enable mttcg")
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
11
---
8
tcg/ppc/tcg-target.inc.c | 4 ++++
12
target/microblaze/cpu.h | 3 +++
9
1 file changed, 4 insertions(+)
13
1 file changed, 3 insertions(+)
10
14
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
15
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
17
--- a/target/microblaze/cpu.h
14
+++ b/tcg/ppc/tcg-target.inc.c
18
+++ b/target/microblaze/cpu.h
15
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
19
@@ -XXX,XX +XXX,XX @@
16
have_isel = have_isa_2_06;
20
#include "exec/cpu-defs.h"
17
#endif
21
#include "qemu/cpu-float.h"
18
22
19
+ if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
23
+/* MicroBlaze is always in-order. */
20
+ have_altivec = true;
24
+#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL
21
+ }
22
+
25
+
23
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
26
typedef struct CPUArchState CPUMBState;
24
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
27
#if !defined(CONFIG_USER_ONLY)
25
if (have_altivec) {
28
#include "mmu.h"
26
--
29
--
27
2.17.1
30
2.34.1
28
31
29
32
diff view generated by jsdifflib
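
The TCG_GUEST_DEFAULT_MO definition above declares which load/store orderings the guest architecture guarantees; later patches in this series subtract from it the orderings the host already provides to decide when a barrier must be emitted. Here is a small stand-alone model of that mask arithmetic; the bit values are illustrative, not QEMU's TCG_MO_* definitions.

    /* Model: a barrier is needed for exactly the orderings the guest
     * guarantees but the host does not.  An in-order guest such as
     * microblaze advertises all four orderings. */
    #include <stdio.h>

    #define MO_LD_LD (1 << 0)
    #define MO_ST_LD (1 << 1)
    #define MO_LD_ST (1 << 2)
    #define MO_ST_ST (1 << 3)
    #define MO_ALL   (MO_LD_LD | MO_ST_LD | MO_LD_ST | MO_ST_ST)

    int main(void)
    {
        int guest_mo = MO_ALL;              /* in-order guest */
        int host_mo  = MO_ALL & ~MO_ST_LD;  /* e.g. a TSO-like host */
        int required = guest_mo & ~host_mo; /* orderings TCG must enforce */
        printf("barriers required for mask %#x\n", required);
        return 0;
    }
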
1
Add support for vector maximum/minimum using Altivec instructions
1
The virtio devices require proper memory ordering between
2
VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and
2
the vcpus and the iothreads.
3
VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW.
4
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
---
6
---
8
tcg/ppc/tcg-target.h | 2 +-
7
tcg/tcg-op.c | 14 +++++++++++++-
9
tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++-
8
1 file changed, 13 insertions(+), 1 deletion(-)
10
2 files changed, 40 insertions(+), 2 deletions(-)
11
9
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
10
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
12
--- a/tcg/tcg-op.c
15
+++ b/tcg/ppc/tcg-target.h
13
+++ b/tcg/tcg-op.c
16
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_br(TCGLabel *l)
17
#define TCG_TARGET_HAS_cmp_vec 1
15
18
#define TCG_TARGET_HAS_mul_vec 0
16
void tcg_gen_mb(TCGBar mb_type)
19
#define TCG_TARGET_HAS_sat_vec 0
17
{
20
-#define TCG_TARGET_HAS_minmax_vec 0
18
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {
21
+#define TCG_TARGET_HAS_minmax_vec 1
19
+#ifdef CONFIG_USER_ONLY
22
#define TCG_TARGET_HAS_bitsel_vec 0
20
+ bool parallel = tcg_ctx->gen_tb->cflags & CF_PARALLEL;
23
#define TCG_TARGET_HAS_cmpsel_vec 0
21
+#else
24
22
+ /*
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
23
+ * It is tempting to elide the barrier in a uniprocessor context.
26
index XXXXXXX..XXXXXXX 100644
24
+ * However, even with a single cpu we have i/o threads running in
27
--- a/tcg/ppc/tcg-target.inc.c
25
+ * parallel, and lack of memory order can result in e.g. virtio
28
+++ b/tcg/ppc/tcg-target.inc.c
26
+ * queue entries being read incorrectly.
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
27
+ */
30
#define STVX XO31(231)
28
+ bool parallel = true;
31
#define STVEWX XO31(199)
29
+#endif
32
33
+#define VMAXSB VX4(258)
34
+#define VMAXSH VX4(322)
35
+#define VMAXSW VX4(386)
36
+#define VMAXUB VX4(2)
37
+#define VMAXUH VX4(66)
38
+#define VMAXUW VX4(130)
39
+#define VMINSB VX4(770)
40
+#define VMINSH VX4(834)
41
+#define VMINSW VX4(898)
42
+#define VMINUB VX4(514)
43
+#define VMINUH VX4(578)
44
+#define VMINUW VX4(642)
45
+
30
+
46
#define VCMPEQUB VX4(6)
31
+ if (parallel) {
47
#define VCMPEQUH VX4(70)
32
tcg_gen_op1(INDEX_op_mb, mb_type);
48
#define VCMPEQUW VX4(134)
33
}
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
34
}
50
case INDEX_op_andc_vec:
51
case INDEX_op_not_vec:
52
return 1;
53
+ case INDEX_op_smax_vec:
54
+ case INDEX_op_smin_vec:
55
+ case INDEX_op_umax_vec:
56
+ case INDEX_op_umin_vec:
57
+ return vece <= MO_32;
58
case INDEX_op_cmp_vec:
59
return vece <= MO_32 ? -1 : 0;
60
default:
61
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
62
static const uint32_t
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
64
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
65
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
66
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
67
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
68
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
69
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
70
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
71
72
TCGType type = vecl + TCG_TYPE_V64;
73
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
75
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
76
return;
77
78
+ case INDEX_op_smin_vec:
79
+ insn = smin_op[vece];
80
+ break;
81
+ case INDEX_op_umin_vec:
82
+ insn = umin_op[vece];
83
+ break;
84
+ case INDEX_op_smax_vec:
85
+ insn = smax_op[vece];
86
+ break;
87
+ case INDEX_op_umax_vec:
88
+ insn = umax_op[vece];
89
+ break;
90
case INDEX_op_and_vec:
91
insn = VAND;
92
break;
93
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
94
case INDEX_op_andc_vec:
95
case INDEX_op_orc_vec:
96
case INDEX_op_cmp_vec:
97
+ case INDEX_op_smax_vec:
98
+ case INDEX_op_smin_vec:
99
+ case INDEX_op_umax_vec:
100
+ case INDEX_op_umin_vec:
101
return &v_v_v;
102
case INDEX_op_not_vec:
103
case INDEX_op_dup_vec:
104
--
35
--
105
2.17.1
36
2.34.1
106
37
107
38
diff view generated by jsdifflib
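
The tcg_gen_mb change above keeps the barrier even when only one vcpu exists, because device emulation runs in separate host threads. The pattern it protects is classic message passing between an iothread and a vcpu; the sketch below is a stand-alone C11 illustration with hypothetical names (compile with -pthread), not QEMU code.

    /* An "iothread" publishes data and then raises a flag; the "vcpu" sees
     * the flag and then reads the data.  The release/acquire fences are
     * what the guest-visible barrier must translate into on the host. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static int buffer;                     /* stands in for a virtio queue entry */
    static atomic_int ready;

    static void *iothread(void *arg)
    {
        (void)arg;
        buffer = 42;                                   /* write the data */
        atomic_thread_fence(memory_order_release);     /* data before flag */
        atomic_store_explicit(&ready, 1, memory_order_relaxed);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, iothread, NULL);
        while (!atomic_load_explicit(&ready, memory_order_relaxed)) {
            /* spin until the flag is raised */
        }
        atomic_thread_fence(memory_order_acquire);     /* flag before data */
        printf("buffer = %d\n", buffer);               /* always prints 42 */
        pthread_join(t, NULL);
        return 0;
    }
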
1
Introduce macros VRT(), VRA(), VRB(), VRC() for encoding the
1
Bring the helpers into line with the rest of tcg in respecting
2
vector register fields of Altivec instructions.
2
guest memory ordering.
3
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
6
---
7
tcg/ppc/tcg-target.inc.c | 5 +++++
7
accel/tcg/internal.h | 34 ++++++++++++++++++++++++++++++++++
8
1 file changed, 5 insertions(+)
8
accel/tcg/cputlb.c | 10 ++++++++++
9
9
accel/tcg/user-exec.c | 10 ++++++++++
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
10
3 files changed, 54 insertions(+)
11
12
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
14
--- a/accel/tcg/internal.h
13
+++ b/tcg/ppc/tcg-target.inc.c
15
+++ b/accel/tcg/internal.h
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
16
@@ -XXX,XX +XXX,XX @@ extern int64_t max_advance;
15
#define MB64(b) ((b)<<5)
17
16
#define FXM(b) (1 << (19 - (b)))
18
extern bool one_insn_per_tb;
17
19
18
+#define VRT(r) (((r) & 31) << 21)
20
+/**
19
+#define VRA(r) (((r) & 31) << 16)
21
+ * tcg_req_mo:
20
+#define VRB(r) (((r) & 31) << 11)
22
+ * @type: TCGBar
21
+#define VRC(r) (((r) & 31) << 6)
23
+ *
24
+ * Filter @type to the barrier that is required for the guest
25
+ * memory ordering vs the host memory ordering. A non-zero
26
+ * result indicates that some barrier is required.
27
+ *
28
+ * If TCG_GUEST_DEFAULT_MO is not defined, assume that the
29
+ * guest requires strict ordering.
30
+ *
31
+ * This is a macro so that it's constant even without optimization.
32
+ */
33
+#ifdef TCG_GUEST_DEFAULT_MO
34
+# define tcg_req_mo(type) \
35
+ ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)
36
+#else
37
+# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO)
38
+#endif
22
+
39
+
23
#define LK 1
40
+/**
24
41
+ * cpu_req_mo:
25
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
42
+ * @type: TCGBar
43
+ *
44
+ * If tcg_req_mo indicates a barrier for @type is required
45
+ * for the guest memory model, issue a host memory barrier.
46
+ */
47
+#define cpu_req_mo(type) \
48
+ do { \
49
+ if (tcg_req_mo(type)) { \
50
+ smp_mb(); \
51
+ } \
52
+ } while (0)
53
+
54
#endif /* ACCEL_TCG_INTERNAL_H */
55
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/accel/tcg/cputlb.c
58
+++ b/accel/tcg/cputlb.c
59
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
60
MMULookupLocals l;
61
bool crosspage;
62
63
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
64
crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
65
tcg_debug_assert(!crosspage);
66
67
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
68
uint16_t ret;
69
uint8_t a, b;
70
71
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
72
crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
73
if (likely(!crosspage)) {
74
return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
75
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
76
bool crosspage;
77
uint32_t ret;
78
79
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
80
crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
81
if (likely(!crosspage)) {
82
return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
83
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
84
bool crosspage;
85
uint64_t ret;
86
87
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
88
crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
89
if (likely(!crosspage)) {
90
return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
91
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
92
Int128 ret;
93
int first;
94
95
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
96
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
97
if (likely(!crosspage)) {
98
/* Perform the load host endian. */
99
@@ -XXX,XX +XXX,XX @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
100
bool crosspage;
101
102
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
103
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
104
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
105
tcg_debug_assert(!crosspage);
106
107
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val,
108
bool crosspage;
109
uint8_t a, b;
110
111
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
112
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
113
if (likely(!crosspage)) {
114
do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
115
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val,
116
MMULookupLocals l;
117
bool crosspage;
118
119
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
120
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
121
if (likely(!crosspage)) {
122
do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
123
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val,
124
MMULookupLocals l;
125
bool crosspage;
126
127
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
128
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
129
if (likely(!crosspage)) {
130
do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
131
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
132
uint64_t a, b;
133
int first;
134
135
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
136
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
137
if (likely(!crosspage)) {
138
/* Swap to host endian if necessary, then store. */
139
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/accel/tcg/user-exec.c
142
+++ b/accel/tcg/user-exec.c
143
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
144
uint8_t ret;
145
146
tcg_debug_assert((mop & MO_SIZE) == MO_8);
147
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
148
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
149
ret = ldub_p(haddr);
150
clear_helper_retaddr();
151
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
152
uint16_t ret;
153
154
tcg_debug_assert((mop & MO_SIZE) == MO_16);
155
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
156
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
157
ret = load_atom_2(env, ra, haddr, mop);
158
clear_helper_retaddr();
159
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
160
uint32_t ret;
161
162
tcg_debug_assert((mop & MO_SIZE) == MO_32);
163
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
164
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
165
ret = load_atom_4(env, ra, haddr, mop);
166
clear_helper_retaddr();
167
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
168
uint64_t ret;
169
170
tcg_debug_assert((mop & MO_SIZE) == MO_64);
171
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
172
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
173
ret = load_atom_8(env, ra, haddr, mop);
174
clear_helper_retaddr();
175
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
176
Int128 ret;
177
178
tcg_debug_assert((mop & MO_SIZE) == MO_128);
179
+ cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
180
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
181
ret = load_atom_16(env, ra, haddr, mop);
182
clear_helper_retaddr();
183
@@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
184
void *haddr;
185
186
tcg_debug_assert((mop & MO_SIZE) == MO_8);
187
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
188
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
189
stb_p(haddr, val);
190
clear_helper_retaddr();
191
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
192
void *haddr;
193
194
tcg_debug_assert((mop & MO_SIZE) == MO_16);
195
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
196
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
197
198
if (mop & MO_BSWAP) {
199
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
200
void *haddr;
201
202
tcg_debug_assert((mop & MO_SIZE) == MO_32);
203
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
204
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
205
206
if (mop & MO_BSWAP) {
207
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
208
void *haddr;
209
210
tcg_debug_assert((mop & MO_SIZE) == MO_64);
211
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
212
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
213
214
if (mop & MO_BSWAP) {
215
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
216
void *haddr;
217
218
tcg_debug_assert((mop & MO_SIZE) == MO_128);
219
+ cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
220
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
221
222
if (mop & MO_BSWAP) {
26
--
223
--
27
2.17.1
224
2.34.1
28
225
29
226
diff view generated by jsdifflib
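
To make the VRT/VRA/VRB field macros above concrete: a VX-form Altivec instruction carries the primary opcode 4 in the top six bits, three 5-bit vector register numbers, and an extended opcode in the low bits, which is what the VX4() values earlier in the series encode. The hypothetical stand-alone encoder below uses the same shifts; it is illustrative only, not QEMU code.

    /* Assemble a VX-form instruction word.  The shifts mirror the VRT(),
     * VRA() and VRB() macros; 128 is the vadduwm extended opcode used by
     * the VADDUWM define earlier in the series. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t vx4(uint32_t xo, unsigned vrt, unsigned vra, unsigned vrb)
    {
        return (4u << 26)            /* primary opcode for Altivec VX forms */
             | ((vrt & 31) << 21)    /* VRT: destination vector register */
             | ((vra & 31) << 16)    /* VRA: first source */
             | ((vrb & 31) << 11)    /* VRB: second source */
             | (xo & 0x7ff);         /* extended opcode */
    }

    int main(void)
    {
        /* vadduwm v0, v1, v2 */
        printf("0x%08x\n", vx4(128, 0, 1, 2));   /* prints 0x10011080 */
        return 0;
    }
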
1
For Altivec, this is done via vector shift by vector,
1
We now issue host memory barriers to match the guest memory order.
2
with the immediate shift count first splatted into a vector register.
2
Continue to disable MTTCG only if the guest has not been ported.
3
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
6
---
7
tcg/ppc/tcg-target.h | 2 +-
7
accel/tcg/tcg-all.c | 39 ++++++++++-----------------------------
8
tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++--
8
1 file changed, 10 insertions(+), 29 deletions(-)
9
2 files changed, 57 insertions(+), 3 deletions(-)
10
9
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
10
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
12
--- a/accel/tcg/tcg-all.c
14
+++ b/tcg/ppc/tcg-target.h
13
+++ b/accel/tcg/tcg-all.c
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
14
@@ -XXX,XX +XXX,XX @@ DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
16
#define TCG_TARGET_HAS_abs_vec 0
15
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
17
#define TCG_TARGET_HAS_shi_vec 0
16
*
18
#define TCG_TARGET_HAS_shs_vec 0
17
* Once a guest architecture has been converted to the new primitives
19
-#define TCG_TARGET_HAS_shv_vec 0
18
- * there are two remaining limitations to check.
20
+#define TCG_TARGET_HAS_shv_vec 1
19
- *
21
#define TCG_TARGET_HAS_cmp_vec 1
20
- * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
22
#define TCG_TARGET_HAS_mul_vec 0
21
- * - The host must have a stronger memory order than the guest
23
#define TCG_TARGET_HAS_sat_vec 1
22
- *
24
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
23
- * It may be possible in future to support strong guests on weak hosts
25
index XXXXXXX..XXXXXXX 100644
24
- * but that will require tagging all load/stores in a guest with their
26
--- a/tcg/ppc/tcg-target.inc.c
25
- * implicit memory order requirements which would likely slow things
27
+++ b/tcg/ppc/tcg-target.inc.c
26
- * down a lot.
28
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
27
+ * there is one remaining limitation to check:
29
#define VCMPGTUH VX4(582)
28
+ * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
30
#define VCMPGTUW VX4(646)
29
*/
31
30
32
+#define VSLB VX4(260)
31
-static bool check_tcg_memory_orders_compatible(void)
33
+#define VSLH VX4(324)
32
-{
34
+#define VSLW VX4(388)
33
-#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
35
+#define VSRB VX4(516)
34
- return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
36
+#define VSRH VX4(580)
35
-#else
37
+#define VSRW VX4(644)
36
- return false;
38
+#define VSRAB VX4(772)
37
-#endif
39
+#define VSRAH VX4(836)
38
-}
40
+#define VSRAW VX4(900)
39
-
41
+
40
static bool default_mttcg_enabled(void)
42
#define VAND VX4(1028)
41
{
43
#define VANDC VX4(1092)
42
if (icount_enabled() || TCG_OVERSIZED_GUEST) {
44
#define VNOR VX4(1284)
43
return false;
45
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
44
- } else {
46
case INDEX_op_sssub_vec:
45
-#ifdef TARGET_SUPPORTS_MTTCG
47
case INDEX_op_usadd_vec:
46
- return check_tcg_memory_orders_compatible();
48
case INDEX_op_ussub_vec:
47
-#else
49
+ case INDEX_op_shlv_vec:
48
- return false;
50
+ case INDEX_op_shrv_vec:
49
-#endif
51
+ case INDEX_op_sarv_vec:
50
}
52
return vece <= MO_32;
51
+#ifdef TARGET_SUPPORTS_MTTCG
53
case INDEX_op_cmp_vec:
52
+# ifndef TCG_GUEST_DEFAULT_MO
54
+ case INDEX_op_shli_vec:
53
+# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO"
55
+ case INDEX_op_shri_vec:
54
+# endif
56
+ case INDEX_op_sari_vec:
55
+ return true;
57
return vece <= MO_32 ? -1 : 0;
56
+#else
58
default:
57
+ return false;
59
return 0;
58
+#endif
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
61
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
62
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
63
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
64
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
65
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
66
+ shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
67
+ shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
68
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
69
70
TCGType type = vecl + TCG_TYPE_V64;
71
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
73
case INDEX_op_umax_vec:
74
insn = umax_op[vece];
75
break;
76
+ case INDEX_op_shlv_vec:
77
+ insn = shlv_op[vece];
78
+ break;
79
+ case INDEX_op_shrv_vec:
80
+ insn = shrv_op[vece];
81
+ break;
82
+ case INDEX_op_sarv_vec:
83
+ insn = sarv_op[vece];
84
+ break;
85
case INDEX_op_and_vec:
86
insn = VAND;
87
break;
88
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
89
tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
90
}
59
}
91
60
92
+static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
61
static void tcg_accel_instance_init(Object *obj)
93
+ TCGv_vec v1, TCGArg imm, TCGOpcode opci)
62
@@ -XXX,XX +XXX,XX @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp)
94
+{
63
warn_report("Guest not yet converted to MTTCG - "
95
+ TCGv_vec t1 = tcg_temp_new_vec(type);
64
"you may get unexpected results");
96
+
65
#endif
97
+ /* Splat w/bytes for xxspltib. */
66
- if (!check_tcg_memory_orders_compatible()) {
98
+ tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
67
- warn_report("Guest expects a stronger memory ordering "
99
+ vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
68
- "than the host provides");
100
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
69
- error_printf("This may cause strange/hard to debug errors\n");
101
+ tcg_temp_free_vec(t1);
70
- }
102
+}
71
s->mttcg_enabled = true;
103
+
72
}
104
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
73
} else if (strcmp(value, "single") == 0) {
105
TCGv_vec v1, TCGv_vec v2, TCGCond cond)
106
{
107
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
108
{
109
va_list va;
110
TCGv_vec v0, v1, v2;
111
+ TCGArg a2;
112
113
va_start(va, a0);
114
v0 = temp_tcgv_vec(arg_temp(a0));
115
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
116
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
117
+ a2 = va_arg(va, TCGArg);
118
119
switch (opc) {
120
+ case INDEX_op_shli_vec:
121
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
122
+ break;
123
+ case INDEX_op_shri_vec:
124
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
125
+ break;
126
+ case INDEX_op_sari_vec:
127
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
128
+ break;
129
case INDEX_op_cmp_vec:
130
+ v2 = temp_tcgv_vec(arg_temp(a2));
131
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
132
break;
133
default:
134
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
135
case INDEX_op_smin_vec:
136
case INDEX_op_umax_vec:
137
case INDEX_op_umin_vec:
138
+ case INDEX_op_shlv_vec:
139
+ case INDEX_op_shrv_vec:
140
+ case INDEX_op_sarv_vec:
141
return &v_v_v;
142
case INDEX_op_not_vec:
143
case INDEX_op_dup_vec:
144
--
74
--
145
2.17.1
75
2.34.1
146
76
147
77
diff view generated by jsdifflib
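
The shift-by-immediate expansion above works because Altivec only provides element-wise shifts by another vector, so the constant count is first splatted into every lane. A plain-C model of that equivalence for 32-bit lanes follows; it is illustrative only, while the real code operates on TCG vector temporaries.

    /* Shifting each 32-bit lane by an immediate equals an element-wise
     * shift by a vector whose lanes all hold that immediate; like the
     * hardware, only the low 5 bits of each count are used. */
    #include <stdint.h>
    #include <stdio.h>

    static void shl_by_imm(uint32_t *d, const uint32_t *a, unsigned imm, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            d[i] = a[i] << (imm & 31);
        }
    }

    static void shl_by_vec(uint32_t *d, const uint32_t *a,
                           const uint32_t *sh, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            d[i] = a[i] << (sh[i] & 31);
        }
    }

    int main(void)
    {
        uint32_t a[4]  = { 1, 2, 3, 4 };
        uint32_t sh[4] = { 3, 3, 3, 3 };       /* the splatted immediate */
        uint32_t d1[4], d2[4];
        shl_by_imm(d1, a, 3, 4);
        shl_by_vec(d2, a, sh, 4);
        for (int i = 0; i < 4; i++) {
            printf("%u %u\n", d1[i], d2[i]);   /* pairs are identical */
        }
        return 0;
    }
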
1
This is identical to have_isa_2_06, so replace it.
1
We have run out of bits we can use within the CPUTLBEntry comparators,
2
2
as TLB_FLAGS_MASK cannot overlap alignment.
3
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
3
4
Store slow_flags[] in CPUTLBEntryFull, and merge with the flags from
5
the comparator. A new TLB_FORCE_SLOW bit is set within the comparator
6
as an indication that the slow path must be used.
7
8
Move TLB_BSWAP to TLB_SLOW_FLAGS_MASK. Since we are out of bits,
9
we cannot create a new bit without moving an old one.
10
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
13
---
6
tcg/ppc/tcg-target.inc.c | 5 ++---
14
include/exec/cpu-all.h | 21 +++++++--
7
1 file changed, 2 insertions(+), 3 deletions(-)
15
include/exec/cpu-defs.h | 6 +++
8
16
include/hw/core/cpu.h | 1 +
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
17
accel/tcg/cputlb.c | 98 ++++++++++++++++++++++++-----------------
10
index XXXXXXX..XXXXXXX 100644
18
4 files changed, 82 insertions(+), 44 deletions(-)
11
--- a/tcg/ppc/tcg-target.inc.c
19
12
+++ b/tcg/ppc/tcg-target.inc.c
20
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
13
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
21
index XXXXXXX..XXXXXXX 100644
14
22
--- a/include/exec/cpu-all.h
15
TCGPowerISA have_isa;
23
+++ b/include/exec/cpu-all.h
16
24
@@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env);
17
-#define HAVE_ISA_2_06 have_isa_2_06
25
#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3))
18
#define HAVE_ISEL have_isa_2_06
26
/* Set if TLB entry contains a watchpoint. */
19
27
#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4))
20
#ifndef CONFIG_SOFTMMU
28
-/* Set if TLB entry requires byte swap. */
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
29
-#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5))
30
+/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
31
+#define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5))
32
/* Set if TLB entry writes ignored. */
33
#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6))
34
35
-/* Use this mask to check interception with an alignment mask
36
+/*
37
+ * Use this mask to check interception with an alignment mask
38
* in a TCG backend.
39
*/
40
#define TLB_FLAGS_MASK \
41
(TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
42
- | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE)
43
+ | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
44
+
45
+/*
46
+ * Flags stored in CPUTLBEntryFull.slow_flags[x].
47
+ * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x].
48
+ */
49
+/* Set if TLB entry requires byte swap. */
50
+#define TLB_BSWAP (1 << 0)
51
+
52
+#define TLB_SLOW_FLAGS_MASK TLB_BSWAP
53
+
54
+/* The two sets of flags must not overlap. */
55
+QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
56
57
/**
58
* tlb_hit_page: return true if page aligned @addr is a hit against the
59
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
60
index XXXXXXX..XXXXXXX 100644
61
--- a/include/exec/cpu-defs.h
62
+++ b/include/exec/cpu-defs.h
63
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
64
/* @lg_page_size contains the log2 of the page size. */
65
uint8_t lg_page_size;
66
67
+ /*
68
+ * Additional tlb flags for use by the slow path. If non-zero,
69
+ * the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW.
70
+ */
71
+ uint8_t slow_flags[MMU_ACCESS_COUNT];
72
+
73
/*
74
* Allow target-specific additions to this structure.
75
* This may be used to cache items from the guest cpu
76
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/hw/core/cpu.h
79
+++ b/include/hw/core/cpu.h
80
@@ -XXX,XX +XXX,XX @@ typedef enum MMUAccessType {
81
MMU_DATA_LOAD = 0,
82
MMU_DATA_STORE = 1,
83
MMU_INST_FETCH = 2
84
+#define MMU_ACCESS_COUNT 3
85
} MMUAccessType;
86
87
typedef struct CPUWatchpoint CPUWatchpoint;
88
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/accel/tcg/cputlb.c
91
+++ b/accel/tcg/cputlb.c
92
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
93
env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
94
}
95
96
+static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent,
97
+ target_ulong address, int flags,
98
+ MMUAccessType access_type, bool enable)
99
+{
100
+ if (enable) {
101
+ address |= flags & TLB_FLAGS_MASK;
102
+ flags &= TLB_SLOW_FLAGS_MASK;
103
+ if (flags) {
104
+ address |= TLB_FORCE_SLOW;
105
+ }
106
+ } else {
107
+ address = -1;
108
+ flags = 0;
109
+ }
110
+ ent->addr_idx[access_type] = address;
111
+ full->slow_flags[access_type] = flags;
112
+}
113
+
114
/*
115
* Add a new TLB entry. At most one entry for a given virtual address
116
* is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
117
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
118
CPUTLB *tlb = env_tlb(env);
119
CPUTLBDesc *desc = &tlb->d[mmu_idx];
120
MemoryRegionSection *section;
121
- unsigned int index;
122
- vaddr address;
123
- vaddr write_address;
124
+ unsigned int index, read_flags, write_flags;
125
uintptr_t addend;
126
CPUTLBEntry *te, tn;
127
hwaddr iotlb, xlat, sz, paddr_page;
128
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
129
" prot=%x idx=%d\n",
130
addr, full->phys_addr, prot, mmu_idx);
131
132
- address = addr_page;
133
+ read_flags = 0;
134
if (full->lg_page_size < TARGET_PAGE_BITS) {
135
/* Repeat the MMU check and TLB fill on every access. */
136
- address |= TLB_INVALID_MASK;
137
+ read_flags |= TLB_INVALID_MASK;
138
}
139
if (full->attrs.byte_swap) {
140
- address |= TLB_BSWAP;
141
+ read_flags |= TLB_BSWAP;
142
}
143
144
is_ram = memory_region_is_ram(section->mr);
145
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
146
addend = 0;
147
}
148
149
- write_address = address;
150
+ write_flags = read_flags;
151
if (is_ram) {
152
iotlb = memory_region_get_ram_addr(section->mr) + xlat;
153
/*
154
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
155
*/
156
if (prot & PAGE_WRITE) {
157
if (section->readonly) {
158
- write_address |= TLB_DISCARD_WRITE;
159
+ write_flags |= TLB_DISCARD_WRITE;
160
} else if (cpu_physical_memory_is_clean(iotlb)) {
161
- write_address |= TLB_NOTDIRTY;
162
+ write_flags |= TLB_NOTDIRTY;
163
}
22
}
164
}
23
} else {
165
} else {
24
uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
166
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
25
- if (!HAVE_ISA_2_06 && insn == LDBRX) {
167
* Reads to romd devices go through the ram_ptr found above,
26
+ if (!have_isa_2_06 && insn == LDBRX) {
168
* but of course reads to I/O must go through MMIO.
27
tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
169
*/
28
tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
170
- write_address |= TLB_MMIO;
29
tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
171
+ write_flags |= TLB_MMIO;
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
172
if (!is_romd) {
173
- address = write_address;
174
+ read_flags = write_flags;
31
}
175
}
32
} else {
176
}
33
uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
177
34
- if (!HAVE_ISA_2_06 && insn == STDBRX) {
178
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
35
+ if (!have_isa_2_06 && insn == STDBRX) {
179
* TARGET_PAGE_BITS, and either
36
tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
180
* + the ram_addr_t of the page base of the target RAM (RAM)
37
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
181
* + the offset within section->mr of the page base (I/O, ROMD)
38
tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
182
- * We subtract the vaddr_page (which is page aligned and thus won't
183
+ * We subtract addr_page (which is page aligned and thus won't
184
* disturb the low bits) to give an offset which can be added to the
185
* (non-page-aligned) vaddr of the eventual memory access to get
186
* the MemoryRegion offset for the access. Note that the vaddr we
187
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
188
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
189
*/
190
desc->fulltlb[index] = *full;
191
- desc->fulltlb[index].xlat_section = iotlb - addr_page;
192
- desc->fulltlb[index].phys_addr = paddr_page;
193
+ full = &desc->fulltlb[index];
194
+ full->xlat_section = iotlb - addr_page;
195
+ full->phys_addr = paddr_page;
196
197
/* Now calculate the new entry */
198
tn.addend = addend - addr_page;
199
- if (prot & PAGE_READ) {
200
- tn.addr_read = address;
201
- if (wp_flags & BP_MEM_READ) {
202
- tn.addr_read |= TLB_WATCHPOINT;
203
- }
204
- } else {
205
- tn.addr_read = -1;
206
- }
207
208
- if (prot & PAGE_EXEC) {
209
- tn.addr_code = address;
210
- } else {
211
- tn.addr_code = -1;
212
- }
213
+ tlb_set_compare(full, &tn, addr_page, read_flags,
214
+ MMU_INST_FETCH, prot & PAGE_EXEC);
215
216
- tn.addr_write = -1;
217
- if (prot & PAGE_WRITE) {
218
- tn.addr_write = write_address;
219
- if (prot & PAGE_WRITE_INV) {
220
- tn.addr_write |= TLB_INVALID_MASK;
221
- }
222
- if (wp_flags & BP_MEM_WRITE) {
223
- tn.addr_write |= TLB_WATCHPOINT;
224
- }
225
+ if (wp_flags & BP_MEM_READ) {
226
+ read_flags |= TLB_WATCHPOINT;
227
}
228
+ tlb_set_compare(full, &tn, addr_page, read_flags,
229
+ MMU_DATA_LOAD, prot & PAGE_READ);
230
+
231
+ if (prot & PAGE_WRITE_INV) {
232
+ write_flags |= TLB_INVALID_MASK;
233
+ }
234
+ if (wp_flags & BP_MEM_WRITE) {
235
+ write_flags |= TLB_WATCHPOINT;
236
+ }
237
+ tlb_set_compare(full, &tn, addr_page, write_flags,
238
+ MMU_DATA_STORE, prot & PAGE_WRITE);
239
240
copy_tlb_helper_locked(te, &tn);
241
tlb_n_used_entries_inc(env, mmu_idx);
242
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, vaddr addr,
243
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
244
uint64_t tlb_addr = tlb_read_idx(entry, access_type);
245
vaddr page_addr = addr & TARGET_PAGE_MASK;
246
- int flags = TLB_FLAGS_MASK;
247
+ int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
248
+ CPUTLBEntryFull *full;
249
250
if (!tlb_hit_page(tlb_addr, page_addr)) {
251
if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
252
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, vaddr addr,
253
}
254
flags &= tlb_addr;
255
256
- *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
257
+ *pfull = full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
258
+ flags |= full->slow_flags[access_type];
259
260
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
261
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
262
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
263
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
264
uint64_t tlb_addr = tlb_read_idx(entry, access_type);
265
bool maybe_resized = false;
266
+ CPUTLBEntryFull *full;
267
+ int flags;
268
269
/* If the TLB entry is for a different page, reload and try again. */
270
if (!tlb_hit(tlb_addr, addr)) {
271
@@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
272
tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
273
}
274
275
- data->flags = tlb_addr & TLB_FLAGS_MASK;
276
- data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
277
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
278
+ flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW);
279
+ flags |= full->slow_flags[access_type];
280
+
281
+ data->full = full;
282
+ data->flags = flags;
283
/* Compute haddr speculatively; depending on flags it might be invalid. */
284
data->haddr = (void *)((uintptr_t)addr + entry->addend);
285
39
--
2.17.1

--
2.34.1
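The flag split described in the commit message above can be hard to visualize from the diff alone. The following stand-alone sketch models the idea with reduced types: flags that fit in the comparator's low bits stay there, anything else is parked in a per-entry slow_flags[] array, and a TLB_FORCE_SLOW bit in the comparator tells the fast path to take the slow route. The constants and struct layout here are simplified stand-ins, not the actual QEMU definitions.

#include <stdint.h>
#include <stdio.h>

/* Reduced stand-ins for the real constants (values are illustrative). */
#define PAGE_BITS        12
#define TLB_INVALID      (1u << (PAGE_BITS - 1))
#define TLB_FORCE_SLOW   (1u << (PAGE_BITS - 5))
#define TLB_FAST_MASK    (TLB_INVALID | TLB_FORCE_SLOW)
#define TLB_BSWAP        (1u << 0)            /* lives only in slow_flags[] */

enum { ACC_LOAD, ACC_STORE, ACC_FETCH, ACC_COUNT };

typedef struct {
    uint64_t addr[ACC_COUNT];                 /* comparator: page | fast flags */
    uint8_t  slow_flags[ACC_COUNT];           /* overflow flags, per access type */
} TLBEntryModel;

/* Same idea as tlb_set_compare() in the patch: split the flags between the
 * comparator word and slow_flags[], advertising the latter via FORCE_SLOW. */
static void set_compare(TLBEntryModel *e, int acc, uint64_t page, unsigned flags)
{
    uint64_t addr = page | (flags & TLB_FAST_MASK);
    unsigned slow = flags & ~TLB_FAST_MASK;

    if (slow) {
        addr |= TLB_FORCE_SLOW;               /* make the fast path bail out */
    }
    e->addr[acc] = addr;
    e->slow_flags[acc] = slow;
}

/* Lookup side: recombine the two halves, dropping the FORCE_SLOW marker. */
static unsigned lookup_flags(const TLBEntryModel *e, int acc)
{
    unsigned flags = e->addr[acc] & (TLB_FAST_MASK & ~TLB_FORCE_SLOW);
    return flags | e->slow_flags[acc];
}

int main(void)
{
    TLBEntryModel e = { 0 };

    set_compare(&e, ACC_LOAD, 0x1000, TLB_BSWAP);
    printf("comparator=%#llx merged flags=%#x\n",
           (unsigned long long)e.addr[ACC_LOAD], lookup_flags(&e, ACC_LOAD));
    return 0;
}

In this model, as in the patch, the comparator for the example entry carries only TLB_FORCE_SLOW, while the byte-swap flag is recovered from slow_flags[] when the slow path runs.
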
The VSX instruction set instructions include double-word loads and
stores, double-word load and splat, double-word permute, and bit
select. All of which require multiple operations in the Altivec
instruction set.

Because the VSX registers map %vsr32 to %vr0, and we have no current
intention or need to use vector registers outside %vr0-%vr19, force
on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't
have to otherwise modify the VR[TABC] macros.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 5 ++--
tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++----
2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644

This frees up one bit of the primary tlb flags without
impacting the TLB_NOTDIRTY logic.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 8 ++++----
accel/tcg/cputlb.c | 18 ++++++++++++++----
2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index XXXXXXX..XXXXXXX 100644
20
--- a/tcg/ppc/tcg-target.h
13
--- a/include/exec/cpu-all.h
21
+++ b/tcg/ppc/tcg-target.h
14
+++ b/include/exec/cpu-all.h
22
@@ -XXX,XX +XXX,XX @@ typedef enum {
15
@@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env);
23
16
#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2))
24
extern TCGPowerISA have_isa;
17
/* Set if TLB entry is an IO callback. */
25
extern bool have_altivec;
18
#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3))
26
+extern bool have_vsx;
19
-/* Set if TLB entry contains a watchpoint. */
27
20
-#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4))
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
21
/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
29
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
22
#define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5))
30
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
23
/* Set if TLB entry writes ignored. */
31
* instruction and substituting two 32-bit stores makes the generated
24
@@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env);
32
* code quite large.
33
*/
25
*/
34
-#define TCG_TARGET_HAS_v64 0
26
#define TLB_FLAGS_MASK \
35
+#define TCG_TARGET_HAS_v64 have_vsx
27
(TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
36
#define TCG_TARGET_HAS_v128 have_altivec
28
- | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
37
#define TCG_TARGET_HAS_v256 0
29
+ | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
38
30
39
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
31
/*
40
#define TCG_TARGET_HAS_mul_vec 1
32
* Flags stored in CPUTLBEntryFull.slow_flags[x].
41
#define TCG_TARGET_HAS_sat_vec 1
33
@@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env);
42
#define TCG_TARGET_HAS_minmax_vec 1
34
*/
43
-#define TCG_TARGET_HAS_bitsel_vec 0
35
/* Set if TLB entry requires byte swap. */
44
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
36
#define TLB_BSWAP (1 << 0)
45
#define TCG_TARGET_HAS_cmpsel_vec 0
37
+/* Set if TLB entry contains a watchpoint. */
46
38
+#define TLB_WATCHPOINT (1 << 1)
47
void flush_icache_range(uintptr_t start, uintptr_t stop);
39
48
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
40
-#define TLB_SLOW_FLAGS_MASK TLB_BSWAP
41
+#define TLB_SLOW_FLAGS_MASK (TLB_BSWAP | TLB_WATCHPOINT)
42
43
/* The two sets of flags must not overlap. */
44
QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
45
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
49
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
50
--- a/tcg/ppc/tcg-target.inc.c
47
--- a/accel/tcg/cputlb.c
51
+++ b/tcg/ppc/tcg-target.inc.c
48
+++ b/accel/tcg/cputlb.c
52
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
49
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
53
TCGPowerISA have_isa;
50
*/
54
static bool have_isel;
51
goto stop_the_world;
55
bool have_altivec;
52
}
56
+bool have_vsx;
53
- /* Collect TLB_WATCHPOINT for read. */
57
54
+ /* Collect tlb flags for read. */
58
#ifndef CONFIG_SOFTMMU
55
tlb_addr |= tlbe->addr_read;
59
#define TCG_GUEST_BASE_REG 30
56
60
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
57
/* Notice an IO access or a needs-MMU-lookup access */
61
#define LVEBX XO31(7)
58
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
62
#define LVEHX XO31(39)
59
notdirty_write(env_cpu(env), addr, size, full, retaddr);
63
#define LVEWX XO31(71)
60
}
64
+#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
61
65
+#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
62
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
66
63
- cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs,
67
#define STVX XO31(231)
64
- BP_MEM_READ | BP_MEM_WRITE, retaddr);
68
#define STVEWX XO31(199)
65
+ if (unlikely(tlb_addr & TLB_FORCE_SLOW)) {
69
+#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
66
+ int wp_flags = 0;
70
71
#define VADDSBS VX4(768)
72
#define VADDUBS VX4(512)
73
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
74
75
#define VSLDOI VX4(44)
76
77
+#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
78
+#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
79
+
67
+
80
#define RT(r) ((r)<<21)
68
+ if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
81
#define RS(r) ((r)<<21)
69
+ wp_flags |= BP_MEM_WRITE;
82
#define RA(r) ((r)<<16)
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
84
add = 0;
85
}
86
87
- load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
88
- if (TCG_TARGET_REG_BITS == 64) {
89
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
90
+ if (have_vsx) {
91
+ load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
92
+ load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
93
+ if (TCG_TARGET_REG_BITS == 64) {
94
+ new_pool_label(s, val, rel, s->code_ptr, add);
95
+ } else {
96
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
97
+ }
70
+ }
98
} else {
71
+ if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
99
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
72
+ wp_flags |= BP_MEM_READ;
100
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
73
+ }
101
+ if (TCG_TARGET_REG_BITS == 64) {
74
+ if (wp_flags) {
102
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
75
+ cpu_check_watchpoint(env_cpu(env), addr, size,
103
+ } else {
76
+ full->attrs, wp_flags, retaddr);
104
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
105
+ }
77
+ }
106
}
78
}
107
79
108
if (USE_REG_TB) {
80
return hostaddr;
109
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
110
/* fallthru */
111
case TCG_TYPE_V64:
112
tcg_debug_assert(ret >= TCG_REG_V0);
113
+ if (have_vsx) {
114
+ tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
115
+ break;
116
+ }
117
tcg_debug_assert((offset & 7) == 0);
118
tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
119
if (offset & 8) {
120
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
121
/* fallthru */
122
case TCG_TYPE_V64:
123
tcg_debug_assert(arg >= TCG_REG_V0);
124
+ if (have_vsx) {
125
+ tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
126
+ break;
127
+ }
128
tcg_debug_assert((offset & 7) == 0);
129
if (offset & 8) {
130
tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
131
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
132
case INDEX_op_shri_vec:
133
case INDEX_op_sari_vec:
134
return vece <= MO_32 ? -1 : 0;
135
+ case INDEX_op_bitsel_vec:
136
+ return have_vsx;
137
default:
138
return 0;
139
}
140
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141
tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
142
break;
143
case MO_64:
144
+ if (have_vsx) {
145
+ tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
146
+ break;
147
+ }
148
tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
149
tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
150
break;
151
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
152
tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
153
break;
154
case MO_64:
155
+ if (have_vsx) {
156
+ tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
157
+ break;
158
+ }
159
tcg_debug_assert((offset & 7) == 0);
160
tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
161
tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
162
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
163
}
164
break;
165
166
+ case INDEX_op_bitsel_vec:
167
+ tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
168
+ return;
169
+
170
case INDEX_op_dup2_vec:
171
assert(TCG_TARGET_REG_BITS == 32);
172
/* With inputs a1 = xLxx, a2 = xHxx */
173
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
174
case INDEX_op_st_vec:
175
case INDEX_op_dupm_vec:
176
return &v_r;
177
+ case INDEX_op_bitsel_vec:
178
case INDEX_op_ppc_msum_vec:
179
return &v_v_v_v;
180
181
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
182
183
if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
184
have_altivec = true;
185
+ /* We only care about the portion of VSX that overlaps Altivec. */
186
+ if (hwcap & PPC_FEATURE_HAS_VSX) {
187
+ have_vsx = true;
188
+ }
189
}
190
191
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
192
--
2.17.1

--
2.34.1
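One detail of the VSX patch above that is easy to miss in the diff is why the new opcodes hard-code the {ax,bx,cx,tx} bits. VSX register numbers are six bits wide, encoded as a five-bit field plus a separate extension bit; forcing the extension bit to 1 always selects %vsr32-%vsr63, which alias the Altivec registers, so the existing five-bit VR encoders keep working unchanged. The sketch below is a minimal stand-alone model of that encoding trick; the helper macros follow the style of the backend but are reproduced here purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Opcode helpers in the style of the ppc backend (illustrative copies). */
#define OPCD(opc)  ((uint32_t)(opc) << 26)
#define XO31(xo)   (OPCD(31) | ((uint32_t)(xo) << 1))

/* Five-bit register fields, as used by the existing Altivec encoders. */
#define VRT(r)     (((uint32_t)(r) & 31) << 21)
#define RA(r)      (((uint32_t)(r) & 31) << 16)
#define RB(r)      (((uint32_t)(r) & 31) << 11)

/*
 * lxsdx encodes a six-bit target register as the 5-bit VRT field plus a TX
 * bit (bit 0 of the instruction word).  Forcing TX=1 means the target is
 * always %vsr32+VRT, i.e. an Altivec register, so VRT() needs no changes.
 */
#define LXSDX      (XO31(588) | 1)

int main(void)
{
    int vr = 5;                                  /* %vr5 aliases %vsr37 */
    uint32_t insn = LXSDX | VRT(vr) | RA(3) | RB(4);

    printf("lxsdx v%d, r3, r4 -> 0x%08x (targets vsr%d)\n", vr, insn, 32 + vr);
    return 0;
}
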
Introduce an enum to hold base < 2.06 < 3.00. Use macros to
preserve the existing have_isa_2_06 and have_isa_3_00 predicates.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.h | 12 ++++++++++--
tcg/ppc/tcg-target.inc.c | 8 ++++----
2 files changed, 14 insertions(+), 6 deletions(-)

Move to fill a hole in the set of bits.
Reduce the total number of tlb bits by 1.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 4 ++--
tcg/tcg-op-ldst.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
13
--- a/include/exec/cpu-all.h
14
+++ b/tcg/ppc/tcg-target.h
14
+++ b/include/exec/cpu-all.h
15
@@ -XXX,XX +XXX,XX @@ typedef enum {
15
@@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env);
16
TCG_AREG0 = TCG_REG_R27
16
#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2))
17
} TCGReg;
17
/* Set if TLB entry is an IO callback. */
18
18
#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3))
19
-extern bool have_isa_2_06;
19
+/* Set if TLB entry writes ignored. */
20
-extern bool have_isa_3_00;
20
+#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 4))
21
+typedef enum {
21
/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
22
+ tcg_isa_base,
22
#define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5))
23
+ tcg_isa_2_06,
23
-/* Set if TLB entry writes ignored. */
24
+ tcg_isa_3_00,
24
-#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6))
25
+} TCGPowerISA;
25
26
+
26
/*
27
+extern TCGPowerISA have_isa;
27
* Use this mask to check interception with an alignment mask
28
+
28
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
29
+#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
30
+#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
34
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
35
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/ppc/tcg-target.inc.c
30
--- a/tcg/tcg-op-ldst.c
37
+++ b/tcg/ppc/tcg-target.inc.c
31
+++ b/tcg/tcg-op-ldst.c
38
@@ -XXX,XX +XXX,XX @@
32
@@ -XXX,XX +XXX,XX @@ static void check_max_alignment(unsigned a_bits)
39
33
* The requested alignment cannot overlap the TLB flags.
40
static tcg_insn_unit *tb_ret_addr;
34
* FIXME: Must keep the count up-to-date with "exec/cpu-all.h".
41
35
*/
42
-bool have_isa_2_06;
36
- tcg_debug_assert(a_bits + 6 <= tcg_ctx->page_bits);
43
-bool have_isa_3_00;
37
+ tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
44
+TCGPowerISA have_isa;
45
46
#define HAVE_ISA_2_06 have_isa_2_06
47
#define HAVE_ISEL have_isa_2_06
48
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
49
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
50
unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
51
52
+ have_isa = tcg_isa_base;
53
if (hwcap & PPC_FEATURE_ARCH_2_06) {
54
- have_isa_2_06 = true;
55
+ have_isa = tcg_isa_2_06;
56
}
57
#ifdef PPC_FEATURE2_ARCH_3_00
58
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
59
- have_isa_3_00 = true;
60
+ have_isa = tcg_isa_3_00;
61
}
62
#endif
38
#endif
39
}
63
40
64
--
2.17.1

--
2.34.1
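The enum-plus-macro pattern introduced by the tcg/ppc patch above is worth spelling out: a single ordered ISA level replaces independent feature booleans, and the old predicate names survive as comparisons, so existing call sites need no changes. A minimal stand-alone version, following the names in the patch but with the AT_HWCAP/AT_HWCAP2 detection reduced to a plain assignment, looks like this:

#include <stdio.h>

/* Ordered ISA levels: base < 2.06 < 3.00, as in the patch. */
typedef enum {
    tcg_isa_base,
    tcg_isa_2_06,
    tcg_isa_3_00,
} TCGPowerISA;

static TCGPowerISA have_isa = tcg_isa_base;

/* The old boolean predicates become comparisons against the single level. */
#define have_isa_2_06  (have_isa >= tcg_isa_2_06)
#define have_isa_3_00  (have_isa >= tcg_isa_3_00)

int main(void)
{
    have_isa = tcg_isa_3_00;    /* stand-in for the hwcap probe in the patch */
    printf("have_isa_2_06=%d have_isa_3_00=%d\n", have_isa_2_06, have_isa_3_00);
    return 0;
}

Because the levels are ordered, enabling a newer ISA automatically implies every older predicate, which is exactly what the later v2.07 and v3.00 patches in the series rely on.
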