The following changes since commit 390e8fc6b0e7b521c9eceb8dfe0958e141009ab9:

  Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging (2023-06-26 16:05:45 +0200)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230626

for you to fetch changes up to a0eaae08c7c6a59c185cf646b02f4167b2ac6ec0:

  accel/tcg: Renumber TLB_DISCARD_WRITE (2023-06-26 17:33:00 +0200)

----------------------------------------------------------------
accel/tcg: Replace target_ulong in some APIs
accel/tcg: Remove CONFIG_PROFILER
accel/tcg: Store some tlb flags in CPUTLBEntryFull
tcg: Issue memory barriers as required for the guest memory model
tcg: Fix temporary variable in tcg_gen_gvec_andcs

----------------------------------------------------------------
Alex Bennée (1):
      softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining

Anton Johansson (11):
      accel: Replace target_ulong in tlb_*()
      accel/tcg/translate-all.c: Widen pc and cs_base
      target: Widen pc/cs_base in cpu_get_tb_cpu_state
      accel/tcg/cputlb.c: Widen CPUTLBEntry access functions
      accel/tcg/cputlb.c: Widen addr in MMULookupPageData
      accel/tcg/cpu-exec.c: Widen pc to vaddr
      accel/tcg: Widen pc to vaddr in CPUJumpCache
      accel: Replace target_ulong with vaddr in probe_*()
      accel/tcg: Replace target_ulong with vaddr in *_mmu_lookup()
      accel/tcg: Replace target_ulong with vaddr in translator_*()
      cpu: Replace target_ulong with hwaddr in tb_invalidate_phys_addr()

Fei Wu (1):
      accel/tcg: remove CONFIG_PROFILER

Max Chou (1):
      tcg: Fix temporary variable in tcg_gen_gvec_andcs

Richard Henderson (8):
      tests/plugin: Remove duplicate insn log from libinsn.so
      target/microblaze: Define TCG_GUEST_DEFAULT_MO
      tcg: Do not elide memory barriers for !CF_PARALLEL in system mode
      tcg: Add host memory barriers to cpu_ldst.h interfaces
      accel/tcg: Remove check_tcg_memory_orders_compatible
      accel/tcg: Store some tlb flags in CPUTLBEntryFull
      accel/tcg: Move TLB_WATCHPOINT to TLB_SLOW_FLAGS_MASK
      accel/tcg: Renumber TLB_DISCARD_WRITE

 meson.build                              |   2 -
 qapi/machine.json                        |  18 --
 accel/tcg/internal.h                     |  40 +++-
 accel/tcg/tb-hash.h                      |  12 +-
 accel/tcg/tb-jmp-cache.h                 |   2 +-
 include/exec/cpu-all.h                   |  27 ++-
 include/exec/cpu-defs.h                  |  10 +-
 include/exec/cpu_ldst.h                  |  10 +-
 include/exec/exec-all.h                  |  95 +++++----
 include/exec/translator.h                |   6 +-
 include/hw/core/cpu.h                    |   1 +
 include/qemu/plugin-memory.h             |   2 +-
 include/qemu/timer.h                     |   9 -
 include/tcg/tcg.h                        |  26 ---
 target/alpha/cpu.h                       |   4 +-
 target/arm/cpu.h                         |   4 +-
 target/avr/cpu.h                         |   4 +-
 target/cris/cpu.h                        |   4 +-
 target/hexagon/cpu.h                     |   4 +-
 target/hppa/cpu.h                        |   5 +-
 target/i386/cpu.h                        |   4 +-
 target/loongarch/cpu.h                   |   6 +-
 target/m68k/cpu.h                        |   4 +-
 target/microblaze/cpu.h                  |   7 +-
 target/mips/cpu.h                        |   4 +-
 target/nios2/cpu.h                       |   4 +-
 target/openrisc/cpu.h                    |   5 +-
 target/ppc/cpu.h                         |   8 +-
 target/riscv/cpu.h                       |   4 +-
 target/rx/cpu.h                          |   4 +-
 target/s390x/cpu.h                       |   4 +-
 target/sh4/cpu.h                         |   4 +-
 target/sparc/cpu.h                       |   4 +-
 target/tricore/cpu.h                     |   4 +-
 target/xtensa/cpu.h                      |   4 +-
 accel/stubs/tcg-stub.c                   |   6 +-
 accel/tcg/cpu-exec.c                     |  43 ++--
 accel/tcg/cputlb.c                       | 351 +++++++++++++++++--------------
 accel/tcg/monitor.c                      |  31 ---
 accel/tcg/tb-maint.c                     |   2 +-
 accel/tcg/tcg-accel-ops.c                |  10 -
 accel/tcg/tcg-all.c                      |  39 +---
 accel/tcg/translate-all.c                |  46 +---
 accel/tcg/translator.c                   |  10 +-
 accel/tcg/user-exec.c                    |  24 ++-
 cpu.c                                    |   2 +-
 fpu/softfloat.c                          |  22 +-
 softmmu/runstate.c                       |   9 -
 target/arm/helper.c                      |   4 +-
 target/ppc/helper_regs.c                 |   4 +-
 target/riscv/cpu_helper.c                |   4 +-
 tcg/tcg-op-gvec.c                        |   2 +-
 tcg/tcg-op-ldst.c                        |   2 +-
 tcg/tcg-op.c                             |  14 +-
 tcg/tcg.c                                | 214 -------------------
 tests/plugin/insn.c                      |   9 +-
 tests/qtest/qmp-cmd-test.c               |   3 -
 hmp-commands-info.hx                     |  15 --
 meson_options.txt                        |   2 -
 scripts/meson-buildoptions.sh            |   3 -
 tests/tcg/i386/Makefile.softmmu-target   |   9 -
 tests/tcg/i386/Makefile.target           |   6 -
 tests/tcg/x86_64/Makefile.softmmu-target |   9 -
 63 files changed, 469 insertions(+), 781 deletions(-)
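As background on the softfloat entry above: QEMU_FLATTEN wraps the GCC/Clang
flatten attribute, which inlines every call made inside the annotated function
and so keeps the compiler from calling partially-specialized ".isra" clones of
the helpers on the hot path. The following is a minimal standalone sketch of
that pattern with hypothetical functions, not the actual softfloat code:

    #include <stdio.h>

    #define QEMU_FLATTEN __attribute__((flatten))

    /* Stand-in for a small softfloat helper. */
    static int add_parts(int a, int b)
    {
        return a + b;
    }

    /* flatten forces add_parts() to be inlined here, so no out-of-line
     * (or compiler-cloned) copy of the helper is invoked. */
    QEMU_FLATTEN
    static int float_add(int a, int b)
    {
        return add_parts(a, b);
    }

    int main(void)
    {
        printf("%d\n", float_add(2, 3));
        return 0;
    }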
From: Anton Johansson <anjo@rev.ng>

Replaces target_ulong with vaddr for guest virtual addresses in tlb_*()
functions and auxiliary structs.

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-2-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
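As an illustration of the API shape after this change, here is a hypothetical
caller (not part of this patch; it compiles only inside the QEMU tree, in a
target-specific unit where these headers are available). The point is that
addresses now travel as the target-independent 64-bit vaddr type rather than
the build-dependent target_ulong:

    #include "qemu/osdep.h"
    #include "exec/exec-all.h"

    /* Hypothetical helper: flush one guest page from every MMU index.
     * The same prototype now serves 32-bit and 64-bit guests, because
     * vaddr is 64 bits regardless of the target. */
    static void flush_one_page(CPUState *cpu, vaddr addr)
    {
        tlb_flush_page(cpu, addr & TARGET_PAGE_MASK);
    }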
 include/exec/cpu-defs.h      |   4 +-
 include/exec/exec-all.h      |  79 ++++++++--------
 include/qemu/plugin-memory.h |   2 +-
 accel/stubs/tcg-stub.c       |   2 +-
 accel/tcg/cputlb.c           | 177 +++++++++++++++++------------------
 accel/tcg/tb-maint.c         |   2 +-
 6 files changed, 131 insertions(+), 135 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
      * we must flush the entire tlb.  The region is matched if
      * (addr & large_page_mask) == large_page_addr.
      */
-    target_ulong large_page_addr;
-    target_ulong large_page_mask;
+    vaddr large_page_addr;
+    vaddr large_page_mask;
     /* host time (in ns) at the beginning of the time window */
     int64_t window_begin_ns;
     /* maximum number of entries observed in the window */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu);
  * Flush one page from the TLB of the specified CPU, for all
  * MMU indexes.
  */
-void tlb_flush_page(CPUState *cpu, target_ulong addr);
+void tlb_flush_page(CPUState *cpu, vaddr addr);
 /**
  * tlb_flush_page_all_cpus:
  * @cpu: src CPU of the flush
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
  * Flush one page from the TLB of the specified CPU, for all
  * MMU indexes.
  */
-void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
+void tlb_flush_page_all_cpus(CPUState *src, vaddr addr);
 /**
  * tlb_flush_page_all_cpus_synced:
  * @cpu: src CPU of the flush
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
  * the source vCPUs safe work is complete. This will depend on when
  * the guests translation ends the TB.
  */
-void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr);
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
 /**
  * tlb_flush:
  * @cpu: CPU whose TLB should be flushed
@@ -XXX,XX +XXX,XX @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu);
  * Flush one page from the TLB of the specified CPU, for the specified
  * MMU indexes.
  */
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
                               uint16_t idxmap);
 /**
  * tlb_flush_page_by_mmuidx_all_cpus:
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
  * Flush one page from the TLB of all CPUs, for the specified
  * MMU indexes.
  */
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
                                        uint16_t idxmap);
 /**
  * tlb_flush_page_by_mmuidx_all_cpus_synced:
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
  * complete once the source vCPUs safe work is complete. This will
  * depend on when the guests translation ends the TB.
  */
-void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
                                               uint16_t idxmap);
 /**
  * tlb_flush_by_mmuidx:
@@ -XXX,XX +XXX,XX @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
  *
  * Similar to tlb_flush_page_mask, but with a bitmap of indexes.
  */
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
                                    uint16_t idxmap, unsigned bits);

 /* Similarly, with broadcast and syncing. */
-void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
                                             uint16_t idxmap, unsigned bits);
 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
-    (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits);
+    (CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits);

 /**
  * tlb_flush_range_by_mmuidx
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
  * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
  * comparing only the low @bits worth of each virtual page.
  */
-void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
-                               target_ulong len, uint16_t idxmap,
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                               vaddr len, uint16_t idxmap,
                                unsigned bits);

 /* Similarly, with broadcast and syncing. */
-void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
-                                        target_ulong len, uint16_t idxmap,
+void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
+                                        vaddr len, uint16_t idxmap,
                                         unsigned bits);
 void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                               target_ulong addr,
-                                               target_ulong len,
+                                               vaddr addr,
+                                               vaddr len,
                                                uint16_t idxmap,
                                                unsigned bits);

@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
  * tlb_set_page_full:
  * @cpu: CPU context
  * @mmu_idx: mmu index of the tlb to modify
- * @vaddr: virtual address of the entry to add
+ * @addr: virtual address of the entry to add
  * @full: the details of the tlb entry
  *
  * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
  * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
  * used by tlb_flush_page.
  */
-void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr,
                        CPUTLBEntryFull *full);

 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
- * @vaddr: virtual address of page to add entry for
+ * @addr: virtual address of page to add entry for
  * @paddr: physical address of the page
  * @attrs: memory transaction attributes
  * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits)
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
  * @size: size of the page in bytes
  *
  * Add an entry to this CPU's TLB (a mapping from virtual address
- * @vaddr to physical address @paddr) with the specified memory
+ * @addr to physical address @paddr) with the specified memory
  * transaction attributes. This is generally called by the target CPU
  * specific code after it has been called through the tlb_fill()
  * entry point and performed a successful page table walk to find
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
  * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only
  * used by tlb_flush_page.
  */
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
                              hwaddr paddr, MemTxAttrs attrs,
-                             int prot, int mmu_idx, target_ulong size);
+                             int prot, int mmu_idx, vaddr size);
 /* tlb_set_page:
  *
  * This function is equivalent to calling tlb_set_page_with_attrs()
  * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided
  * as a convenience for CPUs which don't use memory transaction attributes.
  */
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page(CPUState *cpu, vaddr addr,
                   hwaddr paddr, int prot,
-                  int mmu_idx, target_ulong size);
+                  int mmu_idx, vaddr size);
 #else
 static inline void tlb_init(CPUState *cpu)
 {
@@ -XXX,XX +XXX,XX @@ static inline void tlb_init(CPUState *cpu)
 static inline void tlb_destroy(CPUState *cpu)
 {
 }
-static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
+static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
 {
 }
-static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
+static inline void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
 {
 }
-static inline void tlb_flush_page_all_cpus_synced(CPUState *src,
-                                                  target_ulong addr)
+static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
 {
 }
 static inline void tlb_flush(CPUState *cpu)
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 {
 }
 static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
-                                            target_ulong addr, uint16_t idxmap)
+                                            vaddr addr, uint16_t idxmap)
 {
 }

@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 {
 }
 static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
-                                                     target_ulong addr,
+                                                     vaddr addr,
                                                      uint16_t idxmap)
 {
 }
 static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                            target_ulong addr,
+                                                            vaddr addr,
                                                             uint16_t idxmap)
 {
 }
@@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 {
 }
 static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
-                                                 target_ulong addr,
+                                                 vaddr addr,
                                                  uint16_t idxmap,
                                                  unsigned bits)
 {
 }
 static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu,
-                                                          target_ulong addr,
+                                                          vaddr addr,
                                                           uint16_t idxmap,
                                                           unsigned bits)
 {
 }
 static inline void
-tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
                                               uint16_t idxmap, unsigned bits)
 {
 }
-static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
-                                             target_ulong len, uint16_t idxmap,
+static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                                             vaddr len, uint16_t idxmap,
                                              unsigned bits)
 {
 }
 static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu,
-                                                      target_ulong addr,
-                                                      target_ulong len,
+                                                      vaddr addr,
+                                                      vaddr len,
                                                       uint16_t idxmap,
                                                       unsigned bits)
 {
 }
 static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                             target_ulong addr,
-                                                             target_long len,
+                                                             vaddr addr,
+                                                             vaddr len,
                                                              uint16_t idxmap,
                                                              unsigned bits)
 {
@@ -XXX,XX +XXX,XX @@ static inline void mmap_lock(void) {}
 static inline void mmap_unlock(void) {}

 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
+void tlb_set_dirty(CPUState *cpu, vaddr addr);

 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/plugin-memory.h
+++ b/include/qemu/plugin-memory.h
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_hwaddr {
  * It would only fail if not called from an instrumented memory access
  * which would be an abuse of the API.
  */
-bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
+bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
                        bool is_store, struct qemu_plugin_hwaddr *data);

 #endif /* PLUGIN_MEMORY_H */
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -XXX,XX +XXX,XX @@ void tb_flush(CPUState *cpu)
 {
 }

-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
+void tlb_set_dirty(CPUState *cpu, vaddr vaddr)
 {
 }

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 }

 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
-                                      target_ulong page, target_ulong mask)
+                                      vaddr page, vaddr mask)
 {
     page &= mask;
     mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
@@ -XXX,XX +XXX,XX @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
            page == (tlb_entry->addr_code & mask));
 }

-static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
-                                        target_ulong page)
+static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
 {
     return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
 }
@@ -XXX,XX +XXX,XX @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)

 /* Called with tlb_c.lock held */
 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
-                                        target_ulong page,
-                                        target_ulong mask)
+                                        vaddr page,
+                                        vaddr mask)
 {
     if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
         memset(tlb_entry, -1, sizeof(*tlb_entry));
@@ -XXX,XX +XXX,XX @@ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
     return false;
 }

-static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
-                                          target_ulong page)
+static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page)
 {
     return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
 }

 /* Called with tlb_c.lock held */
 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
-                                            target_ulong page,
-                                            target_ulong mask)
+                                            vaddr page,
+                                            vaddr mask)
 {
     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
     int k;
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
 }

 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
-                                              target_ulong page)
+                                              vaddr page)
 {
     tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
 }

-static void tlb_flush_page_locked(CPUArchState *env, int midx,
-                                  target_ulong page)
+static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page)
 {
-    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
-    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
+    vaddr lp_addr = env_tlb(env)->d[midx].large_page_addr;
+    vaddr lp_mask = env_tlb(env)->d[midx].large_page_mask;

     /* Check if we need to flush due to large pages.  */
     if ((page & lp_mask) == lp_addr) {
-        tlb_debug("forcing full flush midx %d ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+        tlb_debug("forcing full flush midx %d (%"
+                  VADDR_PRIx "/%" VADDR_PRIx ")\n",
                   midx, lp_addr, lp_mask);
         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
     } else {
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
-                                             target_ulong addr,
+                                             vaddr addr,
                                              uint16_t idxmap)
 {
     CPUArchState *env = cpu->env_ptr;
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,

     assert_cpu_is_self(cpu);

-    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
+    tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);

     qemu_spin_lock(&env_tlb(env)->c.lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                              run_on_cpu_data data)
 {
-    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
-    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
+    vaddr addr_and_idxmap = data.target_ptr;
+    vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 }

 typedef struct {
-    target_ulong addr;
+    vaddr addr;
     uint16_t idxmap;
 } TLBFlushPageByMMUIdxData;

@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
     g_free(d);
 }

-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
 {
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
+    tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);

     /* This should already be page aligned */
     addr &= TARGET_PAGE_MASK;
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
     }
 }

-void tlb_flush_page(CPUState *cpu, target_ulong addr)
+void tlb_flush_page(CPUState *cpu, vaddr addr)
 {
     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
 }

-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
                                        uint16_t idxmap)
 {
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
+    tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

     /* This should already be page aligned */
     addr &= TARGET_PAGE_MASK;
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
     tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
 }

-void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
+void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
 {
     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
 }

 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
-                                              target_ulong addr,
+                                              vaddr addr,
                                               uint16_t idxmap)
 {
-    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
+    tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

     /* This should already be page aligned */
     addr &= TARGET_PAGE_MASK;
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
     }
 }

-void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
 {
     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 }

 static void tlb_flush_range_locked(CPUArchState *env, int midx,
-                                   target_ulong addr, target_ulong len,
+                                   vaddr addr, vaddr len,
                                    unsigned bits)
 {
     CPUTLBDesc *d = &env_tlb(env)->d[midx];
     CPUTLBDescFast *f = &env_tlb(env)->f[midx];
-    target_ulong mask = MAKE_64BIT_MASK(0, bits);
+    vaddr mask = MAKE_64BIT_MASK(0, bits);

     /*
      * If @bits is smaller than the tlb size, there may be multiple entries
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
      */
     if (mask < f->mask || len > f->mask) {
         tlb_debug("forcing full flush midx %d ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
+                  "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n",
                   midx, addr, mask, len);
         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
         return;
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
      */
     if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
         tlb_debug("forcing full flush midx %d ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+                  "%" VADDR_PRIx "/%" VADDR_PRIx ")\n",
                   midx, d->large_page_addr, d->large_page_mask);
         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
         return;
     }

-    for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
-        target_ulong page = addr + i;
+    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
+        vaddr page = addr + i;
         CPUTLBEntry *entry = tlb_entry(env, midx, page);

         if (tlb_flush_entry_mask_locked(entry, page, mask)) {
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
 }

 typedef struct {
-    target_ulong addr;
-    target_ulong len;
+    vaddr addr;
+    vaddr len;
     uint16_t idxmap;
     uint16_t bits;
 } TLBFlushRangeData;
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,

     assert_cpu_is_self(cpu);

-    tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
+    tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n",
               d.addr, d.bits, d.len, d.idxmap);

     qemu_spin_lock(&env_tlb(env)->c.lock);
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
      * overlap the flushed pages, which includes the previous.
      */
     d.addr -= TARGET_PAGE_SIZE;
-    for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
+    for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
         tb_jmp_cache_clear_page(cpu, d.addr);
         d.addr += TARGET_PAGE_SIZE;
     }
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
     g_free(d);
 }

-void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
-                               target_ulong len, uint16_t idxmap,
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                               vaddr len, uint16_t idxmap,
                                unsigned bits)
 {
     TLBFlushRangeData d;
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
     }
 }

-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
                                    uint16_t idxmap, unsigned bits)
 {
     tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
 }

 void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
-                                        target_ulong addr, target_ulong len,
+                                        vaddr addr, vaddr len,
                                         uint16_t idxmap, unsigned bits)
 {
     TLBFlushRangeData d;
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
 }

 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
-                                            target_ulong addr,
-                                            uint16_t idxmap, unsigned bits)
+                                            vaddr addr, uint16_t idxmap,
+                                            unsigned bits)
 {
     tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
                                        idxmap, bits);
 }

 void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
-                                               target_ulong addr,
-                                               target_ulong len,
+                                               vaddr addr,
+                                               vaddr len,
                                                uint16_t idxmap,
                                                unsigned bits)
 {
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 }

 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
-                                                   target_ulong addr,
+                                                   vaddr addr,
                                                    uint16_t idxmap,
                                                    unsigned bits)
 {
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)

 /* Called with tlb_c.lock held */
 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
-                                         target_ulong vaddr)
+                                         vaddr addr)
 {
-    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
-        tlb_entry->addr_write = vaddr;
+    if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) {
+        tlb_entry->addr_write = addr;
     }
 }

 /* update the TLB corresponding to virtual page vaddr
    so that it is no longer dirty */
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
+void tlb_set_dirty(CPUState *cpu, vaddr addr)
 {
     CPUArchState *env = cpu->env_ptr;
     int mmu_idx;

     assert_cpu_is_self(cpu);

-    vaddr &= TARGET_PAGE_MASK;
+    addr &= TARGET_PAGE_MASK;
     qemu_spin_lock(&env_tlb(env)->c.lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
+        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, addr), addr);
     }

     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         int k;
         for (k = 0; k < CPU_VTLB_SIZE; k++) {
-            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
+            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], addr);
         }
     }
     qemu_spin_unlock(&env_tlb(env)->c.lock);
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 /* Our TLB does not support large pages, so remember the area covered by
    large pages and trigger a full TLB flush if these are invalidated.  */
 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
-                               target_ulong vaddr, target_ulong size)
+                               vaddr addr, uint64_t size)
 {
-    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
-    target_ulong lp_mask = ~(size - 1);
+    vaddr lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
+    vaddr lp_mask = ~(size - 1);

-    if (lp_addr == (target_ulong)-1) {
+    if (lp_addr == (vaddr)-1) {
         /* No previous large page.  */
-        lp_addr = vaddr;
+        lp_addr = addr;
     } else {
         /* Extend the existing region to include the new page.
            This is a compromise between unnecessary flushes and
            the cost of maintaining a full variable size TLB. */
         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
-        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
+        while (((lp_addr ^ addr) & lp_mask) != 0) {
             lp_mask <<= 1;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
 * critical section.
 */
 void tlb_set_page_full(CPUState *cpu, int mmu_idx,
-                       target_ulong vaddr, CPUTLBEntryFull *full)
+                       vaddr addr, CPUTLBEntryFull *full)
 {
     CPUArchState *env = cpu->env_ptr;
     CPUTLB *tlb = env_tlb(env);
     CPUTLBDesc *desc = &tlb->d[mmu_idx];
     MemoryRegionSection *section;
     unsigned int index;
-    target_ulong address;
-    target_ulong write_address;
+    vaddr address;
+    vaddr write_address;
     uintptr_t addend;
     CPUTLBEntry *te, tn;
     hwaddr iotlb, xlat, sz, paddr_page;
-    target_ulong vaddr_page;
+    vaddr addr_page;
     int asidx, wp_flags, prot;
     bool is_ram, is_romd;

@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
         sz = TARGET_PAGE_SIZE;
     } else {
         sz = (hwaddr)1 << full->lg_page_size;
-        tlb_add_large_page(env, mmu_idx, vaddr, sz);
+        tlb_add_large_page(env, mmu_idx, addr, sz);
     }
-    vaddr_page = vaddr & TARGET_PAGE_MASK;
+    addr_page = addr & TARGET_PAGE_MASK;
     paddr_page = full->phys_addr & TARGET_PAGE_MASK;

     prot = full->prot;
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
                                                 &xlat, &sz, full->attrs, &prot);
     assert(sz >= TARGET_PAGE_SIZE);

-    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" HWADDR_FMT_plx
+    tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
               " prot=%x idx=%d\n",
-              vaddr, full->phys_addr, prot, mmu_idx);
+              addr, full->phys_addr, prot, mmu_idx);

-    address = vaddr_page;
+    address = addr_page;
     if (full->lg_page_size < TARGET_PAGE_BITS) {
         /* Repeat the MMU check and TLB fill on every access.  */
         address |= TLB_INVALID_MASK;
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
         }
     }

-    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
+    wp_flags = cpu_watchpoint_address_matches(cpu, addr_page,
                                               TARGET_PAGE_SIZE);

-    index = tlb_index(env, mmu_idx, vaddr_page);
-    te = tlb_entry(env, mmu_idx, vaddr_page);
+    index = tlb_index(env, mmu_idx, addr_page);
+    te = tlb_entry(env, mmu_idx, addr_page);

     /*
      * Hold the TLB lock for the rest of the function. We could acquire/release
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     tlb->c.dirty |= 1 << mmu_idx;

     /* Make sure there's no cached translation for the new page.  */
-    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
+    tlb_flush_vtlb_page_locked(env, mmu_idx, addr_page);

     /*
      * Only evict the old entry to the victim tlb if it's for a
      * different page; otherwise just overwrite the stale data.
      */
-    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
+    if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
         CPUTLBEntry *tv = &desc->vtable[vidx];

@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
      */
     desc->fulltlb[index] = *full;
-    desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
+    desc->fulltlb[index].xlat_section = iotlb - addr_page;
     desc->fulltlb[index].phys_addr = paddr_page;

     /* Now calculate the new entry */
-    tn.addend = addend - vaddr_page;
+    tn.addend = addend - addr_page;
     if (prot & PAGE_READ) {
         tn.addr_read = address;
         if (wp_flags & BP_MEM_READ) {
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     qemu_spin_unlock(&tlb->c.lock);
 }

-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
                              hwaddr paddr, MemTxAttrs attrs, int prot,
-                             int mmu_idx, target_ulong size)
+                             int mmu_idx, uint64_t size)
 {
     CPUTLBEntryFull full = {
         .phys_addr = paddr,
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     };

     assert(is_power_of_2(size));
-    tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
+    tlb_set_page_full(cpu, mmu_idx, addr, &full);
 }

-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page(CPUState *cpu, vaddr addr,
                   hwaddr paddr, int prot,
-                  int mmu_idx, target_ulong size)
+                  int mmu_idx, uint64_t size)
 {
-    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
+    tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
                             prot, mmu_idx, size);
 }

@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
-static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
+static void tlb_fill(CPUState *cpu, vaddr addr, int size,
                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
     bool ok;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
 }

 static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
-                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
+                         int mmu_idx, vaddr addr, uintptr_t retaddr,
                          MMUAccessType access_type, MemOp op)
 {
     CPUState *cpu = env_cpu(env);
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
 }

 static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
-                      int mmu_idx, uint64_t val, target_ulong addr,
+                      int mmu_idx, uint64_t val, vaddr addr,
                       uintptr_t retaddr, MemOp op)
 {
     CPUState *cpu = env_cpu(env);
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
 /* Return true if ADDR is present in the victim tlb, and has been copied
    back to the main tlb.  */
 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
-                           MMUAccessType access_type, target_ulong page)
+                           MMUAccessType access_type, vaddr page)
 {
     size_t vidx;

@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
 * from the same thread (which a mem callback will be) this is safe.
 */

-bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
+bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
                        bool is_store, struct qemu_plugin_hwaddr *data)
 {
     CPUArchState *env = cpu->env_ptr;
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
-    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
+    vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

     if (likely(tlb_hit(tlb_addr, addr))) {
         /* We must have an iotlb entry for MMIO */
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -XXX,XX +XXX,XX @@ static void tb_remove_all(void)
 /* Call with mmap_lock held. */
 static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
 {
-    target_ulong addr;
+    vaddr addr;
     int flags;

     assert_memory_lock();
--
2.34.1

From: Anton Johansson <anjo@rev.ng>

Signed-off-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230621135633.1649-3-anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
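The logging change in this patch swaps TARGET_FMT_lx for the VADDR_PRIx
macro; the pattern relies on C string-literal concatenation to splice the
conversion specifier into the format string. A standalone sketch, using local
stand-ins for the QEMU definitions:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vaddr;         /* stand-in for QEMU's vaddr type */
    #define VADDR_PRIx PRIx64       /* stand-in for QEMU's VADDR_PRIx */

    int main(void)
    {
        vaddr pc = 0xffffffff80001000ULL;
        /* Adjacent string literals concatenate: "... to %" "llx" "\n". */
        printf("cpu_io_recompile: rewound execution of TB to %"
               VADDR_PRIx "\n", pc);
        return 0;
    }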
 accel/tcg/internal.h      |  6 +++---
 accel/tcg/translate-all.c | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
 G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
 #endif /* CONFIG_SOFTMMU */

-TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
-                              target_ulong cs_base, uint32_t flags,
+TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc,
+                              uint64_t cs_base, uint32_t flags,
                               int cflags);
 void page_init(void);
 void tb_htable_init(void);
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                uintptr_t host_pc);

 /* Return the current PC from CPU, which may be cached in TB. */
-static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
+static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
 {
     if (tb_cflags(tb) & CF_PCREL) {
         return cpu->cc->get_pc(cpu);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ void page_init(void)
 * Return the size of the generated code, or negative on error.
 */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
-                           target_ulong pc, void *host_pc,
+                           vaddr pc, void *host_pc,
                            int *max_insns, int64_t *ti)
 {
     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,

 /* Called with mmap_lock held for user mode emulation. */
 TranslationBlock *tb_gen_code(CPUState *cpu,
-                              target_ulong pc, target_ulong cs_base,
+                              vaddr pc, uint64_t cs_base,
                               uint32_t flags, int cflags)
 {
     CPUArchState *env = cpu->env_ptr;
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
-        target_ulong pc = log_pc(cpu, tb);
+        vaddr pc = log_pc(cpu, tb);
         if (qemu_log_in_addr_range(pc)) {
-            qemu_log("cpu_io_recompile: rewound execution of TB to "
-                     TARGET_FMT_lx "\n", pc);
+            qemu_log("cpu_io_recompile: rewound execution of TB to %"
+                     VADDR_PRIx "\n", pc);
         }
     }

--
2.34.1

152 | diff view generated by jsdifflib |
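
The MO_8/MO_16 paths above turn an unaligned element load into modular lane arithmetic: LVX fetches the 16-byte-aligned quadword at (offset & -16), and the element to splat is recovered from the low offset bits, xor-flipped on little-endian hosts. A minimal standalone sketch of just that index computation (plain C with a hypothetical helper name, not backend code):

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical: which lane LVX + VSPLTH picks for a halfword at `offset`. */
    static int splat_lane_mo16(uint64_t offset, int host_is_bigendian)
    {
        uint64_t base = offset & -16ull;   /* LVX loads this aligned quadword */
        int elt = (offset >> 1) & 7;       /* same as extract32(offset, 1, 3) */
        if (!host_is_bigendian) {
            elt ^= 7;                      /* LE hosts number lanes in reverse */
        }
        assert(offset - base < 16);        /* element lies inside the quadword */
        return elt;
    }

    int main(void)
    {
        assert(splat_lane_mo16(0x1006, 1) == 3);   /* byte 6 -> halfword 3 */
        assert(splat_lane_mo16(0x1006, 0) == 4);   /* lane flipped on LE   */
        return 0;
    }
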
1 | Altivec supports 32 128-bit vector registers, whose names are | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | by convention v0 through v31. | ||
3 | 2 | ||
3 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20230621135633.1649-4-anjo@rev.ng> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 7 | --- |
7 | tcg/ppc/tcg-target.h | 11 ++++- | 8 | target/alpha/cpu.h | 4 ++-- |
8 | tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++--------------- | 9 | target/arm/cpu.h | 4 ++-- |
9 | 2 files changed, 65 insertions(+), 34 deletions(-) | 10 | target/avr/cpu.h | 4 ++-- |
11 | target/cris/cpu.h | 4 ++-- | ||
12 | target/hexagon/cpu.h | 4 ++-- | ||
13 | target/hppa/cpu.h | 5 ++--- | ||
14 | target/i386/cpu.h | 4 ++-- | ||
15 | target/loongarch/cpu.h | 6 ++---- | ||
16 | target/m68k/cpu.h | 4 ++-- | ||
17 | target/microblaze/cpu.h | 4 ++-- | ||
18 | target/mips/cpu.h | 4 ++-- | ||
19 | target/nios2/cpu.h | 4 ++-- | ||
20 | target/openrisc/cpu.h | 5 ++--- | ||
21 | target/ppc/cpu.h | 8 ++++---- | ||
22 | target/riscv/cpu.h | 4 ++-- | ||
23 | target/rx/cpu.h | 4 ++-- | ||
24 | target/s390x/cpu.h | 4 ++-- | ||
25 | target/sh4/cpu.h | 4 ++-- | ||
26 | target/sparc/cpu.h | 4 ++-- | ||
27 | target/tricore/cpu.h | 4 ++-- | ||
28 | target/xtensa/cpu.h | 4 ++-- | ||
29 | accel/tcg/cpu-exec.c | 9 ++++++--- | ||
30 | accel/tcg/translate-all.c | 3 ++- | ||
31 | target/arm/helper.c | 4 ++-- | ||
32 | target/ppc/helper_regs.c | 4 ++-- | ||
33 | target/riscv/cpu_helper.c | 4 ++-- | ||
34 | 26 files changed, 58 insertions(+), 58 deletions(-) | ||
10 | 35 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 36 | diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h |
12 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 38 | --- a/target/alpha/cpu.h |
14 | +++ b/tcg/ppc/tcg-target.h | 39 | +++ b/target/alpha/cpu.h |
15 | @@ -XXX,XX +XXX,XX @@ | 40 | @@ -XXX,XX +XXX,XX @@ void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, |
16 | # define TCG_TARGET_REG_BITS 32 | 41 | MemTxResult response, uintptr_t retaddr); |
17 | #endif | 42 | #endif |
18 | 43 | ||
19 | -#define TCG_TARGET_NB_REGS 32 | 44 | -static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, target_ulong *pc, |
20 | +#define TCG_TARGET_NB_REGS 64 | 45 | - target_ulong *cs_base, uint32_t *pflags) |
21 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | 46 | +static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, vaddr *pc, |
22 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | 47 | + uint64_t *cs_base, uint32_t *pflags) |
23 | 48 | { | |
24 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 49 | *pc = env->pc; |
25 | TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27, | 50 | *cs_base = 0; |
26 | TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, | 51 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
27 | 52 | index XXXXXXX..XXXXXXX 100644 | |
28 | + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, | 53 | --- a/target/arm/cpu.h |
29 | + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, | 54 | +++ b/target/arm/cpu.h |
30 | + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, | 55 | @@ -XXX,XX +XXX,XX @@ static inline bool arm_cpu_bswap_data(CPUARMState *env) |
31 | + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, | 56 | } |
32 | + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, | ||
33 | + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, | ||
34 | + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, | ||
35 | + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, | ||
36 | + | ||
37 | TCG_REG_CALL_STACK = TCG_REG_R1, | ||
38 | TCG_AREG0 = TCG_REG_R27 | ||
39 | } TCGReg; | ||
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/ppc/tcg-target.inc.c | ||
43 | +++ b/tcg/ppc/tcg-target.inc.c | ||
44 | @@ -XXX,XX +XXX,XX @@ | ||
45 | # define TCG_REG_TMP1 TCG_REG_R12 | ||
46 | #endif | 57 | #endif |
47 | 58 | ||
48 | +#define TCG_VEC_TMP1 TCG_REG_V0 | 59 | -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, |
49 | +#define TCG_VEC_TMP2 TCG_REG_V1 | 60 | - target_ulong *cs_base, uint32_t *flags); |
50 | + | 61 | +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, |
51 | #define TCG_REG_TB TCG_REG_R31 | 62 | + uint64_t *cs_base, uint32_t *flags); |
52 | #define USE_REG_TB (TCG_TARGET_REG_BITS == 64) | 63 | |
53 | 64 | enum { | |
54 | @@ -XXX,XX +XXX,XX @@ bool have_isa_3_00; | 65 | QEMU_PSCI_CONDUIT_DISABLED = 0, |
66 | diff --git a/target/avr/cpu.h b/target/avr/cpu.h | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/avr/cpu.h | ||
69 | +++ b/target/avr/cpu.h | ||
70 | @@ -XXX,XX +XXX,XX @@ enum { | ||
71 | TB_FLAGS_SKIP = 2, | ||
72 | }; | ||
73 | |||
74 | -static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc, | ||
75 | - target_ulong *cs_base, uint32_t *pflags) | ||
76 | +static inline void cpu_get_tb_cpu_state(CPUAVRState *env, vaddr *pc, | ||
77 | + uint64_t *cs_base, uint32_t *pflags) | ||
78 | { | ||
79 | uint32_t flags = 0; | ||
80 | |||
81 | diff --git a/target/cris/cpu.h b/target/cris/cpu.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/cris/cpu.h | ||
84 | +++ b/target/cris/cpu.h | ||
85 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch) | ||
86 | |||
87 | #include "exec/cpu-all.h" | ||
88 | |||
89 | -static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc, | ||
90 | - target_ulong *cs_base, uint32_t *flags) | ||
91 | +static inline void cpu_get_tb_cpu_state(CPUCRISState *env, vaddr *pc, | ||
92 | + uint64_t *cs_base, uint32_t *flags) | ||
93 | { | ||
94 | *pc = env->pc; | ||
95 | *cs_base = 0; | ||
96 | diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h | ||
97 | index XXXXXXX..XXXXXXX 100644 | ||
98 | --- a/target/hexagon/cpu.h | ||
99 | +++ b/target/hexagon/cpu.h | ||
100 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | ||
101 | |||
102 | FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) | ||
103 | |||
104 | -static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc, | ||
105 | - target_ulong *cs_base, uint32_t *flags) | ||
106 | +static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, | ||
107 | + uint64_t *cs_base, uint32_t *flags) | ||
108 | { | ||
109 | uint32_t hex_flags = 0; | ||
110 | *pc = env->gpr[HEX_REG_PC]; | ||
111 | diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/target/hppa/cpu.h | ||
114 | +++ b/target/hppa/cpu.h | ||
115 | @@ -XXX,XX +XXX,XX @@ static inline target_ulong hppa_form_gva(CPUHPPAState *env, uint64_t spc, | ||
116 | #define TB_FLAG_PRIV_SHIFT 8 | ||
117 | #define TB_FLAG_UNALIGN 0x400 | ||
118 | |||
119 | -static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, target_ulong *pc, | ||
120 | - target_ulong *cs_base, | ||
121 | - uint32_t *pflags) | ||
122 | +static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, | ||
123 | + uint64_t *cs_base, uint32_t *pflags) | ||
124 | { | ||
125 | uint32_t flags = env->psw_n * PSW_N; | ||
126 | |||
127 | diff --git a/target/i386/cpu.h b/target/i386/cpu.h | ||
128 | index XXXXXXX..XXXXXXX 100644 | ||
129 | --- a/target/i386/cpu.h | ||
130 | +++ b/target/i386/cpu.h | ||
131 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index_kernel(CPUX86State *env) | ||
132 | #include "hw/i386/apic.h" | ||
55 | #endif | 133 | #endif |
56 | 134 | ||
135 | -static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc, | ||
136 | - target_ulong *cs_base, uint32_t *flags) | ||
137 | +static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc, | ||
138 | + uint64_t *cs_base, uint32_t *flags) | ||
139 | { | ||
140 | *cs_base = env->segs[R_CS].base; | ||
141 | *pc = *cs_base + env->eip; | ||
142 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/target/loongarch/cpu.h | ||
145 | +++ b/target/loongarch/cpu.h | ||
146 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch) | ||
147 | #define HW_FLAGS_EUEN_FPE 0x04 | ||
148 | #define HW_FLAGS_EUEN_SXE 0x08 | ||
149 | |||
150 | -static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, | ||
151 | - target_ulong *pc, | ||
152 | - target_ulong *cs_base, | ||
153 | - uint32_t *flags) | ||
154 | +static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, | ||
155 | + uint64_t *cs_base, uint32_t *flags) | ||
156 | { | ||
157 | *pc = env->pc; | ||
158 | *cs_base = 0; | ||
159 | diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h | ||
160 | index XXXXXXX..XXXXXXX 100644 | ||
161 | --- a/target/m68k/cpu.h | ||
162 | +++ b/target/m68k/cpu.h | ||
163 | @@ -XXX,XX +XXX,XX @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, | ||
164 | #define TB_FLAGS_TRACE 16 | ||
165 | #define TB_FLAGS_TRACE_BIT (1 << TB_FLAGS_TRACE) | ||
166 | |||
167 | -static inline void cpu_get_tb_cpu_state(CPUM68KState *env, target_ulong *pc, | ||
168 | - target_ulong *cs_base, uint32_t *flags) | ||
169 | +static inline void cpu_get_tb_cpu_state(CPUM68KState *env, vaddr *pc, | ||
170 | + uint64_t *cs_base, uint32_t *flags) | ||
171 | { | ||
172 | *pc = env->pc; | ||
173 | *cs_base = 0; | ||
174 | diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/target/microblaze/cpu.h | ||
177 | +++ b/target/microblaze/cpu.h | ||
178 | @@ -XXX,XX +XXX,XX @@ void mb_tcg_init(void); | ||
179 | /* Ensure there is no overlap between the two masks. */ | ||
180 | QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK); | ||
181 | |||
182 | -static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc, | ||
183 | - target_ulong *cs_base, uint32_t *flags) | ||
184 | +static inline void cpu_get_tb_cpu_state(CPUMBState *env, vaddr *pc, | ||
185 | + uint64_t *cs_base, uint32_t *flags) | ||
186 | { | ||
187 | *pc = env->pc; | ||
188 | *flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK); | ||
189 | diff --git a/target/mips/cpu.h b/target/mips/cpu.h | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/mips/cpu.h | ||
192 | +++ b/target/mips/cpu.h | ||
193 | @@ -XXX,XX +XXX,XX @@ void itc_reconfigure(struct MIPSITUState *tag); | ||
194 | /* helper.c */ | ||
195 | target_ulong exception_resume_pc(CPUMIPSState *env); | ||
196 | |||
197 | -static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc, | ||
198 | - target_ulong *cs_base, uint32_t *flags) | ||
199 | +static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, vaddr *pc, | ||
200 | + uint64_t *cs_base, uint32_t *flags) | ||
201 | { | ||
202 | *pc = env->active_tc.PC; | ||
203 | *cs_base = 0; | ||
204 | diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h | ||
205 | index XXXXXXX..XXXXXXX 100644 | ||
206 | --- a/target/nios2/cpu.h | ||
207 | +++ b/target/nios2/cpu.h | ||
208 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAGS, CRS0, 0, 1) /* Set if CRS == 0. */ | ||
209 | FIELD(TBFLAGS, U, 1, 1) /* Overlaps CR_STATUS_U */ | ||
210 | FIELD(TBFLAGS, R0_0, 2, 1) /* Set if R0 == 0. */ | ||
211 | |||
212 | -static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc, | ||
213 | - target_ulong *cs_base, uint32_t *flags) | ||
214 | +static inline void cpu_get_tb_cpu_state(CPUNios2State *env, vaddr *pc, | ||
215 | + uint64_t *cs_base, uint32_t *flags) | ||
216 | { | ||
217 | unsigned crs = FIELD_EX32(env->ctrl[CR_STATUS], CR_STATUS, CRS); | ||
218 | |||
219 | diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h | ||
220 | index XXXXXXX..XXXXXXX 100644 | ||
221 | --- a/target/openrisc/cpu.h | ||
222 | +++ b/target/openrisc/cpu.h | ||
223 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_set_gpr(CPUOpenRISCState *env, int i, uint32_t val) | ||
224 | env->shadow_gpr[0][i] = val; | ||
225 | } | ||
226 | |||
227 | -static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, | ||
228 | - target_ulong *pc, | ||
229 | - target_ulong *cs_base, uint32_t *flags) | ||
230 | +static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, vaddr *pc, | ||
231 | + uint64_t *cs_base, uint32_t *flags) | ||
232 | { | ||
233 | *pc = env->pc; | ||
234 | *cs_base = 0; | ||
235 | diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h | ||
236 | index XXXXXXX..XXXXXXX 100644 | ||
237 | --- a/target/ppc/cpu.h | ||
238 | +++ b/target/ppc/cpu.h | ||
239 | @@ -XXX,XX +XXX,XX @@ void cpu_write_xer(CPUPPCState *env, target_ulong xer); | ||
240 | #define is_book3s_arch2x(ctx) (!!((ctx)->insns_flags & PPC_SEGMENT_64B)) | ||
241 | |||
57 | #ifdef CONFIG_DEBUG_TCG | 242 | #ifdef CONFIG_DEBUG_TCG |
58 | -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | 243 | -void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, |
59 | - "r0", | 244 | - target_ulong *cs_base, uint32_t *flags); |
60 | - "r1", | 245 | +void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, |
61 | - "r2", | 246 | + uint64_t *cs_base, uint32_t *flags); |
62 | - "r3", | 247 | #else |
63 | - "r4", | 248 | -static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, |
64 | - "r5", | 249 | - target_ulong *cs_base, uint32_t *flags) |
65 | - "r6", | 250 | +static inline void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, |
66 | - "r7", | 251 | + uint64_t *cs_base, uint32_t *flags) |
67 | - "r8", | 252 | { |
68 | - "r9", | 253 | *pc = env->nip; |
69 | - "r10", | 254 | *cs_base = 0; |
70 | - "r11", | 255 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h |
71 | - "r12", | 256 | index XXXXXXX..XXXXXXX 100644 |
72 | - "r13", | 257 | --- a/target/riscv/cpu.h |
73 | - "r14", | 258 | +++ b/target/riscv/cpu.h |
74 | - "r15", | 259 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) |
75 | - "r16", | 260 | return cpu->cfg.vlen >> (sew + 3 - lmul); |
76 | - "r17", | 261 | } |
77 | - "r18", | 262 | |
78 | - "r19", | 263 | -void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, |
79 | - "r20", | 264 | - target_ulong *cs_base, uint32_t *pflags); |
80 | - "r21", | 265 | +void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, |
81 | - "r22", | 266 | + uint64_t *cs_base, uint32_t *pflags); |
82 | - "r23", | 267 | |
83 | - "r24", | 268 | void riscv_cpu_update_mask(CPURISCVState *env); |
84 | - "r25", | 269 | |
85 | - "r26", | 270 | diff --git a/target/rx/cpu.h b/target/rx/cpu.h |
86 | - "r27", | 271 | index XXXXXXX..XXXXXXX 100644 |
87 | - "r28", | 272 | --- a/target/rx/cpu.h |
88 | - "r29", | 273 | +++ b/target/rx/cpu.h |
89 | - "r30", | 274 | @@ -XXX,XX +XXX,XX @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte); |
90 | - "r31" | 275 | #define RX_CPU_IRQ 0 |
91 | +static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { | 276 | #define RX_CPU_FIR 1 |
92 | + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", | 277 | |
93 | + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", | 278 | -static inline void cpu_get_tb_cpu_state(CPURXState *env, target_ulong *pc, |
94 | + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", | 279 | - target_ulong *cs_base, uint32_t *flags) |
95 | + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", | 280 | +static inline void cpu_get_tb_cpu_state(CPURXState *env, vaddr *pc, |
96 | + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | 281 | + uint64_t *cs_base, uint32_t *flags) |
97 | + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", | 282 | { |
98 | + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | 283 | *pc = env->pc; |
99 | + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", | 284 | *cs_base = 0; |
100 | }; | 285 | diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h |
286 | index XXXXXXX..XXXXXXX 100644 | ||
287 | --- a/target/s390x/cpu.h | ||
288 | +++ b/target/s390x/cpu.h | ||
289 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch) | ||
101 | #endif | 290 | #endif |
102 | 291 | } | |
103 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | 292 | |
104 | TCG_REG_R5, | 293 | -static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, |
105 | TCG_REG_R4, | 294 | - target_ulong *cs_base, uint32_t *flags) |
106 | TCG_REG_R3, | 295 | +static inline void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, |
107 | + | 296 | + uint64_t *cs_base, uint32_t *flags) |
108 | + /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ | 297 | { |
109 | + TCG_REG_V2, /* call clobbered, vectors */ | 298 | if (env->psw.addr & 1) { |
110 | + TCG_REG_V3, | 299 | /* |
111 | + TCG_REG_V4, | 300 | diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h |
112 | + TCG_REG_V5, | 301 | index XXXXXXX..XXXXXXX 100644 |
113 | + TCG_REG_V6, | 302 | --- a/target/sh4/cpu.h |
114 | + TCG_REG_V7, | 303 | +++ b/target/sh4/cpu.h |
115 | + TCG_REG_V8, | 304 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) |
116 | + TCG_REG_V9, | 305 | env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)); |
117 | + TCG_REG_V10, | 306 | } |
118 | + TCG_REG_V11, | 307 | |
119 | + TCG_REG_V12, | 308 | -static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, |
120 | + TCG_REG_V13, | 309 | - target_ulong *cs_base, uint32_t *flags) |
121 | + TCG_REG_V14, | 310 | +static inline void cpu_get_tb_cpu_state(CPUSH4State *env, vaddr *pc, |
122 | + TCG_REG_V15, | 311 | + uint64_t *cs_base, uint32_t *flags) |
123 | + TCG_REG_V16, | 312 | { |
124 | + TCG_REG_V17, | 313 | *pc = env->pc; |
125 | + TCG_REG_V18, | 314 | /* For a gUSA region, notice the end of the region. */ |
126 | + TCG_REG_V19, | 315 | diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h |
127 | }; | 316 | index XXXXXXX..XXXXXXX 100644 |
128 | 317 | --- a/target/sparc/cpu.h | |
129 | static const int tcg_target_call_iarg_regs[] = { | 318 | +++ b/target/sparc/cpu.h |
130 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 319 | @@ -XXX,XX +XXX,XX @@ trap_state* cpu_tsptr(CPUSPARCState* env); |
131 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); | 320 | #define TB_FLAG_HYPER (1 << 7) |
132 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); | 321 | #define TB_FLAG_ASI_SHIFT 24 |
133 | 322 | ||
134 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); | 323 | -static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, target_ulong *pc, |
135 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); | 324 | - target_ulong *cs_base, uint32_t *pflags) |
136 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); | 325 | +static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc, |
137 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); | 326 | + uint64_t *cs_base, uint32_t *pflags) |
138 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); | 327 | { |
139 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); | 328 | uint32_t flags; |
140 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); | 329 | *pc = env->pc; |
141 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); | 330 | diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h |
142 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); | 331 | index XXXXXXX..XXXXXXX 100644 |
143 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); | 332 | --- a/target/tricore/cpu.h |
144 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); | 333 | +++ b/target/tricore/cpu.h |
145 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); | 334 | @@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2) |
146 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); | 335 | void cpu_state_reset(CPUTriCoreState *s); |
147 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); | 336 | void tricore_tcg_init(void); |
148 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); | 337 | |
149 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); | 338 | -static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, target_ulong *pc, |
150 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); | 339 | - target_ulong *cs_base, uint32_t *flags) |
151 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); | 340 | +static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc, |
152 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); | 341 | + uint64_t *cs_base, uint32_t *flags) |
153 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); | 342 | { |
154 | + | 343 | uint32_t new_flags = 0; |
155 | s->reserved_regs = 0; | 344 | *pc = env->PC; |
156 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ | 345 | diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h |
157 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ | 346 | index XXXXXXX..XXXXXXX 100644 |
158 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 347 | --- a/target/xtensa/cpu.h |
159 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ | 348 | +++ b/target/xtensa/cpu.h |
349 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch) | ||
350 | |||
351 | #include "exec/cpu-all.h" | ||
352 | |||
353 | -static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, | ||
354 | - target_ulong *cs_base, uint32_t *flags) | ||
355 | +static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc, | ||
356 | + uint64_t *cs_base, uint32_t *flags) | ||
357 | { | ||
358 | *pc = env->pc; | ||
359 | *cs_base = 0; | ||
360 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | ||
361 | index XXXXXXX..XXXXXXX 100644 | ||
362 | --- a/accel/tcg/cpu-exec.c | ||
363 | +++ b/accel/tcg/cpu-exec.c | ||
364 | @@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) | ||
365 | { | ||
366 | CPUState *cpu = env_cpu(env); | ||
367 | TranslationBlock *tb; | ||
368 | - target_ulong cs_base, pc; | ||
369 | + vaddr pc; | ||
370 | + uint64_t cs_base; | ||
371 | uint32_t flags, cflags; | ||
372 | |||
373 | cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); | ||
374 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
375 | { | ||
376 | CPUArchState *env = cpu->env_ptr; | ||
377 | TranslationBlock *tb; | ||
378 | - target_ulong cs_base, pc; | ||
379 | + vaddr pc; | ||
380 | + uint64_t cs_base; | ||
381 | uint32_t flags, cflags; | ||
382 | int tb_exit; | ||
383 | |||
384 | @@ -XXX,XX +XXX,XX @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc) | ||
385 | |||
386 | while (!cpu_handle_interrupt(cpu, &last_tb)) { | ||
387 | TranslationBlock *tb; | ||
388 | - target_ulong cs_base, pc; | ||
389 | + vaddr pc; | ||
390 | + uint64_t cs_base; | ||
391 | uint32_t flags, cflags; | ||
392 | |||
393 | cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags); | ||
394 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
395 | index XXXXXXX..XXXXXXX 100644 | ||
396 | --- a/accel/tcg/translate-all.c | ||
397 | +++ b/accel/tcg/translate-all.c | ||
398 | @@ -XXX,XX +XXX,XX @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) | ||
399 | /* The exception probably happened in a helper. The CPU state should | ||
400 | have been saved before calling it. Fetch the PC from there. */ | ||
401 | CPUArchState *env = cpu->env_ptr; | ||
402 | - target_ulong pc, cs_base; | ||
403 | + vaddr pc; | ||
404 | + uint64_t cs_base; | ||
405 | tb_page_addr_t addr; | ||
406 | uint32_t flags; | ||
407 | |||
408 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
409 | index XXXXXXX..XXXXXXX 100644 | ||
410 | --- a/target/arm/helper.c | ||
411 | +++ b/target/arm/helper.c | ||
412 | @@ -XXX,XX +XXX,XX @@ static bool mve_no_pred(CPUARMState *env) | ||
413 | return true; | ||
414 | } | ||
415 | |||
416 | -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, | ||
417 | - target_ulong *cs_base, uint32_t *pflags) | ||
418 | +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, | ||
419 | + uint64_t *cs_base, uint32_t *pflags) | ||
420 | { | ||
421 | CPUARMTBFlags flags; | ||
422 | |||
423 | diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c | ||
424 | index XXXXXXX..XXXXXXX 100644 | ||
425 | --- a/target/ppc/helper_regs.c | ||
426 | +++ b/target/ppc/helper_regs.c | ||
427 | @@ -XXX,XX +XXX,XX @@ void hreg_update_pmu_hflags(CPUPPCState *env) | ||
428 | } | ||
429 | |||
430 | #ifdef CONFIG_DEBUG_TCG | ||
431 | -void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, | ||
432 | - target_ulong *cs_base, uint32_t *flags) | ||
433 | +void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, | ||
434 | + uint64_t *cs_base, uint32_t *flags) | ||
435 | { | ||
436 | uint32_t hflags_current = env->hflags; | ||
437 | uint32_t hflags_rebuilt; | ||
438 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | ||
439 | index XXXXXXX..XXXXXXX 100644 | ||
440 | --- a/target/riscv/cpu_helper.c | ||
441 | +++ b/target/riscv/cpu_helper.c | ||
442 | @@ -XXX,XX +XXX,XX @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch) | ||
160 | #endif | 443 | #endif |
161 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ | 444 | } |
162 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); | 445 | |
163 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); | 446 | -void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, |
164 | if (USE_REG_TB) { | 447 | - target_ulong *cs_base, uint32_t *pflags) |
165 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ | 448 | +void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, |
166 | } | 449 | + uint64_t *cs_base, uint32_t *pflags) |
450 | { | ||
451 | CPUState *cs = env_cpu(env); | ||
452 | RISCVCPU *cpu = RISCV_CPU(cs); | ||
167 | -- | 453 | -- |
168 | 2.17.1 | 454 | 2.34.1 |
169 | |||
170 | diff view generated by jsdifflib |
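
After this widening, every caller of cpu_get_tb_cpu_state() follows the same shape: the pc local is a vaddr and cs_base a uint64_t, fixed-width regardless of the guest's target_ulong. A self-contained sketch of that calling pattern (demo types only; it assumes vaddr is a uint64_t typedef, as in QEMU):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vaddr;                      /* assumed QEMU typedef */

    /* Demo stand-in for a target's CPU state. */
    typedef struct { uint64_t pc, cs_base; uint32_t hflags; } DemoEnv;

    /* Shape of the widened hook: vaddr pc out, uint64_t cs_base out. */
    static void demo_get_tb_cpu_state(DemoEnv *env, vaddr *pc,
                                      uint64_t *cs_base, uint32_t *flags)
    {
        *pc = env->pc;
        *cs_base = env->cs_base;
        *flags = env->hflags;
    }

    int main(void)
    {
        DemoEnv env = { 0xffffffff81000000ull, 0, 7 };
        vaddr pc;
        uint64_t cs_base;
        uint32_t flags;

        demo_get_tb_cpu_state(&env, &pc, &cs_base, &flags);
        printf("pc=%" PRIx64 " cs_base=%" PRIx64 " flags=%u\n",
               pc, cs_base, flags);
        return 0;
    }
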
Deleted patch | |||
---|---|---|---|
1 | Introduce macro VX4() used for encoding Altivec instructions. | ||
2 | 1 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | ||
6 | tcg/ppc/tcg-target.inc.c | 1 + | ||
7 | 1 file changed, 1 insertion(+) | ||
8 | |||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/ppc/tcg-target.inc.c | ||
12 | +++ b/tcg/ppc/tcg-target.inc.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
14 | #define XO31(opc) (OPCD(31)|((opc)<<1)) | ||
15 | #define XO58(opc) (OPCD(58)|(opc)) | ||
16 | #define XO62(opc) (OPCD(62)|(opc)) | ||
17 | +#define VX4(opc) (OPCD(4)|(opc)) | ||
18 | |||
19 | #define B OPCD( 18) | ||
20 | #define BC OPCD( 16) | ||
21 | -- | ||
22 | 2.17.1 | ||
23 | |||
24 | diff view generated by jsdifflib |
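
VX4() pairs primary opcode 4 with an 11-bit VX-form extended opcode. A quick sanity check of one encoding, assuming OPCD(opc) expands to ((opc) << 26) as in the rest of this backend:

    #include <assert.h>
    #include <stdint.h>

    #define OPCD(opc) ((uint32_t)(opc) << 26)  /* assumed backend definition */
    #define VX4(opc)  (OPCD(4) | (opc))

    int main(void)
    {
        /* VADDSBS is VX-form: primary opcode 4, extended opcode 768. */
        assert(VX4(768) == 0x10000300u);
        return 0;
    }
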
1 | This is only used for 32-bit hosts. | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20230621135633.1649-5-anjo@rev.ng> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | 7 | --- |
6 | tcg/ppc/tcg-target.inc.c | 9 +++++++++ | 8 | include/exec/cpu_ldst.h | 10 +++++----- |
7 | 1 file changed, 9 insertions(+) | 9 | accel/tcg/cputlb.c | 8 ++++---- |
10 | 2 files changed, 9 insertions(+), 9 deletions(-) | ||
8 | 11 | ||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 12 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h |
10 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/ppc/tcg-target.inc.c | 14 | --- a/include/exec/cpu_ldst.h |
12 | +++ b/tcg/ppc/tcg-target.inc.c | 15 | +++ b/include/exec/cpu_ldst.h |
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 16 | @@ -XXX,XX +XXX,XX @@ static inline void clear_helper_retaddr(void) |
14 | } | 17 | |
15 | break; | 18 | #include "tcg/oversized-guest.h" |
16 | 19 | ||
17 | + case INDEX_op_dup2_vec: | 20 | -static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, |
18 | + assert(TCG_TARGET_REG_BITS == 32); | 21 | - MMUAccessType access_type) |
19 | + /* With inputs a1 = xLxx, a2 = xHxx */ | 22 | +static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry, |
20 | + tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ | 23 | + MMUAccessType access_type) |
21 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ | 24 | { |
22 | + tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ | 25 | /* Do not rearrange the CPUTLBEntry structure members. */ |
23 | + return; | 26 | QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) != |
24 | + | 27 | @@ -XXX,XX +XXX,XX @@ static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, |
25 | case INDEX_op_ppc_mrgh_vec: | 28 | #endif |
26 | insn = mrgh_op[vece]; | 29 | } |
27 | break; | 30 | |
28 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 31 | -static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) |
29 | case INDEX_op_ppc_mulou_vec: | 32 | +static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry) |
30 | case INDEX_op_ppc_pkum_vec: | 33 | { |
31 | case INDEX_op_ppc_rotl_vec: | 34 | return tlb_read_idx(entry, MMU_DATA_STORE); |
32 | + case INDEX_op_dup2_vec: | 35 | } |
33 | return &v_v_v; | 36 | |
34 | case INDEX_op_not_vec: | 37 | /* Find the TLB index corresponding to the mmu_idx + address pair. */ |
35 | case INDEX_op_dup_vec: | 38 | static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, |
39 | - target_ulong addr) | ||
40 | + vaddr addr) | ||
41 | { | ||
42 | uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; | ||
43 | |||
44 | @@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, | ||
45 | |||
46 | /* Find the TLB entry corresponding to the mmu_idx + address pair. */ | ||
47 | static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, | ||
48 | - target_ulong addr) | ||
49 | + vaddr addr) | ||
50 | { | ||
51 | return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; | ||
52 | } | ||
53 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/accel/tcg/cputlb.c | ||
56 | +++ b/accel/tcg/cputlb.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
58 | assert_cpu_is_self(env_cpu(env)); | ||
59 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { | ||
60 | CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; | ||
61 | - target_ulong cmp = tlb_read_idx(vtlb, access_type); | ||
62 | + uint64_t cmp = tlb_read_idx(vtlb, access_type); | ||
63 | |||
64 | if (cmp == page) { | ||
65 | /* Found entry in victim tlb, swap tlb and iotlb. */ | ||
66 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, | ||
67 | { | ||
68 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
69 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
70 | - target_ulong tlb_addr = tlb_read_idx(entry, access_type); | ||
71 | + uint64_t tlb_addr = tlb_read_idx(entry, access_type); | ||
72 | target_ulong page_addr = addr & TARGET_PAGE_MASK; | ||
73 | int flags = TLB_FLAGS_MASK; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, | ||
76 | CPUArchState *env = cpu->env_ptr; | ||
77 | CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
78 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
79 | - vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; | ||
80 | + uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; | ||
81 | |||
82 | if (likely(tlb_hit(tlb_addr, addr))) { | ||
83 | /* We must have an iotlb entry for MMIO */ | ||
84 | @@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, | ||
85 | target_ulong addr = data->addr; | ||
86 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
87 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
88 | - target_ulong tlb_addr = tlb_read_idx(entry, access_type); | ||
89 | + uint64_t tlb_addr = tlb_read_idx(entry, access_type); | ||
90 | bool maybe_resized = false; | ||
91 | |||
92 | /* If the TLB entry is for a different page, reload and try again. */ | ||
36 | -- | 93 | -- |
37 | 2.17.1 | 94 | 2.34.1 |
38 | |||
39 | diff view generated by jsdifflib |
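
The dup2_vec sequence above is easiest to verify on paper, so here is a host-independent simulation of the same three steps on 32-bit lanes (plain C stand-ins for VMRGHW and VSLDOI-by-8, written only to check the lane algebra, not the actual backend emitter):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Lanes in big-endian element order, matching the mnemonic comments. */
    typedef struct { uint32_t w[4]; } V128;

    static V128 vmrghw(V128 a, V128 b)   /* interleave the high word pairs */
    {
        return (V128){ { a.w[0], b.w[0], a.w[1], b.w[1] } };
    }

    static V128 vsldoi8(V128 a, V128 b)  /* take 16 bytes of (a:b) from byte 8 */
    {
        return (V128){ { a.w[2], a.w[3], b.w[0], b.w[1] } };
    }

    int main(void)
    {
        enum { X = 0xdead, L = 1, H = 2 };
        V128 a1 = { { X, L, X, X } };    /* a1 = xLxx */
        V128 a2 = { { X, H, X, X } };    /* a2 = xHxx */

        V128 a0  = vmrghw(a2, a1);       /* a0 = xxHL */
        V128 tmp = vsldoi8(a0, a0);      /* tmp = HLxx */
        a0 = vsldoi8(a0, tmp);           /* a0 = HLHL */

        V128 want = { { H, L, H, L } };
        assert(memcmp(&a0, &want, sizeof(a0)) == 0);
        return 0;
    }
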
1 | Add various bits and pieces related mostly to load and store | 1 | From: Anton Johansson <anjo@rev.ng>
---|---|---|---|
2 | operations. In that context, Altivec logic, compare, and splat | 2 |
3 | instructions are used, so support for emitting them is also | 3 | Functions accessing MMULookupPageData are also updated.
4 | included in this patch. | 4 |
5 | 2 | ||
3 | Functions accessing MMULookupPageData are also updated. | ||
4 | |||
5 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230621135633.1649-6-anjo@rev.ng> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 9 | --- |
9 | tcg/ppc/tcg-target.h | 6 +- | 10 | accel/tcg/cputlb.c | 30 +++++++++++++++--------------- |
10 | tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++--- | 11 | 1 file changed, 15 insertions(+), 15 deletions(-) |
11 | 2 files changed, 442 insertions(+), 36 deletions(-) | ||
12 | 12 | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 13 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
14 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 15 | --- a/accel/tcg/cputlb.c |
16 | +++ b/tcg/ppc/tcg-target.h | 16 | +++ b/accel/tcg/cputlb.c |
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 17 | @@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, |
18 | #define TCG_TARGET_HAS_v128 have_altivec | 18 | typedef struct MMULookupPageData { |
19 | #define TCG_TARGET_HAS_v256 0 | 19 | CPUTLBEntryFull *full; |
20 | 20 | void *haddr; | |
21 | -#define TCG_TARGET_HAS_andc_vec 0 | 21 | - target_ulong addr; |
22 | +#define TCG_TARGET_HAS_andc_vec 1 | 22 | + vaddr addr; |
23 | #define TCG_TARGET_HAS_orc_vec 0 | 23 | int flags; |
24 | -#define TCG_TARGET_HAS_not_vec 0 | 24 | int size; |
25 | +#define TCG_TARGET_HAS_not_vec 1 | 25 | } MMULookupPageData; |
26 | #define TCG_TARGET_HAS_neg_vec 0 | 26 | @@ -XXX,XX +XXX,XX @@ typedef struct MMULookupLocals { |
27 | #define TCG_TARGET_HAS_abs_vec 0 | 27 | static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, |
28 | #define TCG_TARGET_HAS_shi_vec 0 | 28 | int mmu_idx, MMUAccessType access_type, uintptr_t ra) |
29 | #define TCG_TARGET_HAS_shs_vec 0 | ||
30 | #define TCG_TARGET_HAS_shv_vec 0 | ||
31 | -#define TCG_TARGET_HAS_cmp_vec 0 | ||
32 | +#define TCG_TARGET_HAS_cmp_vec 1 | ||
33 | #define TCG_TARGET_HAS_mul_vec 0 | ||
34 | #define TCG_TARGET_HAS_sat_vec 0 | ||
35 | #define TCG_TARGET_HAS_minmax_vec 0 | ||
36 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tcg/ppc/tcg-target.inc.c | ||
39 | +++ b/tcg/ppc/tcg-target.inc.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
41 | ct->ct |= TCG_CT_REG; | ||
42 | ct->u.regs = 0xffffffff; | ||
43 | break; | ||
44 | + case 'v': | ||
45 | + ct->ct |= TCG_CT_REG; | ||
46 | + ct->u.regs = 0xffffffff00000000ull; | ||
47 | + break; | ||
48 | case 'L': /* qemu_ld constraint */ | ||
49 | ct->ct |= TCG_CT_REG; | ||
50 | ct->u.regs = 0xffffffff; | ||
51 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
52 | |||
53 | #define NOP ORI /* ori 0,0,0 */ | ||
54 | |||
55 | +#define LVX XO31(103) | ||
56 | +#define LVEBX XO31(7) | ||
57 | +#define LVEHX XO31(39) | ||
58 | +#define LVEWX XO31(71) | ||
59 | + | ||
60 | +#define STVX XO31(231) | ||
61 | +#define STVEWX XO31(199) | ||
62 | + | ||
63 | +#define VCMPEQUB VX4(6) | ||
64 | +#define VCMPEQUH VX4(70) | ||
65 | +#define VCMPEQUW VX4(134) | ||
66 | +#define VCMPGTSB VX4(774) | ||
67 | +#define VCMPGTSH VX4(838) | ||
68 | +#define VCMPGTSW VX4(902) | ||
69 | +#define VCMPGTUB VX4(518) | ||
70 | +#define VCMPGTUH VX4(582) | ||
71 | +#define VCMPGTUW VX4(646) | ||
72 | + | ||
73 | +#define VAND VX4(1028) | ||
74 | +#define VANDC VX4(1092) | ||
75 | +#define VNOR VX4(1284) | ||
76 | +#define VOR VX4(1156) | ||
77 | +#define VXOR VX4(1220) | ||
78 | + | ||
79 | +#define VSPLTB VX4(524) | ||
80 | +#define VSPLTH VX4(588) | ||
81 | +#define VSPLTW VX4(652) | ||
82 | +#define VSPLTISB VX4(780) | ||
83 | +#define VSPLTISH VX4(844) | ||
84 | +#define VSPLTISW VX4(908) | ||
85 | + | ||
86 | +#define VSLDOI VX4(44) | ||
87 | + | ||
88 | #define RT(r) ((r)<<21) | ||
89 | #define RS(r) ((r)<<21) | ||
90 | #define RA(r) ((r)<<16) | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
92 | intptr_t value, intptr_t addend) | ||
93 | { | 29 | { |
94 | tcg_insn_unit *target; | 30 | - target_ulong addr = data->addr; |
95 | + int16_t lo; | 31 | + vaddr addr = data->addr; |
96 | + int32_t hi; | 32 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
97 | 33 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | |
98 | value += addend; | 34 | uint64_t tlb_addr = tlb_read_idx(entry, access_type); |
99 | target = (tcg_insn_unit *)value; | 35 | @@ -XXX,XX +XXX,XX @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, |
100 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 36 | MMUAccessType access_type, uintptr_t ra) |
101 | } | ||
102 | *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); | ||
103 | break; | ||
104 | + case R_PPC_ADDR32: | ||
105 | + /* | ||
106 | + * We are abusing this relocation type. Again, this points to | ||
107 | + * a pair of insns, lis + load. This is an absolute address | ||
108 | + * relocation for PPC32 so the lis cannot be removed. | ||
109 | + */ | ||
110 | + lo = value; | ||
111 | + hi = value - lo; | ||
112 | + if (hi + lo != value) { | ||
113 | + return false; | ||
114 | + } | ||
115 | + code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); | ||
116 | + code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); | ||
117 | + break; | ||
118 | default: | ||
119 | g_assert_not_reached(); | ||
120 | } | ||
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
122 | |||
123 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | ||
124 | { | 37 | { |
125 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | 38 | CPUTLBEntryFull *full = data->full; |
126 | - if (ret != arg) { | 39 | - target_ulong addr = data->addr; |
127 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | 40 | + vaddr addr = data->addr; |
128 | + if (ret == arg) { | 41 | int flags = data->flags; |
129 | + return true; | 42 | int size = data->size; |
130 | + } | 43 | |
131 | + switch (type) { | 44 | @@ -XXX,XX +XXX,XX @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, |
132 | + case TCG_TYPE_I64: | 45 | * Resolve the translation for the page(s) beginning at @addr, for MemOp.size |
133 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | 46 | * bytes. Return true if the lookup crosses a page boundary. |
134 | + /* fallthru */ | 47 | */ |
135 | + case TCG_TYPE_I32: | 48 | -static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, |
136 | + if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | 49 | +static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
137 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | 50 | uintptr_t ra, MMUAccessType type, MMULookupLocals *l) |
138 | + break; | 51 | { |
139 | + } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | 52 | unsigned a_bits; |
140 | + /* Altivec does not support vector/integer moves. */ | 53 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p, |
141 | + return false; | 54 | MMUAccessType type, uintptr_t ra) |
142 | + } | 55 | { |
143 | + /* fallthru */ | 56 | CPUTLBEntryFull *full = p->full; |
144 | + case TCG_TYPE_V64: | 57 | - target_ulong addr = p->addr; |
145 | + case TCG_TYPE_V128: | 58 | + vaddr addr = p->addr; |
146 | + tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); | 59 | int i, size = p->size; |
147 | + tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); | 60 | |
148 | + break; | 61 | QEMU_IOTHREAD_LOCK_GUARD(); |
149 | + default: | 62 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx, |
150 | + g_assert_not_reached(); | 63 | return ret; |
151 | } | ||
152 | return true; | ||
153 | } | 64 | } |
154 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | 65 | |
155 | static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | 66 | -static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, |
156 | tcg_target_long val) | 67 | +static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, |
68 | uintptr_t ra, MMUAccessType access_type) | ||
157 | { | 69 | { |
158 | - g_assert_not_reached(); | 70 | MMULookupLocals l; |
159 | + uint32_t load_insn; | 71 | @@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr, |
160 | + int rel, low; | 72 | return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); |
161 | + intptr_t add; | ||
162 | + | ||
163 | + low = (int8_t)val; | ||
164 | + if (low >= -16 && low < 16) { | ||
165 | + if (val == (tcg_target_long)dup_const(MO_8, low)) { | ||
166 | + tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); | ||
167 | + return; | ||
168 | + } | ||
169 | + if (val == (tcg_target_long)dup_const(MO_16, low)) { | ||
170 | + tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); | ||
171 | + return; | ||
172 | + } | ||
173 | + if (val == (tcg_target_long)dup_const(MO_32, low)) { | ||
174 | + tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); | ||
175 | + return; | ||
176 | + } | ||
177 | + } | ||
178 | + | ||
179 | + /* | ||
180 | + * Otherwise we must load the value from the constant pool. | ||
181 | + */ | ||
182 | + if (USE_REG_TB) { | ||
183 | + rel = R_PPC_ADDR16; | ||
184 | + add = -(intptr_t)s->code_gen_ptr; | ||
185 | + } else { | ||
186 | + rel = R_PPC_ADDR32; | ||
187 | + add = 0; | ||
188 | + } | ||
189 | + | ||
190 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
191 | + if (TCG_TARGET_REG_BITS == 64) { | ||
192 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
193 | + } else { | ||
194 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
195 | + } | ||
196 | + | ||
197 | + if (USE_REG_TB) { | ||
198 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); | ||
199 | + load_insn |= RA(TCG_REG_TB); | ||
200 | + } else { | ||
201 | + tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); | ||
202 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); | ||
203 | + } | ||
204 | + tcg_out32(s, load_insn); | ||
205 | } | 73 | } |
206 | 74 | ||
207 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | 75 | -static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, |
208 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 76 | +static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, |
209 | align = 3; | 77 | uintptr_t ra, MMUAccessType access_type) |
210 | /* FALLTHRU */ | 78 | { |
211 | default: | 79 | MMULookupLocals l; |
212 | - if (rt != TCG_REG_R0) { | 80 | @@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr, |
213 | + if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { | 81 | return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); |
214 | rs = rt; | ||
215 | break; | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
218 | } | ||
219 | |||
220 | /* For unaligned, or very large offsets, use the indexed form. */ | ||
221 | - if (offset & align || offset != (int32_t)offset) { | ||
222 | + if (offset & align || offset != (int32_t)offset || opi == 0) { | ||
223 | if (rs == base) { | ||
224 | rs = TCG_REG_R0; | ||
225 | } | ||
226 | tcg_debug_assert(!is_store || rs != rt); | ||
227 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | ||
228 | - tcg_out32(s, opx | TAB(rt, base, rs)); | ||
229 | + tcg_out32(s, opx | TAB(rt & 31, base, rs)); | ||
230 | return; | ||
231 | } | ||
232 | |||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
234 | base = rs; | ||
235 | } | ||
236 | if (opi != ADDI || base != rt || l0 != 0) { | ||
237 | - tcg_out32(s, opi | TAI(rt, base, l0)); | ||
238 | + tcg_out32(s, opi | TAI(rt & 31, base, l0)); | ||
239 | } | ||
240 | } | 82 | } |
241 | 83 | ||
242 | -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 84 | -static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, |
243 | - TCGReg arg1, intptr_t arg2) | 85 | +static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, |
244 | +static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, | 86 | uintptr_t ra, MMUAccessType access_type) |
245 | + TCGReg va, TCGReg vb, int shb) | ||
246 | { | 87 | { |
247 | - int opi, opx; | 88 | MMULookupLocals l; |
248 | - | 89 | @@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr, |
249 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | 90 | return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); |
250 | - if (type == TCG_TYPE_I32) { | ||
251 | - opi = LWZ, opx = LWZX; | ||
252 | - } else { | ||
253 | - opi = LD, opx = LDX; | ||
254 | - } | ||
255 | - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); | ||
256 | + tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); | ||
257 | } | 91 | } |
258 | 92 | ||
259 | -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | 93 | -static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, |
260 | - TCGReg arg1, intptr_t arg2) | 94 | +static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, |
261 | +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 95 | uintptr_t ra, MMUAccessType access_type) |
262 | + TCGReg base, intptr_t offset) | ||
263 | { | 96 | { |
264 | - int opi, opx; | 97 | MMULookupLocals l; |
265 | + int shift; | 98 | @@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr, |
266 | 99 | return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr); | |
267 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
268 | - if (type == TCG_TYPE_I32) { | ||
269 | - opi = STW, opx = STWX; | ||
270 | - } else { | ||
271 | - opi = STD, opx = STDX; | ||
272 | + switch (type) { | ||
273 | + case TCG_TYPE_I32: | ||
274 | + if (ret < TCG_REG_V0) { | ||
275 | + tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | ||
276 | + break; | ||
277 | + } | ||
278 | + tcg_debug_assert((offset & 3) == 0); | ||
279 | + tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
280 | + shift = (offset - 4) & 0xc; | ||
281 | + if (shift) { | ||
282 | + tcg_out_vsldoi(s, ret, ret, ret, shift); | ||
283 | + } | ||
284 | + break; | ||
285 | + case TCG_TYPE_I64: | ||
286 | + if (ret < TCG_REG_V0) { | ||
287 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
288 | + tcg_out_mem_long(s, LD, LDX, ret, base, offset); | ||
289 | + break; | ||
290 | + } | ||
291 | + /* fallthru */ | ||
292 | + case TCG_TYPE_V64: | ||
293 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
294 | + tcg_debug_assert((offset & 7) == 0); | ||
295 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
296 | + if (offset & 8) { | ||
297 | + tcg_out_vsldoi(s, ret, ret, ret, 8); | ||
298 | + } | ||
299 | + break; | ||
300 | + case TCG_TYPE_V128: | ||
301 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
302 | + tcg_debug_assert((offset & 15) == 0); | ||
303 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
304 | + break; | ||
305 | + default: | ||
306 | + g_assert_not_reached(); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
311 | + TCGReg base, intptr_t offset) | ||
312 | +{ | ||
313 | + int shift; | ||
314 | + | ||
315 | + switch (type) { | ||
316 | + case TCG_TYPE_I32: | ||
317 | + if (arg < TCG_REG_V0) { | ||
318 | + tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
319 | + break; | ||
320 | + } | ||
321 | + tcg_debug_assert((offset & 3) == 0); | ||
322 | + shift = (offset - 4) & 0xc; | ||
323 | + if (shift) { | ||
324 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); | ||
325 | + arg = TCG_VEC_TMP1; | ||
326 | + } | ||
327 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
328 | + break; | ||
329 | + case TCG_TYPE_I64: | ||
330 | + if (arg < TCG_REG_V0) { | ||
331 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
332 | + tcg_out_mem_long(s, STD, STDX, arg, base, offset); | ||
333 | + break; | ||
334 | + } | ||
335 | + /* fallthru */ | ||
336 | + case TCG_TYPE_V64: | ||
337 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
338 | + tcg_debug_assert((offset & 7) == 0); | ||
339 | + if (offset & 8) { | ||
340 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
341 | + arg = TCG_VEC_TMP1; | ||
342 | + } | ||
343 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
344 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); | ||
345 | + break; | ||
346 | + case TCG_TYPE_V128: | ||
347 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
348 | + tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
349 | + break; | ||
350 | + default: | ||
351 | + g_assert_not_reached(); | ||
352 | } | ||
353 | - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); | ||
354 | } | 100 | } |
355 | 101 | ||
356 | static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, | 102 | -static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr, |
357 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | 103 | +static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, |
358 | 104 | MemOpIdx oi, uintptr_t ra) | |
359 | int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
360 | { | 105 | { |
361 | - g_assert_not_reached(); | 106 | MMULookupLocals l; |
362 | + switch (opc) { | 107 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, |
363 | + case INDEX_op_and_vec: | 108 | uint64_t val_le, int mmu_idx, uintptr_t ra) |
364 | + case INDEX_op_or_vec: | 109 | { |
365 | + case INDEX_op_xor_vec: | 110 | CPUTLBEntryFull *full = p->full; |
366 | + case INDEX_op_andc_vec: | 111 | - target_ulong addr = p->addr; |
367 | + case INDEX_op_not_vec: | 112 | + vaddr addr = p->addr; |
368 | + return 1; | 113 | int i, size = p->size; |
369 | + case INDEX_op_cmp_vec: | 114 | |
370 | + return vece <= MO_32 ? -1 : 0; | 115 | QEMU_IOTHREAD_LOCK_GUARD(); |
371 | + default: | 116 | @@ -XXX,XX +XXX,XX @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val, |
372 | + return 0; | 117 | do_st_1(env, &l.page[0], val, l.mmu_idx, ra); |
373 | + } | ||
374 | } | 118 | } |
375 | 119 | ||
376 | static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | 120 | -static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val, |
377 | TCGReg dst, TCGReg src) | 121 | +static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val, |
122 | MemOpIdx oi, uintptr_t ra) | ||
378 | { | 123 | { |
379 | - g_assert_not_reached(); | 124 | MMULookupLocals l; |
380 | + tcg_debug_assert(dst >= TCG_REG_V0); | 125 | @@ -XXX,XX +XXX,XX @@ void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val, |
381 | + tcg_debug_assert(src >= TCG_REG_V0); | 126 | do_st2_mmu(env, addr, val, oi, retaddr); |
382 | + | ||
383 | + /* | ||
384 | + * Recall we use (or emulate) VSX integer loads, so the integer is | ||
385 | + * right justified within the left (zero-index) double-word. | ||
386 | + */ | ||
387 | + switch (vece) { | ||
388 | + case MO_8: | ||
389 | + tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); | ||
390 | + break; | ||
391 | + case MO_16: | ||
392 | + tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); | ||
393 | + break; | ||
394 | + case MO_32: | ||
395 | + tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
396 | + break; | ||
397 | + case MO_64: | ||
398 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
399 | + tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
400 | + break; | ||
401 | + default: | ||
402 | + g_assert_not_reached(); | ||
403 | + } | ||
404 | + return true; | ||
405 | } | 127 | } |
406 | 128 | ||
407 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | 129 | -static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val, |
408 | TCGReg out, TCGReg base, intptr_t offset) | 130 | +static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val, |
131 | MemOpIdx oi, uintptr_t ra) | ||
409 | { | 132 | { |
410 | - g_assert_not_reached(); | 133 | MMULookupLocals l; |
411 | + int elt; | 134 | @@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val, |
412 | + | 135 | do_st4_mmu(env, addr, val, oi, retaddr); |
413 | + tcg_debug_assert(out >= TCG_REG_V0); | ||
414 | + switch (vece) { | ||
415 | + case MO_8: | ||
416 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
417 | + elt = extract32(offset, 0, 4); | ||
418 | +#ifndef HOST_WORDS_BIGENDIAN | ||
419 | + elt ^= 15; | ||
420 | +#endif | ||
421 | + tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); | ||
422 | + break; | ||
423 | + case MO_16: | ||
424 | + tcg_debug_assert((offset & 1) == 0); | ||
425 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
426 | + elt = extract32(offset, 1, 3); | ||
427 | +#ifndef HOST_WORDS_BIGENDIAN | ||
428 | + elt ^= 7; | ||
429 | +#endif | ||
430 | + tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
431 | + break; | ||
432 | + case MO_32: | ||
433 | + tcg_debug_assert((offset & 3) == 0); | ||
434 | + tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
435 | + elt = extract32(offset, 2, 2); | ||
436 | +#ifndef HOST_WORDS_BIGENDIAN | ||
437 | + elt ^= 3; | ||
438 | +#endif | ||
439 | + tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
440 | + break; | ||
441 | + case MO_64: | ||
442 | + tcg_debug_assert((offset & 7) == 0); | ||
443 | + tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
444 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
445 | + elt = extract32(offset, 3, 1); | ||
446 | +#ifndef HOST_WORDS_BIGENDIAN | ||
447 | + elt = !elt; | ||
448 | +#endif | ||
449 | + if (elt) { | ||
450 | + tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); | ||
451 | + } else { | ||
452 | + tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); | ||
453 | + } | ||
454 | + break; | ||
455 | + default: | ||
456 | + g_assert_not_reached(); | ||
457 | + } | ||
458 | + return true; | ||
459 | } | 136 | } |
460 | 137 | ||
461 | static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 138 | -static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val, |
462 | unsigned vecl, unsigned vece, | 139 | +static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val, |
463 | const TCGArg *args, const int *const_args) | 140 | MemOpIdx oi, uintptr_t ra) |
464 | { | 141 | { |
465 | - g_assert_not_reached(); | 142 | MMULookupLocals l; |
466 | + static const uint32_t | 143 | @@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val, |
467 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | 144 | do_st8_mmu(env, addr, val, oi, retaddr); |
468 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
469 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
470 | + | ||
471 | + TCGType type = vecl + TCG_TYPE_V64; | ||
472 | + TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
473 | + uint32_t insn; | ||
474 | + | ||
475 | + switch (opc) { | ||
476 | + case INDEX_op_ld_vec: | ||
477 | + tcg_out_ld(s, type, a0, a1, a2); | ||
478 | + return; | ||
479 | + case INDEX_op_st_vec: | ||
480 | + tcg_out_st(s, type, a0, a1, a2); | ||
481 | + return; | ||
482 | + case INDEX_op_dupm_vec: | ||
483 | + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
484 | + return; | ||
485 | + | ||
486 | + case INDEX_op_and_vec: | ||
487 | + insn = VAND; | ||
488 | + break; | ||
489 | + case INDEX_op_or_vec: | ||
490 | + insn = VOR; | ||
491 | + break; | ||
492 | + case INDEX_op_xor_vec: | ||
493 | + insn = VXOR; | ||
494 | + break; | ||
495 | + case INDEX_op_andc_vec: | ||
496 | + insn = VANDC; | ||
497 | + break; | ||
498 | + case INDEX_op_not_vec: | ||
499 | + insn = VNOR; | ||
500 | + a2 = a1; | ||
501 | + break; | ||
502 | + | ||
503 | + case INDEX_op_cmp_vec: | ||
504 | + switch (args[3]) { | ||
505 | + case TCG_COND_EQ: | ||
506 | + insn = eq_op[vece]; | ||
507 | + break; | ||
508 | + case TCG_COND_GT: | ||
509 | + insn = gts_op[vece]; | ||
510 | + break; | ||
511 | + case TCG_COND_GTU: | ||
512 | + insn = gtu_op[vece]; | ||
513 | + break; | ||
514 | + default: | ||
515 | + g_assert_not_reached(); | ||
516 | + } | ||
517 | + break; | ||
518 | + | ||
519 | + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
520 | + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
521 | + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
522 | + default: | ||
523 | + g_assert_not_reached(); | ||
524 | + } | ||
525 | + | ||
526 | + tcg_debug_assert(insn != 0); | ||
527 | + tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
528 | +} | ||
529 | + | ||
530 | +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
531 | + TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
532 | +{ | ||
533 | + bool need_swap = false, need_inv = false; | ||
534 | + | ||
535 | + tcg_debug_assert(vece <= MO_32); | ||
536 | + | ||
537 | + switch (cond) { | ||
538 | + case TCG_COND_EQ: | ||
539 | + case TCG_COND_GT: | ||
540 | + case TCG_COND_GTU: | ||
541 | + break; | ||
542 | + case TCG_COND_NE: | ||
543 | + case TCG_COND_LE: | ||
544 | + case TCG_COND_LEU: | ||
545 | + need_inv = true; | ||
546 | + break; | ||
547 | + case TCG_COND_LT: | ||
548 | + case TCG_COND_LTU: | ||
549 | + need_swap = true; | ||
550 | + break; | ||
551 | + case TCG_COND_GE: | ||
552 | + case TCG_COND_GEU: | ||
553 | + need_swap = need_inv = true; | ||
554 | + break; | ||
555 | + default: | ||
556 | + g_assert_not_reached(); | ||
557 | + } | ||
558 | + | ||
559 | + if (need_inv) { | ||
560 | + cond = tcg_invert_cond(cond); | ||
561 | + } | ||
562 | + if (need_swap) { | ||
563 | + TCGv_vec t1; | ||
564 | + t1 = v1, v1 = v2, v2 = t1; | ||
565 | + cond = tcg_swap_cond(cond); | ||
566 | + } | ||
567 | + | ||
568 | + vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), | ||
569 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); | ||
570 | + | ||
571 | + if (need_inv) { | ||
572 | + tcg_gen_not_vec(vece, v0, v0); | ||
573 | + } | ||
574 | } | 145 | } |
575 | 146 | ||
576 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | 147 | -static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val, |
577 | TCGArg a0, ...) | 148 | +static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, |
149 | MemOpIdx oi, uintptr_t ra) | ||
578 | { | 150 | { |
579 | - g_assert_not_reached(); | 151 | MMULookupLocals l; |
580 | + va_list va; | ||
581 | + TCGv_vec v0, v1, v2; | ||
582 | + | ||
583 | + va_start(va, a0); | ||
584 | + v0 = temp_tcgv_vec(arg_temp(a0)); | ||
585 | + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
586 | + v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
587 | + | ||
588 | + switch (opc) { | ||
589 | + case INDEX_op_cmp_vec: | ||
590 | + expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
591 | + break; | ||
592 | + default: | ||
593 | + g_assert_not_reached(); | ||
594 | + } | ||
595 | + va_end(va); | ||
596 | } | ||
597 | |||
598 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
599 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
600 | = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } }; | ||
601 | static const TCGTargetOpDef sub2 | ||
602 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
603 | + static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
604 | + static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
605 | + static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
606 | |||
607 | switch (op) { | ||
608 | case INDEX_op_goto_ptr: | ||
609 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
610 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
611 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
612 | |||
613 | + case INDEX_op_and_vec: | ||
614 | + case INDEX_op_or_vec: | ||
615 | + case INDEX_op_xor_vec: | ||
616 | + case INDEX_op_andc_vec: | ||
617 | + case INDEX_op_orc_vec: | ||
618 | + case INDEX_op_cmp_vec: | ||
619 | + return &v_v_v; | ||
620 | + case INDEX_op_not_vec: | ||
621 | + case INDEX_op_dup_vec: | ||
622 | + return &v_v; | ||
623 | + case INDEX_op_ld_vec: | ||
624 | + case INDEX_op_st_vec: | ||
625 | + case INDEX_op_dupm_vec: | ||
626 | + return &v_r; | ||
627 | + | ||
628 | default: | ||
629 | return NULL; | ||
630 | } | ||
631 | -- | 152 | -- |
632 | 2.17.1 | 153 | 2.34.1 |
633 | |||
1 | Add support for vector saturated add/subtract using Altivec | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | instructions: | ||
3 | VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and | ||
4 | VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS. | ||
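
For readers skimming the series, a minimal scalar sketch (illustration only, not part of the patch) of the per-lane behaviour that VADDSBS supplies on each of its 16 byte lanes; the unsigned and wider forms clamp analogously:

    #include <stdint.h>

    /* Signed saturating byte add, as VADDSBS performs per lane. */
    static int8_t ssadd8(int8_t a, int8_t b)
    {
        int sum = a + b;            /* widened, so no signed overflow */
        if (sum > INT8_MAX) {
            return INT8_MAX;        /* saturate high */
        }
        if (sum < INT8_MIN) {
            return INT8_MIN;        /* saturate low */
        }
        return sum;
    }
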
5 | 2 | ||
3 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20230621135633.1649-7-anjo@rev.ng> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 7 | --- |
9 | tcg/ppc/tcg-target.h | 2 +- | 8 | accel/tcg/cpu-exec.c | 34 +++++++++++++++++----------------- |
10 | tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++ | 9 | 1 file changed, 17 insertions(+), 17 deletions(-) |
11 | 2 files changed, 37 insertions(+), 1 deletion(-) | ||
12 | 10 | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 11 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c |
14 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 13 | --- a/accel/tcg/cpu-exec.c |
16 | +++ b/tcg/ppc/tcg-target.h | 14 | +++ b/accel/tcg/cpu-exec.c |
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 15 | @@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu) |
18 | #define TCG_TARGET_HAS_shv_vec 0 | 16 | } |
19 | #define TCG_TARGET_HAS_cmp_vec 1 | 17 | |
20 | #define TCG_TARGET_HAS_mul_vec 0 | 18 | struct tb_desc { |
21 | -#define TCG_TARGET_HAS_sat_vec 0 | 19 | - target_ulong pc; |
22 | +#define TCG_TARGET_HAS_sat_vec 1 | 20 | - target_ulong cs_base; |
23 | #define TCG_TARGET_HAS_minmax_vec 1 | 21 | + vaddr pc; |
24 | #define TCG_TARGET_HAS_bitsel_vec 0 | 22 | + uint64_t cs_base; |
25 | #define TCG_TARGET_HAS_cmpsel_vec 0 | 23 | CPUArchState *env; |
26 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 24 | tb_page_addr_t page_addr0; |
27 | index XXXXXXX..XXXXXXX 100644 | 25 | uint32_t flags; |
28 | --- a/tcg/ppc/tcg-target.inc.c | 26 | @@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d) |
29 | +++ b/tcg/ppc/tcg-target.inc.c | 27 | return true; |
30 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 28 | } else { |
31 | #define STVX XO31(231) | 29 | tb_page_addr_t phys_page1; |
32 | #define STVEWX XO31(199) | 30 | - target_ulong virt_page1; |
33 | 31 | + vaddr virt_page1; | |
34 | +#define VADDSBS VX4(768) | 32 | |
35 | +#define VADDUBS VX4(512) | 33 | /* |
36 | #define VADDUBM VX4(0) | 34 | * We know that the first page matched, and an otherwise valid TB |
37 | +#define VADDSHS VX4(832) | 35 | @@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d) |
38 | +#define VADDUHS VX4(576) | 36 | return false; |
39 | #define VADDUHM VX4(64) | 37 | } |
40 | +#define VADDSWS VX4(896) | 38 | |
41 | +#define VADDUWS VX4(640) | 39 | -static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, |
42 | #define VADDUWM VX4(128) | 40 | - target_ulong cs_base, uint32_t flags, |
43 | 41 | +static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc, | |
44 | +#define VSUBSBS VX4(1792) | 42 | + uint64_t cs_base, uint32_t flags, |
45 | +#define VSUBUBS VX4(1536) | 43 | uint32_t cflags) |
46 | #define VSUBUBM VX4(1024) | 44 | { |
47 | +#define VSUBSHS VX4(1856) | 45 | tb_page_addr_t phys_pc; |
48 | +#define VSUBUHS VX4(1600) | 46 | @@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, |
49 | #define VSUBUHM VX4(1088) | 47 | } |
50 | +#define VSUBSWS VX4(1920) | 48 | |
51 | +#define VSUBUWS VX4(1664) | 49 | /* Might cause an exception, so have a longjmp destination ready */ |
52 | #define VSUBUWM VX4(1152) | 50 | -static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, |
53 | 51 | - target_ulong cs_base, | |
54 | #define VMAXSB VX4(258) | 52 | - uint32_t flags, uint32_t cflags) |
55 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 53 | +static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc, |
56 | case INDEX_op_smin_vec: | 54 | + uint64_t cs_base, uint32_t flags, |
57 | case INDEX_op_umax_vec: | 55 | + uint32_t cflags) |
58 | case INDEX_op_umin_vec: | 56 | { |
59 | + case INDEX_op_ssadd_vec: | 57 | TranslationBlock *tb; |
60 | + case INDEX_op_sssub_vec: | 58 | CPUJumpCache *jc; |
61 | + case INDEX_op_usadd_vec: | 59 | @@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, |
62 | + case INDEX_op_ussub_vec: | 60 | return tb; |
63 | return vece <= MO_32; | 61 | } |
64 | case INDEX_op_cmp_vec: | 62 | |
65 | return vece <= MO_32 ? -1 : 0; | 63 | -static void log_cpu_exec(target_ulong pc, CPUState *cpu, |
66 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 64 | +static void log_cpu_exec(vaddr pc, CPUState *cpu, |
67 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | 65 | const TranslationBlock *tb) |
68 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | 66 | { |
69 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | 67 | if (qemu_log_in_addr_range(pc)) { |
70 | + ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | 68 | qemu_log_mask(CPU_LOG_EXEC, |
71 | + usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | 69 | "Trace %d: %p [%08" PRIx64 |
72 | + sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | 70 | - "/" TARGET_FMT_lx "/%08x/%08x] %s\n", |
73 | + ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | 71 | + "/%" VADDR_PRIx "/%08x/%08x] %s\n", |
74 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | 72 | cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, |
75 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | 73 | tb->flags, tb->cflags, lookup_symbol(pc)); |
76 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | 74 | |
77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 75 | @@ -XXX,XX +XXX,XX @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu, |
78 | case INDEX_op_sub_vec: | 76 | } |
79 | insn = sub_op[vece]; | 77 | } |
80 | break; | 78 | |
81 | + case INDEX_op_ssadd_vec: | 79 | -static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc, |
82 | + insn = ssadd_op[vece]; | 80 | +static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc, |
83 | + break; | 81 | uint32_t *cflags) |
84 | + case INDEX_op_sssub_vec: | 82 | { |
85 | + insn = sssub_op[vece]; | 83 | CPUBreakpoint *bp; |
86 | + break; | 84 | @@ -XXX,XX +XXX,XX @@ static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc, |
87 | + case INDEX_op_usadd_vec: | 85 | return false; |
88 | + insn = usadd_op[vece]; | 86 | } |
89 | + break; | 87 | |
90 | + case INDEX_op_ussub_vec: | 88 | -static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc, |
91 | + insn = ussub_op[vece]; | 89 | +static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc, |
92 | + break; | 90 | uint32_t *cflags) |
93 | case INDEX_op_smin_vec: | 91 | { |
94 | insn = smin_op[vece]; | 92 | return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) && |
95 | break; | 93 | @@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) |
96 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 94 | cc->set_pc(cpu, last_tb->pc); |
97 | case INDEX_op_andc_vec: | 95 | } |
98 | case INDEX_op_orc_vec: | 96 | if (qemu_loglevel_mask(CPU_LOG_EXEC)) { |
99 | case INDEX_op_cmp_vec: | 97 | - target_ulong pc = log_pc(cpu, last_tb); |
100 | + case INDEX_op_ssadd_vec: | 98 | + vaddr pc = log_pc(cpu, last_tb); |
101 | + case INDEX_op_sssub_vec: | 99 | if (qemu_log_in_addr_range(pc)) { |
102 | + case INDEX_op_usadd_vec: | 100 | - qemu_log("Stopped execution of TB chain before %p [" |
103 | + case INDEX_op_ussub_vec: | 101 | - TARGET_FMT_lx "] %s\n", |
104 | case INDEX_op_smax_vec: | 102 | + qemu_log("Stopped execution of TB chain before %p [%" |
105 | case INDEX_op_smin_vec: | 103 | + VADDR_PRIx "] %s\n", |
106 | case INDEX_op_umax_vec: | 104 | last_tb->tc.ptr, pc, lookup_symbol(pc)); |
105 | } | ||
106 | } | ||
107 | @@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu, | ||
108 | } | ||
109 | |||
110 | static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, | ||
111 | - target_ulong pc, | ||
112 | - TranslationBlock **last_tb, int *tb_exit) | ||
113 | + vaddr pc, TranslationBlock **last_tb, | ||
114 | + int *tb_exit) | ||
115 | { | ||
116 | int32_t insns_left; | ||
117 | |||
107 | -- | 118 | -- |
108 | 2.17.1 | 119 | 2.34.1 |
109 | |||
1 | These new instructions are conditional only on MSR.VEC and | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | are thus part of the Altivec instruction set, and not VSX. | ||
3 | This includes negation and compare not equal. | ||
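
As context for the cmp_vec changes below, a hypothetical helper (not the patch's code) showing how one 32-bit lane of a not-equal compare is produced with and without the v3.00 instructions:

    #include <stdint.h>
    #include <stdbool.h>

    /* One lane of cmp_vec(NE): v3.00 has a direct instruction,
     * otherwise the compare-equal mask is inverted afterward. */
    static uint32_t lane_cmpne32(uint32_t a, uint32_t b, bool have_isa_3_00)
    {
        if (have_isa_3_00) {
            return a != b ? -1u : 0;    /* single VCMPNEW */
        }
        return ~(a == b ? -1u : 0);     /* VCMPEQUW, then invert */
    }
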
4 | 2 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 3 | Related functions dealing with the jump cache are also updated. |
4 | |||
5 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230621135633.1649-8-anjo@rev.ng> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 9 | --- |
8 | tcg/ppc/tcg-target.h | 2 +- | 10 | accel/tcg/tb-hash.h | 12 ++++++------ |
9 | tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++ | 11 | accel/tcg/tb-jmp-cache.h | 2 +- |
10 | 2 files changed, 24 insertions(+), 1 deletion(-) | 12 | accel/tcg/cputlb.c | 2 +- |
13 | 3 files changed, 8 insertions(+), 8 deletions(-) | ||
11 | 14 | ||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 15 | diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h |
13 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/ppc/tcg-target.h | 17 | --- a/accel/tcg/tb-hash.h |
15 | +++ b/tcg/ppc/tcg-target.h | 18 | +++ b/accel/tcg/tb-hash.h |
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | 19 | @@ -XXX,XX +XXX,XX @@ |
17 | #define TCG_TARGET_HAS_andc_vec 1 | 20 | #define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1) |
18 | #define TCG_TARGET_HAS_orc_vec have_isa_2_07 | 21 | #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE) |
19 | #define TCG_TARGET_HAS_not_vec 1 | 22 | |
20 | -#define TCG_TARGET_HAS_neg_vec 0 | 23 | -static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc) |
21 | +#define TCG_TARGET_HAS_neg_vec have_isa_3_00 | 24 | +static inline unsigned int tb_jmp_cache_hash_page(vaddr pc) |
22 | #define TCG_TARGET_HAS_abs_vec 0 | 25 | { |
23 | #define TCG_TARGET_HAS_shi_vec 0 | 26 | - target_ulong tmp; |
24 | #define TCG_TARGET_HAS_shs_vec 0 | 27 | + vaddr tmp; |
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 28 | tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); |
29 | return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK; | ||
30 | } | ||
31 | |||
32 | -static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | ||
33 | +static inline unsigned int tb_jmp_cache_hash_func(vaddr pc) | ||
34 | { | ||
35 | - target_ulong tmp; | ||
36 | + vaddr tmp; | ||
37 | tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); | ||
38 | return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK) | ||
39 | | (tmp & TB_JMP_ADDR_MASK)); | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | ||
41 | #else | ||
42 | |||
43 | /* In user-mode we can get better hashing because we do not have a TLB */ | ||
44 | -static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | ||
45 | +static inline unsigned int tb_jmp_cache_hash_func(vaddr pc) | ||
46 | { | ||
47 | return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1); | ||
48 | } | ||
49 | @@ -XXX,XX +XXX,XX @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | ||
50 | #endif /* CONFIG_SOFTMMU */ | ||
51 | |||
52 | static inline | ||
53 | -uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, | ||
54 | +uint32_t tb_hash_func(tb_page_addr_t phys_pc, vaddr pc, | ||
55 | uint32_t flags, uint64_t flags2, uint32_t cf_mask) | ||
56 | { | ||
57 | return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask); | ||
58 | diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | 59 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/tcg/ppc/tcg-target.inc.c | 60 | --- a/accel/tcg/tb-jmp-cache.h |
28 | +++ b/tcg/ppc/tcg-target.inc.c | 61 | +++ b/accel/tcg/tb-jmp-cache.h |
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 62 | @@ -XXX,XX +XXX,XX @@ struct CPUJumpCache { |
30 | #define VSUBUWM VX4(1152) | 63 | struct rcu_head rcu; |
31 | #define VSUBUDM VX4(1216) /* v2.07 */ | 64 | struct { |
32 | 65 | TranslationBlock *tb; | |
33 | +#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ | 66 | - target_ulong pc; |
34 | +#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ | 67 | + vaddr pc; |
35 | + | 68 | } array[TB_JMP_CACHE_SIZE]; |
36 | #define VMAXSB VX4(258) | 69 | }; |
37 | #define VMAXSH VX4(322) | 70 | |
38 | #define VMAXSW VX4(386) | 71 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
39 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 72 | index XXXXXXX..XXXXXXX 100644 |
40 | #define VCMPGTUH VX4(582) | 73 | --- a/accel/tcg/cputlb.c |
41 | #define VCMPGTUW VX4(646) | 74 | +++ b/accel/tcg/cputlb.c |
42 | #define VCMPGTUD VX4(711) /* v2.07 */ | 75 | @@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, |
43 | +#define VCMPNEB VX4(7) /* v3.00 */ | 76 | desc->window_max_entries = max_entries; |
44 | +#define VCMPNEH VX4(71) /* v3.00 */ | 77 | } |
45 | +#define VCMPNEW VX4(135) /* v3.00 */ | 78 | |
46 | 79 | -static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) | |
47 | #define VSLB VX4(260) | 80 | +static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr) |
48 | #define VSLH VX4(324) | 81 | { |
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 82 | CPUJumpCache *jc = cpu->tb_jmp_cache; |
50 | case INDEX_op_shri_vec: | 83 | int i, i0; |
51 | case INDEX_op_sari_vec: | ||
52 | return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
53 | + case INDEX_op_neg_vec: | ||
54 | + return vece >= MO_32 && have_isa_3_00; | ||
55 | case INDEX_op_mul_vec: | ||
56 | switch (vece) { | ||
57 | case MO_8: | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
59 | static const uint32_t | ||
60 | add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
61 | sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
62 | + neg_op[4] = { 0, 0, VNEGW, VNEGD }, | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
64 | + ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, | ||
65 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
66 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
67 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
69 | case INDEX_op_sub_vec: | ||
70 | insn = sub_op[vece]; | ||
71 | break; | ||
72 | + case INDEX_op_neg_vec: | ||
73 | + insn = neg_op[vece]; | ||
74 | + a2 = a1; | ||
75 | + a1 = 0; | ||
76 | + break; | ||
77 | case INDEX_op_mul_vec: | ||
78 | tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
79 | insn = VMULUWM; | ||
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
81 | case TCG_COND_EQ: | ||
82 | insn = eq_op[vece]; | ||
83 | break; | ||
84 | + case TCG_COND_NE: | ||
85 | + insn = ne_op[vece]; | ||
86 | + break; | ||
87 | case TCG_COND_GT: | ||
88 | insn = gts_op[vece]; | ||
89 | break; | ||
90 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
91 | case TCG_COND_GTU: | ||
92 | break; | ||
93 | case TCG_COND_NE: | ||
94 | + if (have_isa_3_00 && vece <= MO_32) { | ||
95 | + break; | ||
96 | + } | ||
97 | + /* fall through */ | ||
98 | case TCG_COND_LE: | ||
99 | case TCG_COND_LEU: | ||
100 | need_inv = true; | ||
101 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
102 | case INDEX_op_dup2_vec: | ||
103 | return &v_v_v; | ||
104 | case INDEX_op_not_vec: | ||
105 | + case INDEX_op_neg_vec: | ||
106 | case INDEX_op_dup_vec: | ||
107 | return &v_v; | ||
108 | case INDEX_op_ld_vec: | ||
109 | -- | 84 | -- |
110 | 2.17.1 | 85 | 2.34.1 |
111 | |||
1 | For Altivec, vector multiply is always an expansion. | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | 2 | ||
3 | Functions for probing memory accesses (and functions that call these) | ||
4 | are updated to take a vaddr for guest virtual addresses over | ||
5 | target_ulong. | ||
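
To unpack the one-line note above that multiply is always an expansion: a scalar sketch of the net effect of the even/odd multiply, merge, and pack sequence added below for the 16-bit case (hypothetical helper, not the patch's code):

    #include <stdint.h>

    /* Net effect of VMULEUH/VMULOUH + VMRGHW/VMRGLW + VPKUWUM:
     * a lane-wise modular 16-bit multiply. */
    static void mul_u16x8(uint16_t d[8], const uint16_t a[8],
                          const uint16_t b[8])
    {
        uint32_t even[4], odd[4];

        for (int i = 0; i < 4; i++) {
            even[i] = (uint32_t)a[2 * i] * b[2 * i];        /* vmuleuh */
            odd[i] = (uint32_t)a[2 * i + 1] * b[2 * i + 1]; /* vmulouh */
        }
        /* The merges interleave the even/odd 32-bit products; the pack
         * then keeps the low 16 bits of each, restoring lane order. */
        for (int i = 0; i < 8; i++) {
            uint32_t wide = (i & 1) ? odd[i / 2] : even[i / 2];
            d[i] = (uint16_t)wide;
        }
    }
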
6 | |||
7 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230621135633.1649-9-anjo@rev.ng> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | 11 | --- |
6 | tcg/ppc/tcg-target.h | 2 +- | 12 | include/exec/exec-all.h | 14 +++++++------- |
7 | tcg/ppc/tcg-target.opc.h | 8 +++ | 13 | accel/stubs/tcg-stub.c | 4 ++-- |
8 | tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++- | 14 | accel/tcg/cputlb.c | 12 ++++++------ |
9 | 3 files changed, 121 insertions(+), 2 deletions(-) | 15 | accel/tcg/user-exec.c | 8 ++++---- |
16 | 4 files changed, 19 insertions(+), 19 deletions(-) | ||
10 | 17 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 18 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h |
12 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 20 | --- a/include/exec/exec-all.h |
14 | +++ b/tcg/ppc/tcg-target.h | 21 | +++ b/include/exec/exec-all.h |
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 22 | @@ -XXX,XX +XXX,XX @@ static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, |
16 | #define TCG_TARGET_HAS_shs_vec 0 | 23 | * Finally, return the host address for a page that is backed by RAM, |
17 | #define TCG_TARGET_HAS_shv_vec 1 | 24 | * or NULL if the page requires I/O. |
18 | #define TCG_TARGET_HAS_cmp_vec 1 | 25 | */ |
19 | -#define TCG_TARGET_HAS_mul_vec 0 | 26 | -void *probe_access(CPUArchState *env, target_ulong addr, int size, |
20 | +#define TCG_TARGET_HAS_mul_vec 1 | 27 | +void *probe_access(CPUArchState *env, vaddr addr, int size, |
21 | #define TCG_TARGET_HAS_sat_vec 1 | 28 | MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); |
22 | #define TCG_TARGET_HAS_minmax_vec 1 | 29 | |
23 | #define TCG_TARGET_HAS_bitsel_vec 0 | 30 | -static inline void *probe_write(CPUArchState *env, target_ulong addr, int size, |
24 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | 31 | +static inline void *probe_write(CPUArchState *env, vaddr addr, int size, |
32 | int mmu_idx, uintptr_t retaddr) | ||
33 | { | ||
34 | return probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); | ||
35 | } | ||
36 | |||
37 | -static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, | ||
38 | +static inline void *probe_read(CPUArchState *env, vaddr addr, int size, | ||
39 | int mmu_idx, uintptr_t retaddr) | ||
40 | { | ||
41 | return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); | ||
42 | @@ -XXX,XX +XXX,XX @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, | ||
43 | * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. | ||
44 | * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. | ||
45 | */ | ||
46 | -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, | ||
47 | +int probe_access_flags(CPUArchState *env, vaddr addr, int size, | ||
48 | MMUAccessType access_type, int mmu_idx, | ||
49 | bool nonfault, void **phost, uintptr_t retaddr); | ||
50 | |||
51 | @@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, | ||
52 | * and must be consumed or copied immediately, before any further | ||
53 | * access or changes to TLB @mmu_idx. | ||
54 | */ | ||
55 | -int probe_access_full(CPUArchState *env, target_ulong addr, int size, | ||
56 | +int probe_access_full(CPUArchState *env, vaddr addr, int size, | ||
57 | MMUAccessType access_type, int mmu_idx, | ||
58 | bool nonfault, void **phost, | ||
59 | CPUTLBEntryFull **pfull, uintptr_t retaddr); | ||
60 | @@ -XXX,XX +XXX,XX @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu, | ||
61 | * | ||
62 | * Note: this function can trigger an exception. | ||
63 | */ | ||
64 | -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
65 | +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, | ||
66 | void **hostp); | ||
67 | |||
68 | /** | ||
69 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
70 | * Note: this function can trigger an exception. | ||
71 | */ | ||
72 | static inline tb_page_addr_t get_page_addr_code(CPUArchState *env, | ||
73 | - target_ulong addr) | ||
74 | + vaddr addr) | ||
75 | { | ||
76 | return get_page_addr_code_hostp(env, addr, NULL); | ||
77 | } | ||
78 | diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | 79 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/tcg/ppc/tcg-target.opc.h | 80 | --- a/accel/stubs/tcg-stub.c |
27 | +++ b/tcg/ppc/tcg-target.opc.h | 81 | +++ b/accel/stubs/tcg-stub.c |
28 | @@ -XXX,XX +XXX,XX @@ | 82 | @@ -XXX,XX +XXX,XX @@ void tcg_flush_jmp_cache(CPUState *cpu) |
29 | * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | 83 | { |
30 | * consider these to be UNSPEC with names. | 84 | } |
31 | */ | 85 | |
32 | + | 86 | -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, |
33 | +DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC) | 87 | +int probe_access_flags(CPUArchState *env, vaddr addr, int size, |
34 | +DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC) | 88 | MMUAccessType access_type, int mmu_idx, |
35 | +DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) | 89 | bool nonfault, void **phost, uintptr_t retaddr) |
36 | +DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) | 90 | { |
37 | +DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) | 91 | g_assert_not_reached(); |
38 | +DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) | 92 | } |
39 | +DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) | 93 | |
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 94 | -void *probe_access(CPUArchState *env, target_ulong addr, int size, |
95 | +void *probe_access(CPUArchState *env, vaddr addr, int size, | ||
96 | MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) | ||
97 | { | ||
98 | /* Handled by hardware accelerator. */ | ||
99 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | 100 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/tcg/ppc/tcg-target.inc.c | 101 | --- a/accel/tcg/cputlb.c |
43 | +++ b/tcg/ppc/tcg-target.inc.c | 102 | +++ b/accel/tcg/cputlb.c |
44 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 103 | @@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, |
45 | #define VSRAB VX4(772) | ||
46 | #define VSRAH VX4(836) | ||
47 | #define VSRAW VX4(900) | ||
48 | +#define VRLB VX4(4) | ||
49 | +#define VRLH VX4(68) | ||
50 | +#define VRLW VX4(132) | ||
51 | + | ||
52 | +#define VMULEUB VX4(520) | ||
53 | +#define VMULEUH VX4(584) | ||
54 | +#define VMULOUB VX4(8) | ||
55 | +#define VMULOUH VX4(72) | ||
56 | +#define VMSUMUHM VX4(38) | ||
57 | + | ||
58 | +#define VMRGHB VX4(12) | ||
59 | +#define VMRGHH VX4(76) | ||
60 | +#define VMRGHW VX4(140) | ||
61 | +#define VMRGLB VX4(268) | ||
62 | +#define VMRGLH VX4(332) | ||
63 | +#define VMRGLW VX4(396) | ||
64 | + | ||
65 | +#define VPKUHUM VX4(14) | ||
66 | +#define VPKUWUM VX4(78) | ||
67 | |||
68 | #define VAND VX4(1028) | ||
69 | #define VANDC VX4(1092) | ||
70 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
71 | case INDEX_op_sarv_vec: | ||
72 | return vece <= MO_32; | ||
73 | case INDEX_op_cmp_vec: | ||
74 | + case INDEX_op_mul_vec: | ||
75 | case INDEX_op_shli_vec: | ||
76 | case INDEX_op_shri_vec: | ||
77 | case INDEX_op_sari_vec: | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
79 | smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
80 | shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
81 | shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
82 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
83 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
84 | + mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
85 | + mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
86 | + muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
87 | + mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
88 | + pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
89 | + rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
90 | |||
91 | TCGType type = vecl + TCG_TYPE_V64; | ||
92 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
94 | } | ||
95 | break; | ||
96 | |||
97 | + case INDEX_op_ppc_mrgh_vec: | ||
98 | + insn = mrgh_op[vece]; | ||
99 | + break; | ||
100 | + case INDEX_op_ppc_mrgl_vec: | ||
101 | + insn = mrgl_op[vece]; | ||
102 | + break; | ||
103 | + case INDEX_op_ppc_muleu_vec: | ||
104 | + insn = muleu_op[vece]; | ||
105 | + break; | ||
106 | + case INDEX_op_ppc_mulou_vec: | ||
107 | + insn = mulou_op[vece]; | ||
108 | + break; | ||
109 | + case INDEX_op_ppc_pkum_vec: | ||
110 | + insn = pkum_op[vece]; | ||
111 | + break; | ||
112 | + case INDEX_op_ppc_rotl_vec: | ||
113 | + insn = rotl_op[vece]; | ||
114 | + break; | ||
115 | + case INDEX_op_ppc_msum_vec: | ||
116 | + tcg_debug_assert(vece == MO_16); | ||
117 | + tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); | ||
118 | + return; | ||
119 | + | ||
120 | case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
121 | case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
122 | case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
123 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
124 | } | 104 | } |
125 | } | 105 | } |
126 | 106 | ||
127 | +static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | 107 | -static int probe_access_internal(CPUArchState *env, target_ulong addr, |
128 | + TCGv_vec v1, TCGv_vec v2) | 108 | +static int probe_access_internal(CPUArchState *env, vaddr addr, |
129 | +{ | 109 | int fault_size, MMUAccessType access_type, |
130 | + TCGv_vec t1 = tcg_temp_new_vec(type); | 110 | int mmu_idx, bool nonfault, |
131 | + TCGv_vec t2 = tcg_temp_new_vec(type); | 111 | void **phost, CPUTLBEntryFull **pfull, |
132 | + TCGv_vec t3, t4; | 112 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, |
133 | + | 113 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
134 | + switch (vece) { | 114 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
135 | + case MO_8: | 115 | uint64_t tlb_addr = tlb_read_idx(entry, access_type); |
136 | + case MO_16: | 116 | - target_ulong page_addr = addr & TARGET_PAGE_MASK; |
137 | + vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), | 117 | + vaddr page_addr = addr & TARGET_PAGE_MASK; |
138 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 118 | int flags = TLB_FLAGS_MASK; |
139 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), | 119 | |
140 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 120 | if (!tlb_hit_page(tlb_addr, page_addr)) { |
141 | + vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), | 121 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, |
142 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | 122 | return flags; |
143 | + vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), | 123 | } |
144 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | 124 | |
145 | + vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), | 125 | -int probe_access_full(CPUArchState *env, target_ulong addr, int size, |
146 | + tcgv_vec_arg(v0), tcgv_vec_arg(t1)); | 126 | +int probe_access_full(CPUArchState *env, vaddr addr, int size, |
147 | + break; | 127 | MMUAccessType access_type, int mmu_idx, |
148 | + | 128 | bool nonfault, void **phost, CPUTLBEntryFull **pfull, |
149 | + case MO_32: | 129 | uintptr_t retaddr) |
150 | + t3 = tcg_temp_new_vec(type); | 130 | @@ -XXX,XX +XXX,XX @@ int probe_access_full(CPUArchState *env, target_ulong addr, int size, |
151 | + t4 = tcg_temp_new_vec(type); | 131 | return flags; |
152 | + tcg_gen_dupi_vec(MO_8, t4, -16); | 132 | } |
153 | + vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1), | 133 | |
154 | + tcgv_vec_arg(v2), tcgv_vec_arg(t4)); | 134 | -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, |
155 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), | 135 | +int probe_access_flags(CPUArchState *env, vaddr addr, int size, |
156 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 136 | MMUAccessType access_type, int mmu_idx, |
157 | + tcg_gen_dupi_vec(MO_8, t3, 0); | 137 | bool nonfault, void **phost, uintptr_t retaddr) |
158 | + vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3), | ||
159 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); | ||
160 | + vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3), | ||
161 | + tcgv_vec_arg(t3), tcgv_vec_arg(t4)); | ||
162 | + tcg_gen_add_vec(MO_32, v0, t2, t3); | ||
163 | + tcg_temp_free_vec(t3); | ||
164 | + tcg_temp_free_vec(t4); | ||
165 | + break; | ||
166 | + | ||
167 | + default: | ||
168 | + g_assert_not_reached(); | ||
169 | + } | ||
170 | + tcg_temp_free_vec(t1); | ||
171 | + tcg_temp_free_vec(t2); | ||
172 | +} | ||
173 | + | ||
174 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
175 | TCGArg a0, ...) | ||
176 | { | 138 | { |
177 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | 139 | @@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, |
178 | v2 = temp_tcgv_vec(arg_temp(a2)); | 140 | return flags; |
179 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | 141 | } |
180 | break; | 142 | |
181 | + case INDEX_op_mul_vec: | 143 | -void *probe_access(CPUArchState *env, target_ulong addr, int size, |
182 | + v2 = temp_tcgv_vec(arg_temp(a2)); | 144 | +void *probe_access(CPUArchState *env, vaddr addr, int size, |
183 | + expand_vec_mul(type, vece, v0, v1, v2); | 145 | MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) |
184 | + break; | 146 | { |
185 | default: | 147 | CPUTLBEntryFull *full; |
186 | g_assert_not_reached(); | 148 | @@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, |
187 | } | 149 | * NOTE: This function will trigger an exception if the page is |
188 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 150 | * not executable. |
189 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | 151 | */ |
190 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | 152 | -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, |
191 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | 153 | +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, |
192 | + static const TCGTargetOpDef v_v_v_v | 154 | void **hostp) |
193 | + = { .args_ct_str = { "v", "v", "v", "v" } }; | 155 | { |
194 | 156 | CPUTLBEntryFull *full; | |
195 | switch (op) { | 157 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c |
196 | case INDEX_op_goto_ptr: | 158 | index XXXXXXX..XXXXXXX 100644 |
197 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 159 | --- a/accel/tcg/user-exec.c |
198 | 160 | +++ b/accel/tcg/user-exec.c | |
199 | case INDEX_op_add_vec: | 161 | @@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc) |
200 | case INDEX_op_sub_vec: | 162 | return current_tb_invalidated ? 2 : 1; |
201 | + case INDEX_op_mul_vec: | 163 | } |
202 | case INDEX_op_and_vec: | 164 | |
203 | case INDEX_op_or_vec: | 165 | -static int probe_access_internal(CPUArchState *env, target_ulong addr, |
204 | case INDEX_op_xor_vec: | 166 | +static int probe_access_internal(CPUArchState *env, vaddr addr, |
205 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 167 | int fault_size, MMUAccessType access_type, |
206 | case INDEX_op_shlv_vec: | 168 | bool nonfault, uintptr_t ra) |
207 | case INDEX_op_shrv_vec: | 169 | { |
208 | case INDEX_op_sarv_vec: | 170 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, |
209 | + case INDEX_op_ppc_mrgh_vec: | 171 | cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); |
210 | + case INDEX_op_ppc_mrgl_vec: | 172 | } |
211 | + case INDEX_op_ppc_muleu_vec: | 173 | |
212 | + case INDEX_op_ppc_mulou_vec: | 174 | -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, |
213 | + case INDEX_op_ppc_pkum_vec: | 175 | +int probe_access_flags(CPUArchState *env, vaddr addr, int size, |
214 | + case INDEX_op_ppc_rotl_vec: | 176 | MMUAccessType access_type, int mmu_idx, |
215 | return &v_v_v; | 177 | bool nonfault, void **phost, uintptr_t ra) |
216 | case INDEX_op_not_vec: | 178 | { |
217 | case INDEX_op_dup_vec: | 179 | @@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, |
218 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 180 | return flags; |
219 | case INDEX_op_st_vec: | 181 | } |
220 | case INDEX_op_dupm_vec: | 182 | |
221 | return &v_r; | 183 | -void *probe_access(CPUArchState *env, target_ulong addr, int size, |
222 | + case INDEX_op_ppc_msum_vec: | 184 | +void *probe_access(CPUArchState *env, vaddr addr, int size, |
223 | + return &v_v_v_v; | 185 | MMUAccessType access_type, int mmu_idx, uintptr_t ra) |
224 | 186 | { | |
225 | default: | 187 | int flags; |
226 | return NULL; | 188 | @@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, |
189 | return size ? g2h(env_cpu(env), addr) : NULL; | ||
190 | } | ||
191 | |||
192 | -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
193 | +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, | ||
194 | void **hostp) | ||
195 | { | ||
196 | int flags; | ||
227 | -- | 197 | -- |
228 | 2.17.1 | 198 | 2.34.1 |
229 | |||
1 | These new instructions are conditional on MSR.FP when TX=0 and | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | MSR.VEC when TX=1. Since we only care about the Altivec registers, | ||
3 | and force TX=1, we can consider these to be Altivec instructions. | ||
4 | Since Altivec is true for any use of vector types, we only need | ||
5 | to test have_isa_2_07. | ||
6 | 2 | ||
7 | This includes moves to and from the integer registers. | 3 | Update atomic_mmu_lookup() and cpu_mmu_lookup() to take the guest |
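
For context, a sketch (illustration only, not the patch's emitter) of the MTVSRD data movement this relies on, modeled on a two-doubleword view of a vector register; it matches the "integer in the left doubleword" convention used by the earlier load patches:

    #include <stdint.h>

    typedef struct { uint64_t dw[2]; } VSRegModel; /* dw[0] = left dword */

    /* mtvsrd: GPR -> doubleword 0 of the vector-scalar register.
     * The ISA leaves dw[1] undefined; zeroed here for determinism. */
    static VSRegModel mtvsrd_model(uint64_t gpr)
    {
        VSRegModel r = { { gpr, 0 } };
        return r;
    }
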
4 | virtual address as a vaddr instead of a target_ulong. | ||
8 | 5 | ||
9 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 6 | Signed-off-by: Anton Johansson <anjo@rev.ng> |
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230621135633.1649-10-anjo@rev.ng> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | --- | 10 | --- |
12 | tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------ | 11 | accel/tcg/cputlb.c | 6 +++--- |
13 | 1 file changed, 26 insertions(+), 6 deletions(-) | 12 | accel/tcg/user-exec.c | 6 +++--- |
13 | 2 files changed, 6 insertions(+), 6 deletions(-) | ||
14 | 14 | ||
15 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 15 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/ppc/tcg-target.inc.c | 17 | --- a/accel/tcg/cputlb.c |
18 | +++ b/tcg/ppc/tcg-target.inc.c | 18 | +++ b/accel/tcg/cputlb.c |
19 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 19 | @@ -XXX,XX +XXX,XX @@ static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
20 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | 20 | * Probe for an atomic operation. Do not allow unaligned operations, |
21 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | 21 | * or io operations to proceed. Return the host address. |
22 | 22 | */ | |
23 | +#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ | 23 | -static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, |
24 | +#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ | 24 | - MemOpIdx oi, int size, uintptr_t retaddr) |
25 | +#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ | 25 | +static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
26 | +#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ | 26 | + int size, uintptr_t retaddr) |
27 | + | 27 | { |
28 | #define RT(r) ((r)<<21) | 28 | uintptr_t mmu_idx = get_mmuidx(oi); |
29 | #define RS(r) ((r)<<21) | 29 | MemOp mop = get_memop(oi); |
30 | #define RA(r) ((r)<<16) | 30 | int a_bits = get_alignment_bits(mop); |
31 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | 31 | uintptr_t index; |
32 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | 32 | CPUTLBEntry *tlbe; |
33 | /* fallthru */ | 33 | - target_ulong tlb_addr; |
34 | case TCG_TYPE_I32: | 34 | + vaddr tlb_addr; |
35 | - if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | 35 | void *hostaddr; |
36 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | 36 | CPUTLBEntryFull *full; |
37 | - break; | 37 | |
38 | - } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | 38 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c |
39 | - /* Altivec does not support vector/integer moves. */ | 39 | index XXXXXXX..XXXXXXX 100644 |
40 | - return false; | 40 | --- a/accel/tcg/user-exec.c |
41 | + if (ret < TCG_REG_V0) { | 41 | +++ b/accel/tcg/user-exec.c |
42 | + if (arg < TCG_REG_V0) { | 42 | @@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { } |
43 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | 43 | |
44 | + break; | 44 | /* The softmmu versions of these helpers are in cputlb.c. */ |
45 | + } else if (have_isa_2_07) { | 45 | |
46 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) | 46 | -static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr, |
47 | + | VRT(arg) | RA(ret)); | 47 | +static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr, |
48 | + break; | 48 | MemOp mop, uintptr_t ra, MMUAccessType type) |
49 | + } else { | 49 | { |
50 | + /* Altivec does not support vector->integer moves. */ | 50 | int a_bits = get_alignment_bits(mop); |
51 | + return false; | 51 | @@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, |
52 | + } | 52 | /* |
53 | + } else if (arg < TCG_REG_V0) { | 53 | * Do not allow unaligned operations to proceed. Return the host address. |
54 | + if (have_isa_2_07) { | 54 | */ |
55 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) | 55 | -static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, |
56 | + | VRT(ret) | RA(arg)); | 56 | - MemOpIdx oi, int size, uintptr_t retaddr) |
57 | + break; | 57 | +static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
58 | + } else { | 58 | + int size, uintptr_t retaddr) |
59 | + /* Altivec does not support integer->vector moves. */ | 59 | { |
60 | + return false; | 60 | MemOp mop = get_memop(oi); |
61 | + } | 61 | int a_bits = get_alignment_bits(mop); |
62 | } | ||
63 | /* fallthru */ | ||
64 | case TCG_TYPE_V64: | ||
65 | -- | 62 | -- |
66 | 2.17.1 | 63 | 2.34.1 |
67 | |||
1 | Introduce all of the flags required to enable tcg backend vector support, | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | and a runtime flag to indicate the host supports Altivec instructions. | ||
3 | 2 | ||
4 | For now, do not actually set have_altivec to true, because we have not | 3 | Use vaddr for guest virtual address in translator_use_goto_tb() and |
5 | yet added all of the code to actually generate all of the required insns. | 4 | translator_loop(). |
6 | However, we must define these flags in order to disable ifndefs that create | ||
7 | stub versions of the functions added here. | ||
8 | 5 | ||
9 | The change to tcg_out_movi works around a buglet in tcg.c wherein if we | 6 | Signed-off-by: Anton Johansson <anjo@rev.ng> |
10 | do not define tcg_out_dupi_vec we get a declared but not defined Werror, | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | but if we only declare it we get a defined but not used Werror. We need | 8 | Message-Id: <20230621135633.1649-11-anjo@rev.ng> |
12 | this change to tcg_out_movi eventually anyway, so it's no biggie. | 11 | include/exec/translator.h | 6 +++--- |
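
A paraphrased sketch of the tcg.c stub pattern the paragraph above refers to — not the verbatim source; the macro spelling here is from memory:

    /* While no TCG_TARGET_HAS_v* flag is set, the core supplies
     * unreachable stubs; defining the flags disables this branch,
     * so the backend must provide real definitions (added here). */
    #if !TCG_TARGET_MAYBE_vec
    static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                               unsigned vecl, unsigned vece,
                               const TCGArg *args, const int *const_args)
    {
        g_assert_not_reached();
    }
    #endif
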
10 | --- | ||
11 | include/exec/translator.h | 6 +++--- | ||
12 | accel/tcg/translator.c | 10 +++++----- | ||
13 | 2 files changed, 8 insertions(+), 8 deletions(-) | ||
13 | 14 | ||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | diff --git a/include/exec/translator.h b/include/exec/translator.h |
15 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
16 | --- | ||
17 | tcg/ppc/tcg-target.h | 25 ++++++++++++++++ | ||
18 | tcg/ppc/tcg-target.opc.h | 5 ++++ | ||
19 | tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++-- | ||
20 | 3 files changed, 89 insertions(+), 3 deletions(-) | ||
21 | create mode 100644 tcg/ppc/tcg-target.opc.h | ||
22 | |||
23 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/tcg/ppc/tcg-target.h | 17 | --- a/include/exec/translator.h |
26 | +++ b/tcg/ppc/tcg-target.h | 18 | +++ b/include/exec/translator.h |
27 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 19 | @@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps { |
28 | } TCGPowerISA; | 20 | * - When too many instructions have been translated. |
29 | 21 | */ | |
30 | extern TCGPowerISA have_isa; | 22 | void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, |
31 | +extern bool have_altivec; | 23 | - target_ulong pc, void *host_pc, |
32 | 24 | - const TranslatorOps *ops, DisasContextBase *db); | |
33 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | 25 | + vaddr pc, void *host_pc, const TranslatorOps *ops, |
34 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | 26 | + DisasContextBase *db); |
35 | @@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa; | 27 | |
36 | #define TCG_TARGET_HAS_mulsh_i64 1 | 28 | /** |
37 | #endif | 29 | * translator_use_goto_tb |
38 | 30 | @@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, | |
39 | +/* | 31 | * Return true if goto_tb is allowed between the current TB |
40 | + * While technically Altivec could support V64, it has no 64-bit store | 32 | * and the destination PC. |
41 | + * instruction and substituting two 32-bit stores makes the generated | 33 | */ |
42 | + * code quite large. | 34 | -bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); |
43 | + */ | 35 | +bool translator_use_goto_tb(DisasContextBase *db, vaddr dest); |
44 | +#define TCG_TARGET_HAS_v64 0 | 36 | |
45 | +#define TCG_TARGET_HAS_v128 have_altivec | 37 | /** |
46 | +#define TCG_TARGET_HAS_v256 0 | 38 | * translator_io_start |
47 | + | 39 | diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c |
48 | +#define TCG_TARGET_HAS_andc_vec 0 | ||
49 | +#define TCG_TARGET_HAS_orc_vec 0 | ||
50 | +#define TCG_TARGET_HAS_not_vec 0 | ||
51 | +#define TCG_TARGET_HAS_neg_vec 0 | ||
52 | +#define TCG_TARGET_HAS_abs_vec 0 | ||
53 | +#define TCG_TARGET_HAS_shi_vec 0 | ||
54 | +#define TCG_TARGET_HAS_shs_vec 0 | ||
55 | +#define TCG_TARGET_HAS_shv_vec 0 | ||
56 | +#define TCG_TARGET_HAS_cmp_vec 0 | ||
57 | +#define TCG_TARGET_HAS_mul_vec 0 | ||
58 | +#define TCG_TARGET_HAS_sat_vec 0 | ||
59 | +#define TCG_TARGET_HAS_minmax_vec 0 | ||
60 | +#define TCG_TARGET_HAS_bitsel_vec 0 | ||
61 | +#define TCG_TARGET_HAS_cmpsel_vec 0 | ||
62 | + | ||
63 | void flush_icache_range(uintptr_t start, uintptr_t stop); | ||
64 | void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); | ||
65 | |||
66 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | ||
67 | new file mode 100644 | ||
68 | index XXXXXXX..XXXXXXX | ||
69 | --- /dev/null | ||
70 | +++ b/tcg/ppc/tcg-target.opc.h | ||
71 | @@ -XXX,XX +XXX,XX @@ | ||
72 | +/* | ||
73 | + * Target-specific opcodes for host vector expansion. These will be | ||
74 | + * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | ||
75 | + * consider these to be UNSPEC with names. | ||
76 | + */ | ||
77 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
79 | --- a/tcg/ppc/tcg-target.inc.c | 41 | --- a/accel/tcg/translator.c |
80 | +++ b/tcg/ppc/tcg-target.inc.c | 42 | +++ b/accel/tcg/translator.c |
81 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 43 | @@ -XXX,XX +XXX,XX @@ static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags, |
82 | |||
83 | TCGPowerISA have_isa; | ||
84 | static bool have_isel; | ||
85 | +bool have_altivec; | ||
86 | |||
87 | #ifndef CONFIG_SOFTMMU | ||
88 | #define TCG_GUEST_BASE_REG 30 | ||
89 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | ||
90 | } | 44 | } |
91 | } | 45 | } |
92 | 46 | ||
93 | -static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | 47 | -bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) |
94 | - tcg_target_long arg) | 48 | +bool translator_use_goto_tb(DisasContextBase *db, vaddr dest) |
95 | +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
96 | + tcg_target_long val) | ||
97 | { | 49 | { |
98 | - tcg_out_movi_int(s, type, ret, arg, false); | 50 | /* Suppress goto_tb if requested. */ |
99 | + g_assert_not_reached(); | 51 | if (tb_cflags(db->tb) & CF_NO_GOTO_TB) { |
100 | +} | 52 | @@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) |
101 | + | ||
102 | +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
103 | + tcg_target_long arg) | ||
104 | +{ | ||
105 | + switch (type) { | ||
106 | + case TCG_TYPE_I32: | ||
107 | + case TCG_TYPE_I64: | ||
108 | + tcg_debug_assert(ret < TCG_REG_V0); | ||
109 | + tcg_out_movi_int(s, type, ret, arg, false); | ||
110 | + break; | ||
111 | + | ||
112 | + case TCG_TYPE_V64: | ||
113 | + case TCG_TYPE_V128: | ||
114 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
115 | + tcg_out_dupi_vec(s, type, ret, arg); | ||
116 | + break; | ||
117 | + | ||
118 | + default: | ||
119 | + g_assert_not_reached(); | ||
120 | + } | ||
121 | } | 53 | } |
122 | 54 | ||
123 | static bool mask_operand(uint32_t c, int *mb, int *me) | 55 | void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, |
124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | 56 | - target_ulong pc, void *host_pc, |
125 | } | 57 | - const TranslatorOps *ops, DisasContextBase *db) |
58 | + vaddr pc, void *host_pc, const TranslatorOps *ops, | ||
59 | + DisasContextBase *db) | ||
60 | { | ||
61 | uint32_t cflags = tb_cflags(tb); | ||
62 | TCGOp *icount_start_insn; | ||
63 | @@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, | ||
126 | } | 64 | } |
127 | 65 | ||
128 | +int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 66 | static void *translator_access(CPUArchState *env, DisasContextBase *db, |
129 | +{ | 67 | - target_ulong pc, size_t len) |
130 | + g_assert_not_reached(); | 68 | + vaddr pc, size_t len) |
131 | +} | ||
132 | + | ||
133 | +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
134 | + TCGReg dst, TCGReg src) | ||
135 | +{ | ||
136 | + g_assert_not_reached(); | ||
137 | +} | ||
138 | + | ||
139 | +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
140 | + TCGReg out, TCGReg base, intptr_t offset) | ||
141 | +{ | ||
142 | + g_assert_not_reached(); | ||
143 | +} | ||
144 | + | ||
145 | +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
146 | + unsigned vecl, unsigned vece, | ||
147 | + const TCGArg *args, const int *const_args) | ||
148 | +{ | ||
149 | + g_assert_not_reached(); | ||
150 | +} | ||
151 | + | ||
152 | +void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
153 | + TCGArg a0, ...) | ||
154 | +{ | ||
155 | + g_assert_not_reached(); | ||
156 | +} | ||
157 | + | ||
158 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
159 | { | 69 | { |
160 | static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; | 70 | void *host; |
161 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 71 | - target_ulong base, end; |
162 | 72 | + vaddr base, end; | |
163 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | 73 | TranslationBlock *tb; |
164 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | 74 | |
165 | + if (have_altivec) { | 75 | tb = db->tb; |
166 | + tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; | ||
167 | + tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; | ||
168 | + } | ||
169 | |||
170 | tcg_target_call_clobber_regs = 0; | ||
171 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); | ||
172 | -- | 76 | -- |
173 | 2.17.1 | 77 | 2.34.1 |
174 | |||
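A note on the g_assert_not_reached() stubs in the vector-enablement patch above: they are placeholders behind TCG's three-way capability query, which later patches in this series fill in. A minimal sketch of that contract, assuming the usual TCG convention (0 = expand generically, 1 = emit directly, -1 = the backend expands it itself via tcg_expand_vec_op):

    int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
    {
        switch (opc) {
        case INDEX_op_and_vec:
            return 1;     /* one host instruction suffices */
        case INDEX_op_cmp_vec:
            return -1;    /* backend rewrites it from simpler vec ops */
        default:
            return 0;     /* middle end falls back on its own expansion */
        }
    }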
1 | These new instructions are conditional only on MSR.VSX and | 1 | From: Anton Johansson <anjo@rev.ng> |
---|---|---|---|
2 | are thus part of the VSX instruction set, and not Altivec. | ||
3 | This includes single-word loads and stores. | 2 |
4 | 2 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 3 | Signed-off-by: Anton Johansson <anjo@rev.ng> |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20230621135633.1649-13-anjo@rev.ng> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 7 | --- |
8 | tcg/ppc/tcg-target.inc.c | 10 ++++++++++ | 8 | include/exec/exec-all.h | 2 +-
9 | 1 file changed, 10 insertions(+) | 9 | cpu.c | 2 +-
10 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
10 | 11 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 12 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h |
12 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 14 | --- a/include/exec/exec-all.h |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 15 | +++ b/include/exec/exec-all.h |
15 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 16 | @@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu); |
16 | #define LVEWX XO31(71) | 17 | |
17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | 18 | /* TranslationBlock invalidate API */ |
18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | 19 | #if defined(CONFIG_USER_ONLY) |
19 | +#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ | 20 | -void tb_invalidate_phys_addr(target_ulong addr); |
20 | 21 | +void tb_invalidate_phys_addr(hwaddr addr); | |
21 | #define STVX XO31(231) | 22 | #else |
22 | #define STVEWX XO31(199) | 23 | void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); |
23 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | 24 | #endif |
24 | +#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | 25 | diff --git a/cpu.c b/cpu.c |
25 | 26 | index XXXXXXX..XXXXXXX 100644 | |
26 | #define VADDSBS VX4(768) | 27 | --- a/cpu.c |
27 | #define VADDUBS VX4(512) | 28 | +++ b/cpu.c |
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 29 | @@ -XXX,XX +XXX,XX @@ void list_cpus(void) |
29 | tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | 30 | } |
30 | break; | 31 | |
31 | } | 32 | #if defined(CONFIG_USER_ONLY) |
32 | + if (have_isa_2_07 && have_vsx) { | 33 | -void tb_invalidate_phys_addr(target_ulong addr) |
33 | + tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); | 34 | +void tb_invalidate_phys_addr(hwaddr addr) |
34 | + break; | 35 | { |
35 | + } | 36 | mmap_lock(); |
36 | tcg_debug_assert((offset & 3) == 0); | 37 | tb_invalidate_phys_page(addr); |
37 | tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
38 | shift = (offset - 4) & 0xc; | ||
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
40 | tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
41 | break; | ||
42 | } | ||
43 | + if (have_isa_2_07 && have_vsx) { | ||
44 | + tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); | ||
45 | + break; | ||
46 | + } | ||
48 | tcg_debug_assert((offset & 3) == 0); | ||
49 | shift = (offset - 4) & 0xc; | ||
50 | if (shift) { | ||
51 | -- | 38 | -- |
52 | 2.17.1 | 39 | 2.34.1 |
53 | |||
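On the "force tx=1" / "force sx=1" comments in the LXSIWZX/STXSIWX defines above: a sketch of the encoding assumption, not text from the patch. X-form VSX opcodes carry a TX (or SX) bit that widens the 5-bit register field to 6 bits; hard-wiring it to 1 selects VSR32-VSR63, which alias the Altivec registers this backend actually allocates:

    #define OPCD(opc)   ((opc) << 26)             /* primary opcode field */
    #define XO31(opc)   (OPCD(31) | ((opc) << 1))

    #define LXSIWZX     (XO31(12)  | 1)           /* tx=1: target in VSR32..63 */
    #define STXSIWX     (XO31(140) | 1)           /* sx=1: source in VSR32..63 */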
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | From: Alex Bennée <alex.bennee@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | qemu_cpu_kick is used for a number of reasons, including to indicate | 3 | Balaton discovered that asserts for the extract/deposit calls had a
4 | there is work to be done. However, when thread=single, the old | 4 | significant impact on a lame benchmark on qemu-ppc. Replicating with:
5 | qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one | ||
6 | which can lead to a hang in the case that: | ||
7 | 5 | ||
8 | a) the kick is from outside the vCPUs (e.g. iothread) | 6 | ./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame \ |
9 | b) the timers are paused (i.e. iothread calling run_on_cpu) | 7 | -h pts-trondheim-3.wav pts-trondheim-3.mp3 |
10 | 8 | ||
11 | To avoid this, let's split qemu_cpu_kick_rr into two functions. One for | 11 |
12 | the timer which continues to advance to the next timeslice and another | 10 | should have done causing them to prominently figure in the profile: |
13 | for all other kicks. | ||
14 | 11 | ||
15 | Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org> | 12 | 11.44% qemu-ppc64 qemu-ppc64 [.] unpack_raw64.isra.0 |
16 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 13 | 11.03% qemu-ppc64 qemu-ppc64 [.] parts64_uncanon_normal |
14 | 8.26% qemu-ppc64 qemu-ppc64 [.] helper_compute_fprf_float64 | ||
15 | 6.75% qemu-ppc64 qemu-ppc64 [.] do_float_check_status | ||
16 | 5.34% qemu-ppc64 qemu-ppc64 [.] parts64_muladd | ||
17 | 4.75% qemu-ppc64 qemu-ppc64 [.] pack_raw64.isra.0 | ||
18 | 4.38% qemu-ppc64 qemu-ppc64 [.] parts64_canonicalize | ||
19 | 3.62% qemu-ppc64 qemu-ppc64 [.] float64r32_round_pack_canonical | ||
20 | |||
21 | After this patch the same test runs 31 seconds faster with a profile | ||
22 | where the generated code dominates more: | ||
23 | |||
24 | + 14.12% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619420 | ||
25 | + 13.30% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000616850 | ||
26 | + 12.58% 12.19% qemu-ppc64 qemu-ppc64 [.] parts64_uncanon_normal | ||
27 | + 10.62% 0.00% qemu-ppc64 [unknown] [.] 0x000000400061bf70 | ||
28 | + 9.91% 9.73% qemu-ppc64 qemu-ppc64 [.] helper_compute_fprf_float64 | ||
29 | + 7.84% 7.82% qemu-ppc64 qemu-ppc64 [.] do_float_check_status | ||
30 | + 6.47% 5.78% qemu-ppc64 qemu-ppc64 [.] parts64_canonicalize.constprop.0 | ||
31 | + 6.46% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000620130 | ||
32 | + 6.42% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619400 | ||
33 | + 6.17% 6.04% qemu-ppc64 qemu-ppc64 [.] parts64_muladd | ||
34 | + 5.85% 0.00% qemu-ppc64 [unknown] [.] 0x00000040006167e0 | ||
35 | + 5.74% 0.00% qemu-ppc64 [unknown] [.] 0x0000b693fcffffd3 | ||
36 | + 5.45% 4.78% qemu-ppc64 qemu-ppc64 [.] float64r32_round_pack_canonical | ||
37 | |||
38 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
39 | Message-Id: <ec9cfe5a-d5f2-466d-34dc-c35817e7e010@linaro.org> | ||
40 | [AJB: Patchified rth's suggestion] | ||
41 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
42 | Cc: BALATON Zoltan <balaton@eik.bme.hu> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 43 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
18 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | 44 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
45 | Tested-by: BALATON Zoltan <balaton@eik.bme.hu> | ||
46 | Message-Id: <20230523131107.3680641-1-alex.bennee@linaro.org> | ||
19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 47 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
20 | --- | 48 | --- |
21 | cpus.c | 24 ++++++++++++++++++------ | 49 | fpu/softfloat.c | 22 +++++++++++----------- |
22 | 1 file changed, 18 insertions(+), 6 deletions(-) | 50 | 1 file changed, 11 insertions(+), 11 deletions(-) |
23 | 51 | ||
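For readers unfamiliar with the attribute used in the softfloat patch: QEMU_FLATTEN wraps GCC's flatten attribute (include/qemu/compiler.h), which inlines the annotated function's entire call tree and so keeps -fipa-sra from outlining helpers as *.isra.0 clones. A standalone illustration with made-up names:

    #define QEMU_FLATTEN __attribute__((flatten))

    static int helper(int x)        /* a candidate for isra outlining */
    {
        return x * 2 + 1;
    }

    int QEMU_FLATTEN hot_path(int x)
    {
        /* flatten forces helper() to be inlined here, so its body is
         * optimized together with the caller instead of being split
         * into a hot_path + helper.isra.0 pair. */
        return helper(x);
    }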
24 | diff --git a/cpus.c b/cpus.c | 52 | diff --git a/fpu/softfloat.c b/fpu/softfloat.c |
25 | index XXXXXXX..XXXXXXX 100644 | 53 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/cpus.c | 54 | --- a/fpu/softfloat.c |
27 | +++ b/cpus.c | 55 | +++ b/fpu/softfloat.c |
28 | @@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void) | 56 | @@ -XXX,XX +XXX,XX @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) |
29 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; | 57 | }; |
30 | } | 58 | } |
31 | 59 | ||
32 | -/* Kick the currently round-robin scheduled vCPU */ | 60 | -static inline void float16_unpack_raw(FloatParts64 *p, float16 f) |
33 | -static void qemu_cpu_kick_rr_cpu(void) | 61 | +static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f) |
34 | +/* Kick the currently round-robin scheduled vCPU to next */ | ||
35 | +static void qemu_cpu_kick_rr_next_cpu(void) | ||
36 | { | 62 | { |
37 | CPUState *cpu; | 63 | unpack_raw64(p, &float16_params, f); |
38 | do { | ||
39 | @@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void) | ||
40 | } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); | ||
41 | } | 64 | } |
42 | 65 | ||
43 | +/* Kick all RR vCPUs */ | 66 | -static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) |
44 | +static void qemu_cpu_kick_rr_cpus(void) | 67 | +static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) |
45 | +{ | ||
46 | + CPUState *cpu; | ||
47 | + | ||
48 | + CPU_FOREACH(cpu) { | ||
49 | + cpu_exit(cpu); | ||
50 | + }; | ||
51 | +} | ||
52 | + | ||
53 | static void do_nothing(CPUState *cpu, run_on_cpu_data unused) | ||
54 | { | 68 | { |
69 | unpack_raw64(p, &bfloat16_params, f); | ||
55 | } | 70 | } |
56 | @@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type) | 71 | |
57 | static void kick_tcg_thread(void *opaque) | 72 | -static inline void float32_unpack_raw(FloatParts64 *p, float32 f) |
73 | +static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f) | ||
58 | { | 74 | { |
59 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); | 75 | unpack_raw64(p, &float32_params, f); |
60 | - qemu_cpu_kick_rr_cpu(); | ||
61 | + qemu_cpu_kick_rr_next_cpu(); | ||
62 | } | 76 | } |
63 | 77 | ||
64 | static void start_tcg_kick_timer(void) | 78 | -static inline void float64_unpack_raw(FloatParts64 *p, float64 f) |
65 | @@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu) | 79 | +static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f) |
66 | { | 80 | { |
67 | qemu_cond_broadcast(cpu->halt_cond); | 81 | unpack_raw64(p, &float64_params, f); |
68 | if (tcg_enabled()) { | 82 | } |
69 | - cpu_exit(cpu); | 83 | |
70 | - /* NOP unless doing single-thread RR */ | 84 | -static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) |
71 | - qemu_cpu_kick_rr_cpu(); | 85 | +static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f) |
72 | + if (qemu_tcg_mttcg_enabled()) { | 86 | { |
73 | + cpu_exit(cpu); | 87 | *p = (FloatParts128) { |
74 | + } else { | 88 | .cls = float_class_unclassified, |
75 | + qemu_cpu_kick_rr_cpus(); | 89 | @@ -XXX,XX +XXX,XX @@ static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) |
76 | + } | 90 | }; |
77 | } else { | 91 | } |
78 | if (hax_enabled()) { | 92 | |
79 | /* | 93 | -static void float128_unpack_raw(FloatParts128 *p, float128 f) |
94 | +static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f) | ||
95 | { | ||
96 | const int f_size = float128_params.frac_size - 64; | ||
97 | const int e_size = float128_params.exp_size; | ||
98 | @@ -XXX,XX +XXX,XX @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | -static inline float16 float16_pack_raw(const FloatParts64 *p) | ||
103 | +static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p) | ||
104 | { | ||
105 | return make_float16(pack_raw64(p, &float16_params)); | ||
106 | } | ||
107 | |||
108 | -static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p) | ||
109 | +static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p) | ||
110 | { | ||
111 | return pack_raw64(p, &bfloat16_params); | ||
112 | } | ||
113 | |||
114 | -static inline float32 float32_pack_raw(const FloatParts64 *p) | ||
115 | +static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p) | ||
116 | { | ||
117 | return make_float32(pack_raw64(p, &float32_params)); | ||
118 | } | ||
119 | |||
120 | -static inline float64 float64_pack_raw(const FloatParts64 *p) | ||
121 | +static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p) | ||
122 | { | ||
123 | return make_float64(pack_raw64(p, &float64_params)); | ||
124 | } | ||
125 | |||
126 | -static float128 float128_pack_raw(const FloatParts128 *p) | ||
127 | +static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p) | ||
128 | { | ||
129 | const int f_size = float128_params.frac_size - 64; | ||
130 | const int e_size = float128_params.exp_size; | ||
80 | -- | 131 | -- |
81 | 2.17.1 | 132 | 2.34.1 |
82 | 133 | ||
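To make the hang scenario from the kick patch concrete, a simplified sequence (an illustration, not code from the tree):

    /* iothread, while the round-robin kick timer is paused: */
    run_on_cpu(cpu, do_work, RUN_ON_CPU_NULL);   /* queue work + kick */

    /* Before the fix, the kick merely advanced the scheduler to the
     * next vCPU.  With no vCPU currently executing and the kick timer
     * stopped, nothing ever woke the target vCPU to drain its work
     * queue, so run_on_cpu() blocked forever.  Kicking every vCPU
     * (cpu_exit() on each) guarantees that one wakes up. */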
1 | Add support for vector add/subtract using Altivec instructions: | 1 | This is a perfectly natural occurrence for x86 "rep movsb",
---|---|---|---|
2 | VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM. | 2 | where the "rep" prefix forms a counted loop of the one insn. |
3 | |||
4 | During the tests/tcg/multiarch/memory test, this logging is | ||
5 | triggered over 350000 times. Within the context of cross-i386-tci | ||
6 | build, which is already slow by nature, the logging is sufficient | ||
7 | to push the test into timeout. | ||
3 | 8 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 10 | --- |
7 | tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++ | 11 | tests/plugin/insn.c | 9 +-------- |
8 | 1 file changed, 20 insertions(+) | 12 | tests/tcg/i386/Makefile.softmmu-target | 9 --------- |
13 | tests/tcg/i386/Makefile.target | 6 ------ | ||
14 | tests/tcg/x86_64/Makefile.softmmu-target | 9 --------- | ||
15 | 4 files changed, 1 insertion(+), 32 deletions(-) | ||
9 | 16 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 17 | diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c |
11 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 19 | --- a/tests/plugin/insn.c |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 20 | +++ b/tests/plugin/insn.c |
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 21 | @@ -XXX,XX +XXX,XX @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; |
15 | #define STVX XO31(231) | 22 | #define MAX_CPUS 8 /* lets not go nuts */ |
16 | #define STVEWX XO31(199) | 23 | |
17 | 24 | typedef struct { | |
18 | +#define VADDUBM VX4(0) | 25 | - uint64_t last_pc; |
19 | +#define VADDUHM VX4(64) | 26 | uint64_t insn_count; |
20 | +#define VADDUWM VX4(128) | 27 | } InstructionCount; |
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) | ||
30 | { | ||
31 | unsigned int i = cpu_index % MAX_CPUS; | ||
32 | InstructionCount *c = &counts[i]; | ||
33 | - uint64_t this_pc = GPOINTER_TO_UINT(udata); | ||
34 | - if (this_pc == c->last_pc) { | ||
35 | - g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%" | ||
36 | - PRIx64 "\n", this_pc); | ||
37 | - qemu_plugin_outs(out); | ||
38 | - } | ||
39 | - c->last_pc = this_pc; | ||
21 | + | 40 | + |
22 | +#define VSUBUBM VX4(1024) | 41 | c->insn_count++; |
23 | +#define VSUBUHM VX4(1088) | 42 | } |
24 | +#define VSUBUWM VX4(1152) | 43 | |
25 | + | 44 | diff --git a/tests/tcg/i386/Makefile.softmmu-target b/tests/tcg/i386/Makefile.softmmu-target |
26 | #define VMAXSB VX4(258) | 45 | index XXXXXXX..XXXXXXX 100644 |
27 | #define VMAXSH VX4(322) | 46 | --- a/tests/tcg/i386/Makefile.softmmu-target |
28 | #define VMAXSW VX4(386) | 47 | +++ b/tests/tcg/i386/Makefile.softmmu-target |
29 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 48 | @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) |
30 | case INDEX_op_andc_vec: | 49 | |
31 | case INDEX_op_not_vec: | 50 | memory: CFLAGS+=-DCHECK_UNALIGNED=1 |
32 | return 1; | 51 | |
33 | + case INDEX_op_add_vec: | 52 | -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so |
34 | + case INDEX_op_sub_vec: | 53 | -run-plugin-%-with-libinsn.so: |
35 | case INDEX_op_smax_vec: | 54 | - $(call run-test, $@, \ |
36 | case INDEX_op_smin_vec: | 55 | - $(QEMU) -monitor none -display none \ |
37 | case INDEX_op_umax_vec: | 56 | - -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ |
38 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 57 | - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ |
39 | const TCGArg *args, const int *const_args) | 58 | - -d plugin -D $*-with-libinsn.so.pout \ |
40 | { | 59 | - $(QEMU_OPTS) $*) |
41 | static const uint32_t | 60 | - |
42 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | 61 | # Running |
43 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | 62 | QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel |
44 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | 63 | diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target |
45 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | 64 | index XXXXXXX..XXXXXXX 100644 |
46 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | 65 | --- a/tests/tcg/i386/Makefile.target |
47 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 66 | +++ b/tests/tcg/i386/Makefile.target |
48 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | 67 | @@ -XXX,XX +XXX,XX @@ else |
49 | return; | 68 | SKIP_I386_TESTS+=test-i386-fprem |
50 | 69 | endif | |
51 | + case INDEX_op_add_vec: | 70 | |
52 | + insn = add_op[vece]; | 71 | -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so |
53 | + break; | 72 | -run-plugin-%-with-libinsn.so: |
54 | + case INDEX_op_sub_vec: | 73 | - $(call run-test, $@, $(QEMU) $(QEMU_OPTS) \ |
55 | + insn = sub_op[vece]; | 74 | - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ |
56 | + break; | 75 | - -d plugin -D $*-with-libinsn.so.pout $*) |
57 | case INDEX_op_smin_vec: | 76 | - |
58 | insn = smin_op[vece]; | 77 | # Update TESTS |
59 | break; | 78 | I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) |
60 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 79 | TESTS=$(MULTIARCH_TESTS) $(I386_TESTS) |
61 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | 80 | diff --git a/tests/tcg/x86_64/Makefile.softmmu-target b/tests/tcg/x86_64/Makefile.softmmu-target |
62 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | 81 | index XXXXXXX..XXXXXXX 100644 |
63 | 82 | --- a/tests/tcg/x86_64/Makefile.softmmu-target | |
64 | + case INDEX_op_add_vec: | 83 | +++ b/tests/tcg/x86_64/Makefile.softmmu-target |
65 | + case INDEX_op_sub_vec: | 84 | @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) |
66 | case INDEX_op_and_vec: | 85 | |
67 | case INDEX_op_or_vec: | 86 | memory: CFLAGS+=-DCHECK_UNALIGNED=1 |
68 | case INDEX_op_xor_vec: | 87 | |
88 | -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so | ||
89 | -run-plugin-%-with-libinsn.so: | ||
90 | - $(call run-test, $@, \ | ||
91 | - $(QEMU) -monitor none -display none \ | ||
92 | - -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ | ||
93 | - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ | ||
94 | - -d plugin -D $*-with-libinsn.so.pout \ | ||
95 | - $(QEMU_OPTS) $*) | ||
96 | - | ||
97 | # Running | ||
98 | QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel | ||
69 | -- | 99 | -- |
70 | 2.17.1 | 100 | 2.34.1 |
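On the VX4() values used throughout the Altivec patches on the left: a sketch of the encoding assumption. VX-form Altivec instructions all sit under primary opcode 4 with an 11-bit extended opcode in the low bits, so the table entries reduce to:

    #define OPCD(opc)  ((opc) << 26)
    #define VX4(opc)   (OPCD(4) | (opc))   /* e.g. vadduwm is VX4(128) */

    /* A complete instruction word then ORs in the register fields:
     * vadduwm vD,vA,vB  ==  VX4(128) | VRT(vD) | VRA(vA) | VRB(vB). */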
71 | |||
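And for the plugin patch on the right, a guest-side illustration of the "one insn, many executions" pattern (my example, not from the test suite):

    /* One x86 instruction at one PC copies n bytes: the CPU repeats
     * "rep movsb" internally, so a per-insn plugin callback fires
     * with the same PC once per byte -- legitimate repetition, not
     * a plugin bug. */
    static void copy_bytes(void *dst, const void *src, unsigned long n)
    {
        asm volatile("rep movsb"
                     : "+D"(dst), "+S"(src), "+c"(n)
                     :: "memory");
    }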
1 | Previously we've been hard-coding knowledge that Power7 has ISEL, but | 1 | From: Fei Wu <fei2.wu@intel.com> |
---|---|---|---|
2 | it was an optional instruction before that. Use the AT_HWCAP2 bit, | ||
3 | when present, to properly determine support. | 3 | TBStats will be introduced to replace CONFIG_PROFILER entirely; here,
4 | 4 | remove all CONFIG_PROFILER-related code first.
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 3 | TBStats will be introduced to replace CONFIG_PROFILER totally, here |
4 | remove all CONFIG_PROFILER related stuffs first. | ||
5 | |||
6 | Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com> | ||
7 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
8 | Signed-off-by: Fei Wu <fei2.wu@intel.com> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-Id: <20230607122411.3394702-2-fei2.wu@intel.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 12 | --- |
8 | tcg/ppc/tcg-target.inc.c | 17 ++++++++++++----- | 13 | meson.build | 2 - |
9 | 1 file changed, 12 insertions(+), 5 deletions(-) | 14 | qapi/machine.json | 18 --- |
15 | include/qemu/timer.h | 9 -- | ||
16 | include/tcg/tcg.h | 26 ----- | ||
17 | accel/tcg/monitor.c | 31 ----- | ||
18 | accel/tcg/tcg-accel-ops.c | 10 -- | ||
19 | accel/tcg/translate-all.c | 33 ------ | ||
20 | softmmu/runstate.c | 9 -- | ||
21 | tcg/tcg.c | 214 ---------------------------------- | ||
22 | tests/qtest/qmp-cmd-test.c | 3 - | ||
23 | hmp-commands-info.hx | 15 --- | ||
24 | meson_options.txt | 2 - | ||
25 | scripts/meson-buildoptions.sh | 3 - | ||
26 | 13 files changed, 375 deletions(-) | ||
10 | 27 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 28 | diff --git a/meson.build b/meson.build |
12 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 30 | --- a/meson.build |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 31 | +++ b/meson.build |
32 | @@ -XXX,XX +XXX,XX @@ if numa.found() | ||
33 | dependencies: numa)) | ||
34 | endif | ||
35 | config_host_data.set('CONFIG_OPENGL', opengl.found()) | ||
36 | -config_host_data.set('CONFIG_PROFILER', get_option('profiler')) | ||
37 | config_host_data.set('CONFIG_RBD', rbd.found()) | ||
38 | config_host_data.set('CONFIG_RDMA', rdma.found()) | ||
39 | config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack')) | ||
40 | @@ -XXX,XX +XXX,XX @@ if 'objc' in all_languages | ||
41 | summary_info += {'QEMU_OBJCFLAGS': ' '.join(qemu_common_flags)} | ||
42 | endif | ||
43 | summary_info += {'QEMU_LDFLAGS': ' '.join(qemu_ldflags)} | ||
44 | -summary_info += {'profiler': get_option('profiler')} | ||
45 | summary_info += {'link-time optimization (LTO)': get_option('b_lto')} | ||
46 | summary_info += {'PIE': get_option('b_pie')} | ||
47 | summary_info += {'static build': get_option('prefer_static')} | ||
48 | diff --git a/qapi/machine.json b/qapi/machine.json | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/qapi/machine.json | ||
51 | +++ b/qapi/machine.json | ||
15 | @@ -XXX,XX +XXX,XX @@ | 52 | @@ -XXX,XX +XXX,XX @@ |
16 | static tcg_insn_unit *tb_ret_addr; | 53 | 'if': 'CONFIG_TCG', |
17 | 54 | 'features': [ 'unstable' ] } | |
18 | TCGPowerISA have_isa; | 55 | |
19 | - | 56 | -## |
20 | -#define HAVE_ISEL have_isa_2_06 | 57 | -# @x-query-profile: |
21 | +static bool have_isel; | 58 | -# |
22 | 59 | -# Query TCG profiling information | |
23 | #ifndef CONFIG_SOFTMMU | 60 | -# |
24 | #define TCG_GUEST_BASE_REG 30 | 61 | -# Features: |
25 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, | 62 | -# |
26 | /* If we have ISEL, we can implement everything with 3 or 4 insns. | 63 | -# @unstable: This command is meant for debugging. |
27 | All other cases below are also at least 3 insns, so speed up the | 64 | -# |
28 | code generator by not considering them and always using ISEL. */ | 65 | -# Returns: profile information |
29 | - if (HAVE_ISEL) { | 66 | -# |
30 | + if (have_isel) { | 67 | -# Since: 6.2 |
31 | int isel, tab; | 68 | -## |
32 | 69 | -{ 'command': 'x-query-profile', | |
33 | tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); | 70 | - 'returns': 'HumanReadableText', |
34 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, | 71 | - 'if': 'CONFIG_TCG', |
35 | 72 | - 'features': [ 'unstable' ] } | |
36 | tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); | 73 | - |
37 | 74 | ## | |
38 | - if (HAVE_ISEL) { | 75 | # @x-query-ramblock: |
39 | + if (have_isel) { | 76 | # |
40 | int isel = tcg_to_isel[cond]; | 77 | diff --git a/include/qemu/timer.h b/include/qemu/timer.h |
41 | 78 | index XXXXXXX..XXXXXXX 100644 | |
42 | /* Swap the V operands if the operation indicates inversion. */ | 79 | --- a/include/qemu/timer.h |
43 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, | 80 | +++ b/include/qemu/timer.h |
44 | } else { | 81 | @@ -XXX,XX +XXX,XX @@ static inline int64_t cpu_get_host_ticks(void) |
45 | tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); | 82 | } |
46 | /* Note that the only other valid constant for a2 is 0. */ | 83 | #endif |
47 | - if (HAVE_ISEL) { | 84 | |
48 | + if (have_isel) { | 85 | -#ifdef CONFIG_PROFILER |
49 | tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); | 86 | -static inline int64_t profile_getclock(void) |
50 | tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); | 87 | -{ |
51 | } else if (!const_a2 && a0 == a2) { | 88 | - return get_clock(); |
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 89 | -} |
90 | - | ||
91 | -extern int64_t dev_time; | ||
92 | -#endif | ||
93 | - | ||
94 | #endif | ||
95 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/include/tcg/tcg.h | ||
98 | +++ b/include/tcg/tcg.h | ||
99 | @@ -XXX,XX +XXX,XX @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) | ||
100 | return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0; | ||
101 | } | ||
102 | |||
103 | -typedef struct TCGProfile { | ||
104 | - int64_t cpu_exec_time; | ||
105 | - int64_t tb_count1; | ||
106 | - int64_t tb_count; | ||
107 | - int64_t op_count; /* total insn count */ | ||
108 | - int op_count_max; /* max insn per TB */ | ||
109 | - int temp_count_max; | ||
110 | - int64_t temp_count; | ||
111 | - int64_t del_op_count; | ||
112 | - int64_t code_in_len; | ||
113 | - int64_t code_out_len; | ||
114 | - int64_t search_out_len; | ||
115 | - int64_t interm_time; | ||
116 | - int64_t code_time; | ||
117 | - int64_t la_time; | ||
118 | - int64_t opt_time; | ||
119 | - int64_t restore_count; | ||
120 | - int64_t restore_time; | ||
121 | - int64_t table_op_count[NB_OPS]; | ||
122 | -} TCGProfile; | ||
123 | - | ||
124 | struct TCGContext { | ||
125 | uint8_t *pool_cur, *pool_end; | ||
126 | TCGPool *pool_first, *pool_current, *pool_first_large; | ||
127 | @@ -XXX,XX +XXX,XX @@ struct TCGContext { | ||
128 | tcg_insn_unit *code_buf; /* pointer for start of tb */ | ||
129 | tcg_insn_unit *code_ptr; /* pointer for running end of tb */ | ||
130 | |||
131 | -#ifdef CONFIG_PROFILER | ||
132 | - TCGProfile prof; | ||
133 | -#endif | ||
134 | - | ||
135 | #ifdef CONFIG_DEBUG_TCG | ||
136 | int goto_tb_issue_mask; | ||
137 | const TCGOpcode *vecop_list; | ||
138 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr tcg_temp_new_ptr(void) | ||
139 | return temp_tcgv_ptr(t); | ||
140 | } | ||
141 | |||
142 | -int64_t tcg_cpu_exec_time(void); | ||
143 | void tcg_dump_info(GString *buf); | ||
144 | void tcg_dump_op_count(GString *buf); | ||
145 | |||
146 | diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c | ||
147 | index XXXXXXX..XXXXXXX 100644 | ||
148 | --- a/accel/tcg/monitor.c | ||
149 | +++ b/accel/tcg/monitor.c | ||
150 | @@ -XXX,XX +XXX,XX @@ HumanReadableText *qmp_x_query_opcount(Error **errp) | ||
151 | return human_readable_text_from_str(buf); | ||
152 | } | ||
153 | |||
154 | -#ifdef CONFIG_PROFILER | ||
155 | - | ||
156 | -int64_t dev_time; | ||
157 | - | ||
158 | -HumanReadableText *qmp_x_query_profile(Error **errp) | ||
159 | -{ | ||
160 | - g_autoptr(GString) buf = g_string_new(""); | ||
161 | - static int64_t last_cpu_exec_time; | ||
162 | - int64_t cpu_exec_time; | ||
163 | - int64_t delta; | ||
164 | - | ||
165 | - cpu_exec_time = tcg_cpu_exec_time(); | ||
166 | - delta = cpu_exec_time - last_cpu_exec_time; | ||
167 | - | ||
168 | - g_string_append_printf(buf, "async time %" PRId64 " (%0.3f)\n", | ||
169 | - dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); | ||
170 | - g_string_append_printf(buf, "qemu time %" PRId64 " (%0.3f)\n", | ||
171 | - delta, delta / (double)NANOSECONDS_PER_SECOND); | ||
172 | - last_cpu_exec_time = cpu_exec_time; | ||
173 | - dev_time = 0; | ||
174 | - | ||
175 | - return human_readable_text_from_str(buf); | ||
176 | -} | ||
177 | -#else | ||
178 | -HumanReadableText *qmp_x_query_profile(Error **errp) | ||
179 | -{ | ||
180 | - error_setg(errp, "Internal profiler not compiled"); | ||
181 | - return NULL; | ||
182 | -} | ||
183 | -#endif | ||
184 | - | ||
185 | static void hmp_tcg_register(void) | ||
186 | { | ||
187 | monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); | ||
188 | diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c | ||
189 | index XXXXXXX..XXXXXXX 100644 | ||
190 | --- a/accel/tcg/tcg-accel-ops.c | ||
191 | +++ b/accel/tcg/tcg-accel-ops.c | ||
192 | @@ -XXX,XX +XXX,XX @@ void tcg_cpus_destroy(CPUState *cpu) | ||
193 | int tcg_cpus_exec(CPUState *cpu) | ||
194 | { | ||
195 | int ret; | ||
196 | -#ifdef CONFIG_PROFILER | ||
197 | - int64_t ti; | ||
198 | -#endif | ||
199 | assert(tcg_enabled()); | ||
200 | -#ifdef CONFIG_PROFILER | ||
201 | - ti = profile_getclock(); | ||
202 | -#endif | ||
203 | cpu_exec_start(cpu); | ||
204 | ret = cpu_exec(cpu); | ||
205 | cpu_exec_end(cpu); | ||
206 | -#ifdef CONFIG_PROFILER | ||
207 | - qatomic_set(&tcg_ctx->prof.cpu_exec_time, | ||
208 | - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); | ||
209 | -#endif | ||
210 | return ret; | ||
211 | } | ||
212 | |||
213 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
214 | index XXXXXXX..XXXXXXX 100644 | ||
215 | --- a/accel/tcg/translate-all.c | ||
216 | +++ b/accel/tcg/translate-all.c | ||
217 | @@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | ||
218 | uintptr_t host_pc) | ||
219 | { | ||
220 | uint64_t data[TARGET_INSN_START_WORDS]; | ||
221 | -#ifdef CONFIG_PROFILER | ||
222 | - TCGProfile *prof = &tcg_ctx->prof; | ||
223 | - int64_t ti = profile_getclock(); | ||
224 | -#endif | ||
225 | int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); | ||
226 | |||
227 | if (insns_left < 0) { | ||
228 | @@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | ||
229 | } | ||
230 | |||
231 | cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); | ||
232 | - | ||
233 | -#ifdef CONFIG_PROFILER | ||
234 | - qatomic_set(&prof->restore_time, | ||
235 | - prof->restore_time + profile_getclock() - ti); | ||
236 | - qatomic_set(&prof->restore_count, prof->restore_count + 1); | ||
237 | -#endif | ||
238 | } | ||
239 | |||
240 | bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) | ||
241 | @@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, | ||
242 | tcg_ctx->cpu = NULL; | ||
243 | *max_insns = tb->icount; | ||
244 | |||
245 | -#ifdef CONFIG_PROFILER | ||
246 | - qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); | ||
247 | - qatomic_set(&tcg_ctx->prof.interm_time, | ||
248 | - tcg_ctx->prof.interm_time + profile_getclock() - *ti); | ||
249 | - *ti = profile_getclock(); | ||
250 | -#endif | ||
251 | - | ||
252 | return tcg_gen_code(tcg_ctx, tb, pc); | ||
253 | } | ||
254 | |||
255 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
256 | tb_page_addr_t phys_pc; | ||
257 | tcg_insn_unit *gen_code_buf; | ||
258 | int gen_code_size, search_size, max_insns; | ||
259 | -#ifdef CONFIG_PROFILER | ||
260 | - TCGProfile *prof = &tcg_ctx->prof; | ||
261 | -#endif | ||
262 | int64_t ti; | ||
263 | void *host_pc; | ||
264 | |||
265 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
266 | |||
267 | tb_overflow: | ||
268 | |||
269 | -#ifdef CONFIG_PROFILER | ||
270 | - /* includes aborted translations because of exceptions */ | ||
271 | - qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); | ||
272 | - ti = profile_getclock(); | ||
273 | -#endif | ||
274 | - | ||
275 | trace_translate_block(tb, pc, tb->tc.ptr); | ||
276 | |||
277 | gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); | ||
278 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
279 | */ | ||
280 | perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); | ||
281 | |||
282 | -#ifdef CONFIG_PROFILER | ||
283 | - qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); | ||
284 | - qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); | ||
285 | - qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); | ||
286 | - qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); | ||
287 | -#endif | ||
288 | - | ||
289 | if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && | ||
290 | qemu_log_in_addr_range(pc)) { | ||
291 | FILE *logfile = qemu_log_trylock(); | ||
292 | diff --git a/softmmu/runstate.c b/softmmu/runstate.c | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/softmmu/runstate.c | ||
295 | +++ b/softmmu/runstate.c | ||
296 | @@ -XXX,XX +XXX,XX @@ static bool main_loop_should_exit(int *status) | ||
297 | int qemu_main_loop(void) | ||
298 | { | ||
299 | int status = EXIT_SUCCESS; | ||
300 | -#ifdef CONFIG_PROFILER | ||
301 | - int64_t ti; | ||
302 | -#endif | ||
303 | |||
304 | while (!main_loop_should_exit(&status)) { | ||
305 | -#ifdef CONFIG_PROFILER | ||
306 | - ti = profile_getclock(); | ||
307 | -#endif | ||
308 | main_loop_wait(false); | ||
309 | -#ifdef CONFIG_PROFILER | ||
310 | - dev_time += profile_getclock() - ti; | ||
311 | -#endif | ||
312 | } | ||
313 | |||
314 | return status; | ||
315 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
316 | index XXXXXXX..XXXXXXX 100644 | ||
317 | --- a/tcg/tcg.c | ||
318 | +++ b/tcg/tcg.c | ||
319 | @@ -XXX,XX +XXX,XX @@ void tcg_op_remove(TCGContext *s, TCGOp *op) | ||
320 | QTAILQ_REMOVE(&s->ops, op, link); | ||
321 | QTAILQ_INSERT_TAIL(&s->free_ops, op, link); | ||
322 | s->nb_ops--; | ||
323 | - | ||
324 | -#ifdef CONFIG_PROFILER | ||
325 | - qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); | ||
326 | -#endif | ||
327 | } | ||
328 | |||
329 | void tcg_remove_ops_after(TCGOp *op) | ||
330 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, | ||
331 | tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); | ||
332 | } | ||
333 | |||
334 | -#ifdef CONFIG_PROFILER | ||
335 | - | ||
336 | -/* avoid copy/paste errors */ | ||
337 | -#define PROF_ADD(to, from, field) \ | ||
338 | - do { \ | ||
339 | - (to)->field += qatomic_read(&((from)->field)); \ | ||
340 | - } while (0) | ||
341 | - | ||
342 | -#define PROF_MAX(to, from, field) \ | ||
343 | - do { \ | ||
344 | - typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ | ||
345 | - if (val__ > (to)->field) { \ | ||
346 | - (to)->field = val__; \ | ||
347 | - } \ | ||
348 | - } while (0) | ||
349 | - | ||
350 | -/* Pass in a zero'ed @prof */ | ||
351 | -static inline | ||
352 | -void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) | ||
353 | -{ | ||
354 | - unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); | ||
355 | - unsigned int i; | ||
356 | - | ||
357 | - for (i = 0; i < n_ctxs; i++) { | ||
358 | - TCGContext *s = qatomic_read(&tcg_ctxs[i]); | ||
359 | - const TCGProfile *orig = &s->prof; | ||
360 | - | ||
361 | - if (counters) { | ||
362 | - PROF_ADD(prof, orig, cpu_exec_time); | ||
363 | - PROF_ADD(prof, orig, tb_count1); | ||
364 | - PROF_ADD(prof, orig, tb_count); | ||
365 | - PROF_ADD(prof, orig, op_count); | ||
366 | - PROF_MAX(prof, orig, op_count_max); | ||
367 | - PROF_ADD(prof, orig, temp_count); | ||
368 | - PROF_MAX(prof, orig, temp_count_max); | ||
369 | - PROF_ADD(prof, orig, del_op_count); | ||
370 | - PROF_ADD(prof, orig, code_in_len); | ||
371 | - PROF_ADD(prof, orig, code_out_len); | ||
372 | - PROF_ADD(prof, orig, search_out_len); | ||
373 | - PROF_ADD(prof, orig, interm_time); | ||
374 | - PROF_ADD(prof, orig, code_time); | ||
375 | - PROF_ADD(prof, orig, la_time); | ||
376 | - PROF_ADD(prof, orig, opt_time); | ||
377 | - PROF_ADD(prof, orig, restore_count); | ||
378 | - PROF_ADD(prof, orig, restore_time); | ||
379 | - } | ||
380 | - if (table) { | ||
381 | - int i; | ||
382 | - | ||
383 | - for (i = 0; i < NB_OPS; i++) { | ||
384 | - PROF_ADD(prof, orig, table_op_count[i]); | ||
385 | - } | ||
386 | - } | ||
387 | - } | ||
388 | -} | ||
389 | - | ||
390 | -#undef PROF_ADD | ||
391 | -#undef PROF_MAX | ||
392 | - | ||
393 | -static void tcg_profile_snapshot_counters(TCGProfile *prof) | ||
394 | -{ | ||
395 | - tcg_profile_snapshot(prof, true, false); | ||
396 | -} | ||
397 | - | ||
398 | -static void tcg_profile_snapshot_table(TCGProfile *prof) | ||
399 | -{ | ||
400 | - tcg_profile_snapshot(prof, false, true); | ||
401 | -} | ||
402 | - | ||
403 | -void tcg_dump_op_count(GString *buf) | ||
404 | -{ | ||
405 | - TCGProfile prof = {}; | ||
406 | - int i; | ||
407 | - | ||
408 | - tcg_profile_snapshot_table(&prof); | ||
409 | - for (i = 0; i < NB_OPS; i++) { | ||
410 | - g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, | ||
411 | - prof.table_op_count[i]); | ||
412 | - } | ||
413 | -} | ||
414 | - | ||
415 | -int64_t tcg_cpu_exec_time(void) | ||
416 | -{ | ||
417 | - unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); | ||
418 | - unsigned int i; | ||
419 | - int64_t ret = 0; | ||
420 | - | ||
421 | - for (i = 0; i < n_ctxs; i++) { | ||
422 | - const TCGContext *s = qatomic_read(&tcg_ctxs[i]); | ||
423 | - const TCGProfile *prof = &s->prof; | ||
424 | - | ||
425 | - ret += qatomic_read(&prof->cpu_exec_time); | ||
426 | - } | ||
427 | - return ret; | ||
428 | -} | ||
429 | -#else | ||
430 | void tcg_dump_op_count(GString *buf) | ||
431 | { | ||
432 | g_string_append_printf(buf, "[TCG profiler not compiled]\n"); | ||
433 | } | ||
434 | |||
435 | -int64_t tcg_cpu_exec_time(void) | ||
436 | -{ | ||
437 | - error_report("%s: TCG profiler not compiled", __func__); | ||
438 | - exit(EXIT_FAILURE); | ||
439 | -} | ||
440 | -#endif | ||
441 | - | ||
442 | - | ||
443 | int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
444 | { | ||
445 | -#ifdef CONFIG_PROFILER | ||
446 | - TCGProfile *prof = &s->prof; | ||
447 | -#endif | ||
448 | int i, start_words, num_insns; | ||
449 | TCGOp *op; | ||
450 | |||
451 | -#ifdef CONFIG_PROFILER | ||
452 | - { | ||
453 | - int n = 0; | ||
454 | - | ||
455 | - QTAILQ_FOREACH(op, &s->ops, link) { | ||
456 | - n++; | ||
457 | - } | ||
458 | - qatomic_set(&prof->op_count, prof->op_count + n); | ||
459 | - if (n > prof->op_count_max) { | ||
460 | - qatomic_set(&prof->op_count_max, n); | ||
461 | - } | ||
462 | - | ||
463 | - n = s->nb_temps; | ||
464 | - qatomic_set(&prof->temp_count, prof->temp_count + n); | ||
465 | - if (n > prof->temp_count_max) { | ||
466 | - qatomic_set(&prof->temp_count_max, n); | ||
467 | - } | ||
468 | - } | ||
469 | -#endif | ||
470 | - | ||
471 | if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) | ||
472 | && qemu_log_in_addr_range(pc_start))) { | ||
473 | FILE *logfile = qemu_log_trylock(); | ||
474 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
53 | } | 475 | } |
54 | #endif | 476 | #endif |
55 | 477 | ||
56 | +#ifdef PPC_FEATURE2_HAS_ISEL | 478 | -#ifdef CONFIG_PROFILER |
57 | + /* Prefer explicit instruction from the kernel. */ | 479 | - qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); |
58 | + have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; | 480 | -#endif |
59 | +#else | 481 | - |
60 | + /* Fall back to knowing Power7 (2.06) has ISEL. */ | 482 | tcg_optimize(s); |
61 | + have_isel = have_isa_2_06; | 483 | |
62 | +#endif | 484 | -#ifdef CONFIG_PROFILER |
63 | + | 485 | - qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); |
64 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | 486 | - qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); |
65 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | 487 | -#endif |
66 | 488 | - | |
489 | reachable_code_pass(s); | ||
490 | liveness_pass_0(s); | ||
491 | liveness_pass_1(s); | ||
492 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
493 | } | ||
494 | } | ||
495 | |||
496 | -#ifdef CONFIG_PROFILER | ||
497 | - qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); | ||
498 | -#endif | ||
499 | - | ||
500 | if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) | ||
501 | && qemu_log_in_addr_range(pc_start))) { | ||
502 | FILE *logfile = qemu_log_trylock(); | ||
503 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
504 | QTAILQ_FOREACH(op, &s->ops, link) { | ||
505 | TCGOpcode opc = op->opc; | ||
506 | |||
507 | -#ifdef CONFIG_PROFILER | ||
508 | - qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); | ||
509 | -#endif | ||
510 | - | ||
511 | switch (opc) { | ||
512 | case INDEX_op_mov_i32: | ||
513 | case INDEX_op_mov_i64: | ||
514 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
515 | return tcg_current_code_size(s); | ||
516 | } | ||
517 | |||
518 | -#ifdef CONFIG_PROFILER | ||
519 | -void tcg_dump_info(GString *buf) | ||
520 | -{ | ||
521 | - TCGProfile prof = {}; | ||
522 | - const TCGProfile *s; | ||
523 | - int64_t tb_count; | ||
524 | - int64_t tb_div_count; | ||
525 | - int64_t tot; | ||
526 | - | ||
527 | - tcg_profile_snapshot_counters(&prof); | ||
528 | - s = &prof; | ||
529 | - tb_count = s->tb_count; | ||
530 | - tb_div_count = tb_count ? tb_count : 1; | ||
531 | - tot = s->interm_time + s->code_time; | ||
532 | - | ||
533 | - g_string_append_printf(buf, "JIT cycles %" PRId64 | ||
534 | - " (%0.3f s at 2.4 GHz)\n", | ||
535 | - tot, tot / 2.4e9); | ||
536 | - g_string_append_printf(buf, "translated TBs %" PRId64 | ||
537 | - " (aborted=%" PRId64 " %0.1f%%)\n", | ||
538 | - tb_count, s->tb_count1 - tb_count, | ||
539 | - (double)(s->tb_count1 - s->tb_count) | ||
540 | - / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); | ||
541 | - g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", | ||
542 | - (double)s->op_count / tb_div_count, s->op_count_max); | ||
543 | - g_string_append_printf(buf, "deleted ops/TB %0.2f\n", | ||
544 | - (double)s->del_op_count / tb_div_count); | ||
545 | - g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", | ||
546 | - (double)s->temp_count / tb_div_count, | ||
547 | - s->temp_count_max); | ||
548 | - g_string_append_printf(buf, "avg host code/TB %0.1f\n", | ||
549 | - (double)s->code_out_len / tb_div_count); | ||
550 | - g_string_append_printf(buf, "avg search data/TB %0.1f\n", | ||
551 | - (double)s->search_out_len / tb_div_count); | ||
552 | - | ||
553 | - g_string_append_printf(buf, "cycles/op %0.1f\n", | ||
554 | - s->op_count ? (double)tot / s->op_count : 0); | ||
555 | - g_string_append_printf(buf, "cycles/in byte %0.1f\n", | ||
556 | - s->code_in_len ? (double)tot / s->code_in_len : 0); | ||
557 | - g_string_append_printf(buf, "cycles/out byte %0.1f\n", | ||
558 | - s->code_out_len ? (double)tot / s->code_out_len : 0); | ||
559 | - g_string_append_printf(buf, "cycles/search byte %0.1f\n", | ||
560 | - s->search_out_len ? | ||
561 | - (double)tot / s->search_out_len : 0); | ||
562 | - if (tot == 0) { | ||
563 | - tot = 1; | ||
564 | - } | ||
565 | - g_string_append_printf(buf, " gen_interm time %0.1f%%\n", | ||
566 | - (double)s->interm_time / tot * 100.0); | ||
567 | - g_string_append_printf(buf, " gen_code time %0.1f%%\n", | ||
568 | - (double)s->code_time / tot * 100.0); | ||
569 | - g_string_append_printf(buf, "optim./code time %0.1f%%\n", | ||
570 | - (double)s->opt_time / (s->code_time ? | ||
571 | - s->code_time : 1) | ||
572 | - * 100.0); | ||
573 | - g_string_append_printf(buf, "liveness/code time %0.1f%%\n", | ||
574 | - (double)s->la_time / (s->code_time ? | ||
575 | - s->code_time : 1) * 100.0); | ||
576 | - g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", | ||
577 | - s->restore_count); | ||
578 | - g_string_append_printf(buf, " avg cycles %0.1f\n", | ||
579 | - s->restore_count ? | ||
580 | - (double)s->restore_time / s->restore_count : 0); | ||
581 | -} | ||
582 | -#else | ||
583 | void tcg_dump_info(GString *buf) | ||
584 | { | ||
585 | g_string_append_printf(buf, "[TCG profiler not compiled]\n"); | ||
586 | } | ||
587 | -#endif | ||
588 | |||
589 | #ifdef ELF_HOST_MACHINE | ||
590 | /* In order to use this feature, the backend needs to do three things: | ||
591 | diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c | ||
592 | index XXXXXXX..XXXXXXX 100644 | ||
593 | --- a/tests/qtest/qmp-cmd-test.c | ||
594 | +++ b/tests/qtest/qmp-cmd-test.c | ||
595 | @@ -XXX,XX +XXX,XX @@ static int query_error_class(const char *cmd) | ||
596 | { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, | ||
597 | { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, | ||
598 | { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, | ||
599 | -#ifndef CONFIG_PROFILER | ||
600 | - { "x-query-profile", ERROR_CLASS_GENERIC_ERROR }, | ||
601 | -#endif | ||
602 | /* Only valid with a USB bus added */ | ||
603 | { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, | ||
604 | /* Only valid with accel=tcg */ | ||
605 | diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx | ||
606 | index XXXXXXX..XXXXXXX 100644 | ||
607 | --- a/hmp-commands-info.hx | ||
608 | +++ b/hmp-commands-info.hx | ||
609 | @@ -XXX,XX +XXX,XX @@ SRST | ||
610 | Show host USB devices. | ||
611 | ERST | ||
612 | |||
613 | -#if defined(CONFIG_TCG) | ||
614 | - { | ||
615 | - .name = "profile", | ||
616 | - .args_type = "", | ||
617 | - .params = "", | ||
618 | - .help = "show profiling information", | ||
619 | - .cmd_info_hrt = qmp_x_query_profile, | ||
620 | - }, | ||
621 | -#endif | ||
622 | - | ||
623 | -SRST | ||
624 | - ``info profile`` | ||
625 | - Show profiling information. | ||
626 | -ERST | ||
627 | - | ||
628 | { | ||
629 | .name = "capture", | ||
630 | .args_type = "", | ||
631 | diff --git a/meson_options.txt b/meson_options.txt | ||
632 | index XXXXXXX..XXXXXXX 100644 | ||
633 | --- a/meson_options.txt | ||
634 | +++ b/meson_options.txt | ||
635 | @@ -XXX,XX +XXX,XX @@ option('qom_cast_debug', type: 'boolean', value: true, | ||
636 | option('gprof', type: 'boolean', value: false, | ||
637 | description: 'QEMU profiling with gprof', | ||
638 | deprecated: true) | ||
639 | -option('profiler', type: 'boolean', value: false, | ||
640 | - description: 'profiler support') | ||
641 | option('slirp_smbd', type : 'feature', value : 'auto', | ||
642 | description: 'use smbd (at path --smbd=*) in slirp networking') | ||
643 | |||
644 | diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh | ||
645 | index XXXXXXX..XXXXXXX 100644 | ||
646 | --- a/scripts/meson-buildoptions.sh | ||
647 | +++ b/scripts/meson-buildoptions.sh | ||
648 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { | ||
649 | printf "%s\n" ' jemalloc/system/tcmalloc)' | ||
650 | printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for' | ||
651 | printf "%s\n" ' upgrades' | ||
652 | - printf "%s\n" ' --enable-profiler profiler support' | ||
653 | printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and' | ||
654 | printf "%s\n" ' getrandom()' | ||
655 | printf "%s\n" ' --enable-safe-stack SafeStack Stack Smash Protection (requires' | ||
656 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { | ||
657 | --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;; | ||
658 | --enable-png) printf "%s" -Dpng=enabled ;; | ||
659 | --disable-png) printf "%s" -Dpng=disabled ;; | ||
660 | - --enable-profiler) printf "%s" -Dprofiler=true ;; | ||
661 | - --disable-profiler) printf "%s" -Dprofiler=false ;; | ||
662 | --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;; | ||
663 | --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;; | ||
664 | --enable-qcow1) printf "%s" -Dqcow1=enabled ;; | ||
67 | -- | 665 | -- |
68 | 2.17.1 | 666 | 2.34.1 |
69 | 667 | ||
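A closing note on the ISEL detection in the patch above: it leans on the Linux auxiliary vector. A minimal host-side probe, assuming a powerpc Linux host where glibc's <sys/auxv.h> and the kernel's PPC_FEATURE2_HAS_ISEL bit are available:

    #include <stdbool.h>
    #include <sys/auxv.h>        /* getauxval(), AT_HWCAP2 */
    #include <asm/cputable.h>    /* PPC_FEATURE2_HAS_ISEL */

    static bool host_has_isel(void)
    {
    #ifdef PPC_FEATURE2_HAS_ISEL
        /* Prefer the kernel's explicit report ... */
        return (getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_ISEL) != 0;
    #else
        /* ... otherwise fall back, as the patch does, on knowing that
         * ISA 2.06 (Power7) and newer implement isel. */
        return false;
    #endif
    }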
1 | These new instructions are conditional only on MSR.VEC and | 1 | From: Max Chou <max.chou@sifive.com> |
---|---|---|---|
2 | are thus part of the Altivec instruction set, and not VSX. | ||
3 | This includes lots of double-word arithmetic and a few extra | ||
4 | logical operations. | ||
5 | 2 | ||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 3 | The 5th parameter of tcg_gen_gvec_2s should be replaced by the |
4 | temporary tmp variable in the tcg_gen_gvec_andcs function. | ||
5 | |||
6 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
7 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
8 | Message-Id: <20230622161646.32005-9-max.chou@sifive.com> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 10 | --- |
9 | tcg/ppc/tcg-target.h | 4 +- | 11 | tcg/tcg-op-gvec.c | 2 +- |
10 | tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++---------- | 12 | 1 file changed, 1 insertion(+), 1 deletion(-) |
11 | 2 files changed, 67 insertions(+), 22 deletions(-) | ||
12 | 13 | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 14 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c |
14 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 16 | --- a/tcg/tcg-op-gvec.c |
16 | +++ b/tcg/ppc/tcg-target.h | 17 | +++ b/tcg/tcg-op-gvec.c |
17 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 18 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, |
18 | typedef enum { | 19 | |
19 | tcg_isa_base, | 20 | TCGv_i64 tmp = tcg_temp_ebb_new_i64(); |
20 | tcg_isa_2_06, | 21 | tcg_gen_dup_i64(vece, tmp, c); |
21 | + tcg_isa_2_07, | 22 | - tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g); |
22 | tcg_isa_3_00, | 23 | + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g); |
23 | } TCGPowerISA; | 24 | tcg_temp_free_i64(tmp); |
24 | 25 | } | |
25 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 26 | |
26 | extern bool have_vsx; | ||
27 | |||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
29 | +#define have_isa_2_07 (have_isa >= tcg_isa_2_07) | ||
30 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | ||
34 | #define TCG_TARGET_HAS_v256 0 | ||
35 | |||
36 | #define TCG_TARGET_HAS_andc_vec 1 | ||
37 | -#define TCG_TARGET_HAS_orc_vec 0 | ||
38 | +#define TCG_TARGET_HAS_orc_vec have_isa_2_07 | ||
39 | #define TCG_TARGET_HAS_not_vec 1 | ||
40 | #define TCG_TARGET_HAS_neg_vec 0 | ||
41 | #define TCG_TARGET_HAS_abs_vec 0 | ||
42 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/tcg/ppc/tcg-target.inc.c | ||
45 | +++ b/tcg/ppc/tcg-target.inc.c | ||
46 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
47 | #define VADDSWS VX4(896) | ||
48 | #define VADDUWS VX4(640) | ||
49 | #define VADDUWM VX4(128) | ||
50 | +#define VADDUDM VX4(192) /* v2.07 */ | ||
51 | |||
52 | #define VSUBSBS VX4(1792) | ||
53 | #define VSUBUBS VX4(1536) | ||
54 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
55 | #define VSUBSWS VX4(1920) | ||
56 | #define VSUBUWS VX4(1664) | ||
57 | #define VSUBUWM VX4(1152) | ||
58 | +#define VSUBUDM VX4(1216) /* v2.07 */ | ||
59 | |||
60 | #define VMAXSB VX4(258) | ||
61 | #define VMAXSH VX4(322) | ||
62 | #define VMAXSW VX4(386) | ||
63 | +#define VMAXSD VX4(450) /* v2.07 */ | ||
64 | #define VMAXUB VX4(2) | ||
65 | #define VMAXUH VX4(66) | ||
66 | #define VMAXUW VX4(130) | ||
67 | +#define VMAXUD VX4(194) /* v2.07 */ | ||
68 | #define VMINSB VX4(770) | ||
69 | #define VMINSH VX4(834) | ||
70 | #define VMINSW VX4(898) | ||
71 | +#define VMINSD VX4(962) /* v2.07 */ | ||
72 | #define VMINUB VX4(514) | ||
73 | #define VMINUH VX4(578) | ||
74 | #define VMINUW VX4(642) | ||
75 | +#define VMINUD VX4(706) /* v2.07 */ | ||
76 | |||
77 | #define VCMPEQUB VX4(6) | ||
78 | #define VCMPEQUH VX4(70) | ||
79 | #define VCMPEQUW VX4(134) | ||
80 | +#define VCMPEQUD VX4(199) /* v2.07 */ | ||
81 | #define VCMPGTSB VX4(774) | ||
82 | #define VCMPGTSH VX4(838) | ||
83 | #define VCMPGTSW VX4(902) | ||
84 | +#define VCMPGTSD VX4(967) /* v2.07 */ | ||
85 | #define VCMPGTUB VX4(518) | ||
86 | #define VCMPGTUH VX4(582) | ||
87 | #define VCMPGTUW VX4(646) | ||
88 | +#define VCMPGTUD VX4(711) /* v2.07 */ | ||
89 | |||
90 | #define VSLB VX4(260) | ||
91 | #define VSLH VX4(324) | ||
92 | #define VSLW VX4(388) | ||
93 | +#define VSLD VX4(1476) /* v2.07 */ | ||
94 | #define VSRB VX4(516) | ||
95 | #define VSRH VX4(580) | ||
96 | #define VSRW VX4(644) | ||
97 | +#define VSRD VX4(1732) /* v2.07 */ | ||
98 | #define VSRAB VX4(772) | ||
99 | #define VSRAH VX4(836) | ||
100 | #define VSRAW VX4(900) | ||
101 | +#define VSRAD VX4(964) /* v2.07 */ | ||
102 | #define VRLB VX4(4) | ||
103 | #define VRLH VX4(68) | ||
104 | #define VRLW VX4(132) | ||
105 | +#define VRLD VX4(196) /* v2.07 */ | ||
106 | |||
107 | #define VMULEUB VX4(520) | ||
108 | #define VMULEUH VX4(584) | ||
109 | +#define VMULEUW VX4(648) /* v2.07 */ | ||
110 | #define VMULOUB VX4(8) | ||
111 | #define VMULOUH VX4(72) | ||
112 | +#define VMULOUW VX4(136) /* v2.07 */ | ||
113 | +#define VMULUWM VX4(137) /* v2.07 */ | ||
114 | #define VMSUMUHM VX4(38) | ||
115 | |||
116 | #define VMRGHB VX4(12) | ||
117 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
118 | #define VNOR VX4(1284) | ||
119 | #define VOR VX4(1156) | ||
120 | #define VXOR VX4(1220) | ||
121 | +#define VEQV VX4(1668) /* v2.07 */ | ||
122 | +#define VNAND VX4(1412) /* v2.07 */ | ||
123 | +#define VORC VX4(1348) /* v2.07 */ | ||
124 | |||
125 | #define VSPLTB VX4(524) | ||
126 | #define VSPLTH VX4(588) | ||
127 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
128 | case INDEX_op_andc_vec: | ||
129 | case INDEX_op_not_vec: | ||
130 | return 1; | ||
131 | + case INDEX_op_orc_vec: | ||
132 | + return have_isa_2_07; | ||
133 | case INDEX_op_add_vec: | ||
134 | case INDEX_op_sub_vec: | ||
135 | case INDEX_op_smax_vec: | ||
136 | case INDEX_op_smin_vec: | ||
137 | case INDEX_op_umax_vec: | ||
138 | case INDEX_op_umin_vec: | ||
139 | + case INDEX_op_shlv_vec: | ||
140 | + case INDEX_op_shrv_vec: | ||
141 | + case INDEX_op_sarv_vec: | ||
142 | + return vece <= MO_32 || have_isa_2_07; | ||
143 | case INDEX_op_ssadd_vec: | ||
144 | case INDEX_op_sssub_vec: | ||
145 | case INDEX_op_usadd_vec: | ||
146 | case INDEX_op_ussub_vec: | ||
147 | - case INDEX_op_shlv_vec: | ||
148 | - case INDEX_op_shrv_vec: | ||
149 | - case INDEX_op_sarv_vec: | ||
150 | return vece <= MO_32; | ||
151 | case INDEX_op_cmp_vec: | ||
152 | - case INDEX_op_mul_vec: | ||
153 | case INDEX_op_shli_vec: | ||
154 | case INDEX_op_shri_vec: | ||
155 | case INDEX_op_sari_vec: | ||
156 | - return vece <= MO_32 ? -1 : 0; | ||
157 | + return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
158 | + case INDEX_op_mul_vec: | ||
159 | + switch (vece) { | ||
160 | + case MO_8: | ||
161 | + case MO_16: | ||
162 | + return -1; | ||
163 | + case MO_32: | ||
164 | + return have_isa_2_07 ? 1 : -1; | ||
165 | + } | ||
166 | + return 0; | ||
167 | case INDEX_op_bitsel_vec: | ||
168 | return have_vsx; | ||
169 | default: | ||
170 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
171 | const TCGArg *args, const int *const_args) | ||
172 | { | ||
173 | static const uint32_t | ||
174 | - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | ||
175 | - sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | ||
176 | - eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
177 | - gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
178 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
179 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
180 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
181 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
182 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
183 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
184 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
185 | usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
186 | sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
187 | ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
188 | - umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
189 | - smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
190 | - umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
191 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
192 | - shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
193 | - shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
194 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
195 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, | ||
196 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, | ||
197 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, | ||
198 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, | ||
199 | + shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, | ||
200 | + shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, | ||
201 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, | ||
202 | mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
203 | mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
204 | - muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
205 | - mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
206 | + muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, | ||
207 | + mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, | ||
208 | pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
209 | - rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
210 | + rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; | ||
211 | |||
212 | TCGType type = vecl + TCG_TYPE_V64; | ||
213 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
214 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
215 | case INDEX_op_sub_vec: | ||
216 | insn = sub_op[vece]; | ||
217 | break; | ||
218 | + case INDEX_op_mul_vec: | ||
219 | + tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
220 | + insn = VMULUWM; | ||
221 | + break; | ||
222 | case INDEX_op_ssadd_vec: | ||
223 | insn = ssadd_op[vece]; | ||
224 | break; | ||
225 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
226 | insn = VNOR; | ||
227 | a2 = a1; | ||
228 | break; | ||
229 | + case INDEX_op_orc_vec: | ||
230 | + insn = VORC; | ||
231 | + break; | ||
232 | |||
233 | case INDEX_op_cmp_vec: | ||
234 | switch (args[3]) { | ||
235 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
236 | { | ||
237 | bool need_swap = false, need_inv = false; | ||
238 | |||
239 | - tcg_debug_assert(vece <= MO_32); | ||
240 | + tcg_debug_assert(vece <= MO_32 || have_isa_2_07); | ||
241 | |||
242 | switch (cond) { | ||
243 | case TCG_COND_EQ: | ||
244 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
245 | break; | ||
246 | |||
247 | case MO_32: | ||
248 | + tcg_debug_assert(!have_isa_2_07); | ||
249 | t3 = tcg_temp_new_vec(type); | ||
250 | t4 = tcg_temp_new_vec(type); | ||
251 | tcg_gen_dupi_vec(MO_8, t4, -16); | ||
252 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
253 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
254 | have_isa = tcg_isa_2_06; | ||
255 | } | ||
256 | +#ifdef PPC_FEATURE2_ARCH_2_07 | ||
257 | + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { | ||
258 | + have_isa = tcg_isa_2_07; | ||
259 | + } | ||
260 | +#endif | ||
261 | #ifdef PPC_FEATURE2_ARCH_3_00 | ||
262 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | ||
263 | have_isa = tcg_isa_3_00; | ||
264 | -- | 27 | -- |
265 | 2.17.1 | 28 | 2.34.1 |
266 | |||
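
The tri-state that tcg_can_emit_vec_op() returns in the hunk above is easy to
misread, so here is a minimal standalone sketch of the contract as this patch
uses it for mul_vec (illustrative only; MO_* sizes written as their numeric
values):

    #include <stdbool.h>

    /*  1: the backend emits the opcode directly.
     * -1: generic code must expand it via tcg_expand_vec_op().
     *  0: unsupported for this element size. */
    static int can_emit_mul_vec(unsigned vece, bool have_isa_2_07)
    {
        switch (vece) {
        case 0: /* MO_8 */
        case 1: /* MO_16 */
            return -1;                      /* even/odd multiply expansion */
        case 2: /* MO_32 */
            return have_isa_2_07 ? 1 : -1;  /* VMULUWM on v2.07 */
        }
        return 0;                           /* no 64-bit vector multiply */
    }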
1 | Now that we have implemented the required tcg operations, | 1 | The microblaze architecture does not reorder instructions. |
---|---|---|---|
2 | we can enable detection of host vector support. | 2 | While there is an MBAR wait-for-data-access instruction, |
3 | this concerns synchronizing with DMA. | ||
3 | 4 | ||
4 | Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32) | 5 | This should have been defined when enabling MTTCG. |
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 6 | |
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Reviewed-by: Edgar E. Iglesias <edgar@zeroasic.com> | ||
9 | Fixes: d449561b130 ("configure: microblaze: Enable mttcg") | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 11 | --- |
8 | tcg/ppc/tcg-target.inc.c | 4 ++++ | 12 | target/microblaze/cpu.h | 3 +++ |
9 | 1 file changed, 4 insertions(+) | 13 | 1 file changed, 3 insertions(+) |
10 | 14 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 15 | diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h |
12 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 17 | --- a/target/microblaze/cpu.h |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 18 | +++ b/target/microblaze/cpu.h |
15 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 19 | @@ -XXX,XX +XXX,XX @@ |
16 | have_isel = have_isa_2_06; | 20 | #include "exec/cpu-defs.h" |
17 | #endif | 21 | #include "qemu/cpu-float.h" |
18 | 22 | ||
19 | + if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | 23 | +/* MicroBlaze is always in-order. */ |
20 | + have_altivec = true; | 24 | +#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL |
21 | + } | ||
22 | + | 25 | + |
23 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | 26 | typedef struct CPUArchState CPUMBState; |
24 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | 27 | #if !defined(CONFIG_USER_ONLY) |
25 | if (have_altivec) { | 28 | #include "mmu.h" |
26 | -- | 29 | -- |
27 | 2.17.1 | 30 | 2.34.1 |
28 | 31 | ||
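
For reference, the TCG_MO_ALL used by the microblaze definition above is the
union of four ordering-pair bits; the values below are quoted from tcg/tcg.h
as it stood for this series, shown only to make the definition concrete:

    /* Each bit names a "prior access -> later access" pair whose
     * program order the guest guarantees to preserve. */
    typedef enum {
        TCG_MO_LD_LD = 0x01,  /* earlier load before later load   */
        TCG_MO_ST_LD = 0x02,  /* earlier store before later load  */
        TCG_MO_LD_ST = 0x04,  /* earlier load before later store  */
        TCG_MO_ST_ST = 0x08,  /* earlier store before later store */
        TCG_MO_ALL   = 0x0f,  /* all four: a fully in-order guest */
    } TCGBarSketch;            /* hypothetical typedef name */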
1 | Add support for vector maximum/minimum using Altivec instructions | 1 | The virtio devices require proper memory ordering between |
---|---|---|---|
2 | VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and | 2 | the vcpus and the iothreads. |
3 | VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW. | ||
4 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
7 | --- | 6 | --- |
8 | tcg/ppc/tcg-target.h | 2 +- | 7 | tcg/tcg-op.c | 14 +++++++++++++- |
9 | tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++- | 8 | 1 file changed, 13 insertions(+), 1 deletion(-) |
10 | 2 files changed, 40 insertions(+), 2 deletions(-) | ||
11 | 9 | ||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 10 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c |
13 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/ppc/tcg-target.h | 12 | --- a/tcg/tcg-op.c |
15 | +++ b/tcg/ppc/tcg-target.h | 13 | +++ b/tcg/tcg-op.c |
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 14 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_br(TCGLabel *l) |
17 | #define TCG_TARGET_HAS_cmp_vec 1 | 15 | |
18 | #define TCG_TARGET_HAS_mul_vec 0 | 16 | void tcg_gen_mb(TCGBar mb_type) |
19 | #define TCG_TARGET_HAS_sat_vec 0 | 17 | { |
20 | -#define TCG_TARGET_HAS_minmax_vec 0 | 18 | - if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { |
21 | +#define TCG_TARGET_HAS_minmax_vec 1 | 19 | +#ifdef CONFIG_USER_ONLY |
22 | #define TCG_TARGET_HAS_bitsel_vec 0 | 20 | + bool parallel = tcg_ctx->gen_tb->cflags & CF_PARALLEL; |
23 | #define TCG_TARGET_HAS_cmpsel_vec 0 | 21 | +#else |
24 | 22 | + /* | |
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 23 | + * It is tempting to elide the barrier in a uniprocessor context. |
26 | index XXXXXXX..XXXXXXX 100644 | 24 | + * However, even with a single cpu we have i/o threads running in |
27 | --- a/tcg/ppc/tcg-target.inc.c | 25 | + * parallel, and lack of memory order can result in e.g. virtio |
28 | +++ b/tcg/ppc/tcg-target.inc.c | 26 | + * queue entries being read incorrectly. |
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 27 | + */ |
30 | #define STVX XO31(231) | 28 | + bool parallel = true; |
31 | #define STVEWX XO31(199) | 29 | +#endif |
32 | |||
33 | +#define VMAXSB VX4(258) | ||
34 | +#define VMAXSH VX4(322) | ||
35 | +#define VMAXSW VX4(386) | ||
36 | +#define VMAXUB VX4(2) | ||
37 | +#define VMAXUH VX4(66) | ||
38 | +#define VMAXUW VX4(130) | ||
39 | +#define VMINSB VX4(770) | ||
40 | +#define VMINSH VX4(834) | ||
41 | +#define VMINSW VX4(898) | ||
42 | +#define VMINUB VX4(514) | ||
43 | +#define VMINUH VX4(578) | ||
44 | +#define VMINUW VX4(642) | ||
45 | + | 30 | + |
46 | #define VCMPEQUB VX4(6) | 31 | + if (parallel) { |
47 | #define VCMPEQUH VX4(70) | 32 | tcg_gen_op1(INDEX_op_mb, mb_type); |
48 | #define VCMPEQUW VX4(134) | 33 | } |
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 34 | } |
50 | case INDEX_op_andc_vec: | ||
51 | case INDEX_op_not_vec: | ||
52 | return 1; | ||
53 | + case INDEX_op_smax_vec: | ||
54 | + case INDEX_op_smin_vec: | ||
55 | + case INDEX_op_umax_vec: | ||
56 | + case INDEX_op_umin_vec: | ||
57 | + return vece <= MO_32; | ||
58 | case INDEX_op_cmp_vec: | ||
59 | return vece <= MO_32 ? -1 : 0; | ||
60 | default: | ||
61 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
62 | static const uint32_t | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
64 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
65 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
66 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
67 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
68 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
69 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
70 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
71 | |||
72 | TCGType type = vecl + TCG_TYPE_V64; | ||
73 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
75 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
76 | return; | ||
77 | |||
78 | + case INDEX_op_smin_vec: | ||
79 | + insn = smin_op[vece]; | ||
80 | + break; | ||
81 | + case INDEX_op_umin_vec: | ||
82 | + insn = umin_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_smax_vec: | ||
85 | + insn = smax_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_umax_vec: | ||
88 | + insn = umax_op[vece]; | ||
89 | + break; | ||
90 | case INDEX_op_and_vec: | ||
91 | insn = VAND; | ||
92 | break; | ||
93 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
94 | case INDEX_op_andc_vec: | ||
95 | case INDEX_op_orc_vec: | ||
96 | case INDEX_op_cmp_vec: | ||
97 | + case INDEX_op_smax_vec: | ||
98 | + case INDEX_op_smin_vec: | ||
99 | + case INDEX_op_umax_vec: | ||
100 | + case INDEX_op_umin_vec: | ||
101 | return &v_v_v; | ||
102 | case INDEX_op_not_vec: | ||
103 | case INDEX_op_dup_vec: | ||
104 | -- | 35 | -- |
105 | 2.17.1 | 36 | 2.34.1 |
106 | 37 | ||
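
A standalone C11 sketch (not QEMU code) of the vcpu/iothread publish pattern
the commit message describes; on a weakly-ordered host, dropping the release
fence, which corresponds to eliding the guest barrier, lets the consumer
observe "published" before "payload":

    #include <stdatomic.h>

    static int payload;
    static atomic_int published;

    static void vcpu_publish(int v)          /* runs on the vcpu thread */
    {
        payload = v;
        atomic_thread_fence(memory_order_release);  /* the guest barrier */
        atomic_store_explicit(&published, 1, memory_order_relaxed);
    }

    static int iothread_consume(void)        /* runs on an iothread */
    {
        while (!atomic_load_explicit(&published, memory_order_relaxed)) {
        }
        atomic_thread_fence(memory_order_acquire);
        return payload;  /* now guaranteed to see the value stored above */
    }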
1 | Introduce macros VRT(), VRA(), VRB(), VRC(), used to encode the | 1 | Bring the helpers into line with the rest of tcg in respecting |
---|---|---|---|
2 | register operand fields of Altivec instructions. | 2 | guest memory ordering. |
3 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 6 | --- |
7 | tcg/ppc/tcg-target.inc.c | 5 +++++ | 7 | accel/tcg/internal.h | 34 ++++++++++++++++++++++++++++++++++ |
8 | 1 file changed, 5 insertions(+) | 8 | accel/tcg/cputlb.c | 10 ++++++++++ |
9 | 9 | accel/tcg/user-exec.c | 10 ++++++++++ | |
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 10 | 3 files changed, 54 insertions(+) |
11 | |||
12 | diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 14 | --- a/accel/tcg/internal.h |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 15 | +++ b/accel/tcg/internal.h |
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 16 | @@ -XXX,XX +XXX,XX @@ extern int64_t max_advance; |
15 | #define MB64(b) ((b)<<5) | 17 | |
16 | #define FXM(b) (1 << (19 - (b))) | 18 | extern bool one_insn_per_tb; |
17 | 19 | ||
18 | +#define VRT(r) (((r) & 31) << 21) | 20 | +/** |
19 | +#define VRA(r) (((r) & 31) << 16) | 21 | + * tcg_req_mo: |
20 | +#define VRB(r) (((r) & 31) << 11) | 22 | + * @type: TCGBar |
21 | +#define VRC(r) (((r) & 31) << 6) | 23 | + * |
24 | + * Filter @type to the barrier that is required for the guest | ||
25 | + * memory ordering vs the host memory ordering. A non-zero | ||
26 | + * result indicates that some barrier is required. | ||
27 | + * | ||
28 | + * If TCG_GUEST_DEFAULT_MO is not defined, assume that the | ||
29 | + * guest requires strict ordering. | ||
30 | + * | ||
31 | + * This is a macro so that it's constant even without optimization. | ||
32 | + */ | ||
33 | +#ifdef TCG_GUEST_DEFAULT_MO | ||
34 | +# define tcg_req_mo(type) \ | ||
35 | + ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) | ||
36 | +#else | ||
37 | +# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO) | ||
38 | +#endif | ||
22 | + | 39 | + |
23 | #define LK 1 | 40 | +/** |
24 | 41 | + * cpu_req_mo: | |
25 | #define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) | 42 | + * @type: TCGBar |
43 | + * | ||
44 | + * If tcg_req_mo indicates a barrier for @type is required | ||
45 | + * for the guest memory model, issue a host memory barrier. | ||
46 | + */ | ||
47 | +#define cpu_req_mo(type) \ | ||
48 | + do { \ | ||
49 | + if (tcg_req_mo(type)) { \ | ||
50 | + smp_mb(); \ | ||
51 | + } \ | ||
52 | + } while (0) | ||
53 | + | ||
54 | #endif /* ACCEL_TCG_INTERNAL_H */ | ||
55 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/accel/tcg/cputlb.c | ||
58 | +++ b/accel/tcg/cputlb.c | ||
59 | @@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, | ||
60 | MMULookupLocals l; | ||
61 | bool crosspage; | ||
62 | |||
63 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
64 | crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); | ||
65 | tcg_debug_assert(!crosspage); | ||
66 | |||
67 | @@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, | ||
68 | uint16_t ret; | ||
69 | uint8_t a, b; | ||
70 | |||
71 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
72 | crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); | ||
73 | if (likely(!crosspage)) { | ||
74 | return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); | ||
75 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, | ||
76 | bool crosspage; | ||
77 | uint32_t ret; | ||
78 | |||
79 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
80 | crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); | ||
81 | if (likely(!crosspage)) { | ||
82 | return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); | ||
83 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, | ||
84 | bool crosspage; | ||
85 | uint64_t ret; | ||
86 | |||
87 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
88 | crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); | ||
89 | if (likely(!crosspage)) { | ||
90 | return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); | ||
91 | @@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, | ||
92 | Int128 ret; | ||
93 | int first; | ||
94 | |||
95 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
96 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l); | ||
97 | if (likely(!crosspage)) { | ||
98 | /* Perform the load host endian. */ | ||
99 | @@ -XXX,XX +XXX,XX @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val, | ||
100 | bool crosspage; | ||
101 | |||
102 | tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8); | ||
103 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
104 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); | ||
105 | tcg_debug_assert(!crosspage); | ||
106 | |||
107 | @@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val, | ||
108 | bool crosspage; | ||
109 | uint8_t a, b; | ||
110 | |||
111 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
112 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); | ||
113 | if (likely(!crosspage)) { | ||
114 | do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra); | ||
115 | @@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val, | ||
116 | MMULookupLocals l; | ||
117 | bool crosspage; | ||
118 | |||
119 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
120 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); | ||
121 | if (likely(!crosspage)) { | ||
122 | do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra); | ||
123 | @@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val, | ||
124 | MMULookupLocals l; | ||
125 | bool crosspage; | ||
126 | |||
127 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
128 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); | ||
129 | if (likely(!crosspage)) { | ||
130 | do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra); | ||
131 | @@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, | ||
132 | uint64_t a, b; | ||
133 | int first; | ||
134 | |||
135 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
136 | crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); | ||
137 | if (likely(!crosspage)) { | ||
138 | /* Swap to host endian if necessary, then store. */ | ||
139 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
140 | index XXXXXXX..XXXXXXX 100644 | ||
141 | --- a/accel/tcg/user-exec.c | ||
142 | +++ b/accel/tcg/user-exec.c | ||
143 | @@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr, | ||
144 | uint8_t ret; | ||
145 | |||
146 | tcg_debug_assert((mop & MO_SIZE) == MO_8); | ||
147 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
148 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); | ||
149 | ret = ldub_p(haddr); | ||
150 | clear_helper_retaddr(); | ||
151 | @@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr, | ||
152 | uint16_t ret; | ||
153 | |||
154 | tcg_debug_assert((mop & MO_SIZE) == MO_16); | ||
155 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
156 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); | ||
157 | ret = load_atom_2(env, ra, haddr, mop); | ||
158 | clear_helper_retaddr(); | ||
159 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr, | ||
160 | uint32_t ret; | ||
161 | |||
162 | tcg_debug_assert((mop & MO_SIZE) == MO_32); | ||
163 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
164 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); | ||
165 | ret = load_atom_4(env, ra, haddr, mop); | ||
166 | clear_helper_retaddr(); | ||
167 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr, | ||
168 | uint64_t ret; | ||
169 | |||
170 | tcg_debug_assert((mop & MO_SIZE) == MO_64); | ||
171 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
172 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); | ||
173 | ret = load_atom_8(env, ra, haddr, mop); | ||
174 | clear_helper_retaddr(); | ||
175 | @@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr, | ||
176 | Int128 ret; | ||
177 | |||
178 | tcg_debug_assert((mop & MO_SIZE) == MO_128); | ||
179 | + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); | ||
180 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); | ||
181 | ret = load_atom_16(env, ra, haddr, mop); | ||
182 | clear_helper_retaddr(); | ||
183 | @@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, | ||
184 | void *haddr; | ||
185 | |||
186 | tcg_debug_assert((mop & MO_SIZE) == MO_8); | ||
187 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
188 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); | ||
189 | stb_p(haddr, val); | ||
190 | clear_helper_retaddr(); | ||
191 | @@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, | ||
192 | void *haddr; | ||
193 | |||
194 | tcg_debug_assert((mop & MO_SIZE) == MO_16); | ||
195 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
196 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); | ||
197 | |||
198 | if (mop & MO_BSWAP) { | ||
199 | @@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, | ||
200 | void *haddr; | ||
201 | |||
202 | tcg_debug_assert((mop & MO_SIZE) == MO_32); | ||
203 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
204 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); | ||
205 | |||
206 | if (mop & MO_BSWAP) { | ||
207 | @@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, | ||
208 | void *haddr; | ||
209 | |||
210 | tcg_debug_assert((mop & MO_SIZE) == MO_64); | ||
211 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
212 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); | ||
213 | |||
214 | if (mop & MO_BSWAP) { | ||
215 | @@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val, | ||
216 | void *haddr; | ||
217 | |||
218 | tcg_debug_assert((mop & MO_SIZE) == MO_128); | ||
219 | + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); | ||
220 | haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); | ||
221 | |||
222 | if (mop & MO_BSWAP) { | ||
26 | -- | 223 | -- |
27 | 2.17.1 | 224 | 2.34.1 |
28 | 225 | ||
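
A worked example of the tcg_req_mo() filter introduced above, using the
guest/host constants as they stand in the tree (x86 declares
TCG_MO_ALL & ~TCG_MO_ST_LD on both sides, the aarch64 backend declares 0);
treat the exact values as illustrative:

    #define MO_LD_LD 0x01
    #define MO_ST_LD 0x02
    #define MO_ALL   0x0f

    #define GUEST_MO_X86    (MO_ALL & ~MO_ST_LD)  /* TSO guest */
    #define HOST_MO_X86     (MO_ALL & ~MO_ST_LD)  /* TSO host  */
    #define HOST_MO_AARCH64 0                     /* weak host */

    #define req_mo(guest, host, type) ((type) & (guest) & ~(host))

    /* The load helpers request TCG_MO_LD_LD | TCG_MO_ST_LD == 0x03:
     *   req_mo(GUEST_MO_X86, HOST_MO_X86,     0x03) == 0x00 -> no barrier
     *   req_mo(GUEST_MO_X86, HOST_MO_AARCH64, 0x03) == 0x01 -> smp_mb() */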
1 | For Altivec, shift by immediate is done via vector shift by vector, | 1 | We now issue host memory barriers to match the guest memory order. |
---|---|---|---|
2 | after splatting the immediate into a register. | 2 | Continue to disable MTTCG only if the guest has not been ported. |
3 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 6 | --- |
7 | tcg/ppc/tcg-target.h | 2 +- | 7 | accel/tcg/tcg-all.c | 39 ++++++++++----------------------------- |
8 | tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++-- | 8 | 1 file changed, 10 insertions(+), 29 deletions(-) |
9 | 2 files changed, 57 insertions(+), 3 deletions(-) | ||
10 | 9 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 10 | diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c |
12 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 12 | --- a/accel/tcg/tcg-all.c |
14 | +++ b/tcg/ppc/tcg-target.h | 13 | +++ b/accel/tcg/tcg-all.c |
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 14 | @@ -XXX,XX +XXX,XX @@ DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE, |
16 | #define TCG_TARGET_HAS_abs_vec 0 | 15 | * they can set the appropriate CONFIG flags in ${target}-softmmu.mak |
17 | #define TCG_TARGET_HAS_shi_vec 0 | 16 | * |
18 | #define TCG_TARGET_HAS_shs_vec 0 | 17 | * Once a guest architecture has been converted to the new primitives |
19 | -#define TCG_TARGET_HAS_shv_vec 0 | 18 | - * there are two remaining limitations to check. |
20 | +#define TCG_TARGET_HAS_shv_vec 1 | 19 | - * |
21 | #define TCG_TARGET_HAS_cmp_vec 1 | 20 | - * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) |
22 | #define TCG_TARGET_HAS_mul_vec 0 | 21 | - * - The host must have a stronger memory order than the guest |
23 | #define TCG_TARGET_HAS_sat_vec 1 | 22 | - * |
24 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 23 | - * It may be possible in future to support strong guests on weak hosts |
25 | index XXXXXXX..XXXXXXX 100644 | 24 | - * but that will require tagging all load/stores in a guest with their |
26 | --- a/tcg/ppc/tcg-target.inc.c | 25 | - * implicit memory order requirements which would likely slow things |
27 | +++ b/tcg/ppc/tcg-target.inc.c | 26 | - * down a lot. |
28 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 27 | + * there is one remaining limitation to check: |
29 | #define VCMPGTUH VX4(582) | 28 | + * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) |
30 | #define VCMPGTUW VX4(646) | 29 | */ |
31 | 30 | ||
32 | +#define VSLB VX4(260) | 31 | -static bool check_tcg_memory_orders_compatible(void) |
33 | +#define VSLH VX4(324) | 32 | -{ |
34 | +#define VSLW VX4(388) | 33 | -#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO) |
35 | +#define VSRB VX4(516) | 34 | - return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0; |
36 | +#define VSRH VX4(580) | 35 | -#else |
37 | +#define VSRW VX4(644) | 36 | - return false; |
38 | +#define VSRAB VX4(772) | 37 | -#endif |
39 | +#define VSRAH VX4(836) | 38 | -} |
40 | +#define VSRAW VX4(900) | 39 | - |
41 | + | 40 | static bool default_mttcg_enabled(void) |
42 | #define VAND VX4(1028) | 41 | { |
43 | #define VANDC VX4(1092) | 42 | if (icount_enabled() || TCG_OVERSIZED_GUEST) { |
44 | #define VNOR VX4(1284) | 43 | return false; |
45 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 44 | - } else { |
46 | case INDEX_op_sssub_vec: | 45 | -#ifdef TARGET_SUPPORTS_MTTCG |
47 | case INDEX_op_usadd_vec: | 46 | - return check_tcg_memory_orders_compatible(); |
48 | case INDEX_op_ussub_vec: | 47 | -#else |
49 | + case INDEX_op_shlv_vec: | 48 | - return false; |
50 | + case INDEX_op_shrv_vec: | 49 | -#endif |
51 | + case INDEX_op_sarv_vec: | 50 | } |
52 | return vece <= MO_32; | 51 | +#ifdef TARGET_SUPPORTS_MTTCG |
53 | case INDEX_op_cmp_vec: | 52 | +# ifndef TCG_GUEST_DEFAULT_MO |
54 | + case INDEX_op_shli_vec: | 53 | +# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO" |
55 | + case INDEX_op_shri_vec: | 54 | +# endif |
56 | + case INDEX_op_sari_vec: | 55 | + return true; |
57 | return vece <= MO_32 ? -1 : 0; | 56 | +#else |
58 | default: | 57 | + return false; |
59 | return 0; | 58 | +#endif |
60 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
61 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
62 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
63 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
64 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
65 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
66 | + shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
67 | + shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
68 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
69 | |||
70 | TCGType type = vecl + TCG_TYPE_V64; | ||
71 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
73 | case INDEX_op_umax_vec: | ||
74 | insn = umax_op[vece]; | ||
75 | break; | ||
76 | + case INDEX_op_shlv_vec: | ||
77 | + insn = shlv_op[vece]; | ||
78 | + break; | ||
79 | + case INDEX_op_shrv_vec: | ||
80 | + insn = shrv_op[vece]; | ||
81 | + break; | ||
82 | + case INDEX_op_sarv_vec: | ||
83 | + insn = sarv_op[vece]; | ||
84 | + break; | ||
85 | case INDEX_op_and_vec: | ||
86 | insn = VAND; | ||
87 | break; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
89 | tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
90 | } | 59 | } |
91 | 60 | ||
92 | +static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, | 61 | static void tcg_accel_instance_init(Object *obj) |
93 | + TCGv_vec v1, TCGArg imm, TCGOpcode opci) | 62 | @@ -XXX,XX +XXX,XX @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp) |
94 | +{ | 63 | warn_report("Guest not yet converted to MTTCG - " |
95 | + TCGv_vec t1 = tcg_temp_new_vec(type); | 64 | "you may get unexpected results"); |
96 | + | 65 | #endif |
97 | + /* Splat w/bytes for xxspltib. */ | 66 | - if (!check_tcg_memory_orders_compatible()) { |
98 | + tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1)); | 67 | - warn_report("Guest expects a stronger memory ordering " |
99 | + vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), | 68 | - "than the host provides"); |
100 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); | 69 | - error_printf("This may cause strange/hard to debug errors\n"); |
101 | + tcg_temp_free_vec(t1); | 70 | - } |
102 | +} | 71 | s->mttcg_enabled = true; |
103 | + | 72 | } |
104 | static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | 73 | } else if (strcmp(value, "single") == 0) { |
105 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
106 | { | ||
107 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
108 | { | ||
109 | va_list va; | ||
110 | TCGv_vec v0, v1, v2; | ||
111 | + TCGArg a2; | ||
112 | |||
113 | va_start(va, a0); | ||
114 | v0 = temp_tcgv_vec(arg_temp(a0)); | ||
115 | v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
116 | - v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
117 | + a2 = va_arg(va, TCGArg); | ||
118 | |||
119 | switch (opc) { | ||
120 | + case INDEX_op_shli_vec: | ||
121 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); | ||
122 | + break; | ||
123 | + case INDEX_op_shri_vec: | ||
124 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); | ||
125 | + break; | ||
126 | + case INDEX_op_sari_vec: | ||
127 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); | ||
128 | + break; | ||
129 | case INDEX_op_cmp_vec: | ||
130 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
131 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
132 | break; | ||
133 | default: | ||
134 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
135 | case INDEX_op_smin_vec: | ||
136 | case INDEX_op_umax_vec: | ||
137 | case INDEX_op_umin_vec: | ||
138 | + case INDEX_op_shlv_vec: | ||
139 | + case INDEX_op_shrv_vec: | ||
140 | + case INDEX_op_sarv_vec: | ||
141 | return &v_v_v; | ||
142 | case INDEX_op_not_vec: | ||
143 | case INDEX_op_dup_vec: | ||
144 | -- | 74 | -- |
145 | 2.17.1 | 75 | 2.34.1 |
146 | 76 | ||
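
The mask in expand_vec_shi() above follows from the shift semantics: each
element only uses the low log2(element-bits) bits of the splatted count,
hence imm & ((8 << vece) - 1). A quick standalone check (vece written
numerically, 0/1/2 for MO_8/MO_16/MO_32):

    #include <stdio.h>

    int main(void)
    {
        for (unsigned vece = 0; vece <= 2; vece++) {
            unsigned bits = 8u << vece;                  /* 8, 16, 32 */
            printf("vece=%u mask=%u\n", vece, bits - 1); /* 7, 15, 31 */
        }
        return 0;
    }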
1 | This is identical to have_isa_2_06, so replace it. | 1 | We have run out of bits we can use within the CPUTLBEntry comparators, |
---|---|---|---|
2 | 2 | as TLB_FLAGS_MASK cannot overlap alignment. | |
3 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 3 | |
4 | Store slow_flags[] in CPUTLBEntryFull, and merge with the flags from | ||
5 | the comparator. A new TLB_FORCE_SLOW bit is set within the comparator | ||
6 | as an indication that the slow path must be used. | ||
7 | |||
8 | Move TLB_BSWAP to TLB_SLOW_FLAGS_MASK. Since we are out of bits, | ||
9 | we cannot create a new bit without moving an old one. | ||
10 | |||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 13 | --- |
6 | tcg/ppc/tcg-target.inc.c | 5 ++--- | 14 | include/exec/cpu-all.h | 21 +++++++-- |
7 | 1 file changed, 2 insertions(+), 3 deletions(-) | 15 | include/exec/cpu-defs.h | 6 +++ |
8 | 16 | include/hw/core/cpu.h | 1 + | |
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 17 | accel/tcg/cputlb.c | 98 ++++++++++++++++++++++++----------------- |
10 | index XXXXXXX..XXXXXXX 100644 | 18 | 4 files changed, 82 insertions(+), 44 deletions(-) |
11 | --- a/tcg/ppc/tcg-target.inc.c | 19 | |
12 | +++ b/tcg/ppc/tcg-target.inc.c | 20 | diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h |
13 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 21 | index XXXXXXX..XXXXXXX 100644 |
14 | 22 | --- a/include/exec/cpu-all.h | |
15 | TCGPowerISA have_isa; | 23 | +++ b/include/exec/cpu-all.h |
16 | 24 | @@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env); | |
17 | -#define HAVE_ISA_2_06 have_isa_2_06 | 25 | #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) |
18 | #define HAVE_ISEL have_isa_2_06 | 26 | /* Set if TLB entry contains a watchpoint. */ |
19 | 27 | #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) | |
20 | #ifndef CONFIG_SOFTMMU | 28 | -/* Set if TLB entry requires byte swap. */ |
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) | 29 | -#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) |
30 | +/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ | ||
31 | +#define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) | ||
32 | /* Set if TLB entry writes ignored. */ | ||
33 | #define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) | ||
34 | |||
35 | -/* Use this mask to check interception with an alignment mask | ||
36 | +/* | ||
37 | + * Use this mask to check interception with an alignment mask | ||
38 | * in a TCG backend. | ||
39 | */ | ||
40 | #define TLB_FLAGS_MASK \ | ||
41 | (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ | ||
42 | - | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) | ||
43 | + | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) | ||
44 | + | ||
45 | +/* | ||
46 | + * Flags stored in CPUTLBEntryFull.slow_flags[x]. | ||
47 | + * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x]. | ||
48 | + */ | ||
49 | +/* Set if TLB entry requires byte swap. */ | ||
50 | +#define TLB_BSWAP (1 << 0) | ||
51 | + | ||
52 | +#define TLB_SLOW_FLAGS_MASK TLB_BSWAP | ||
53 | + | ||
54 | +/* The two sets of flags must not overlap. */ | ||
55 | +QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK); | ||
56 | |||
57 | /** | ||
58 | * tlb_hit_page: return true if page aligned @addr is a hit against the | ||
59 | diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/include/exec/cpu-defs.h | ||
62 | +++ b/include/exec/cpu-defs.h | ||
63 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull { | ||
64 | /* @lg_page_size contains the log2 of the page size. */ | ||
65 | uint8_t lg_page_size; | ||
66 | |||
67 | + /* | ||
68 | + * Additional tlb flags for use by the slow path. If non-zero, | ||
69 | + * the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW. | ||
70 | + */ | ||
71 | + uint8_t slow_flags[MMU_ACCESS_COUNT]; | ||
72 | + | ||
73 | /* | ||
74 | * Allow target-specific additions to this structure. | ||
75 | * This may be used to cache items from the guest cpu | ||
76 | diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/include/hw/core/cpu.h | ||
79 | +++ b/include/hw/core/cpu.h | ||
80 | @@ -XXX,XX +XXX,XX @@ typedef enum MMUAccessType { | ||
81 | MMU_DATA_LOAD = 0, | ||
82 | MMU_DATA_STORE = 1, | ||
83 | MMU_INST_FETCH = 2 | ||
84 | +#define MMU_ACCESS_COUNT 3 | ||
85 | } MMUAccessType; | ||
86 | |||
87 | typedef struct CPUWatchpoint CPUWatchpoint; | ||
88 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/accel/tcg/cputlb.c | ||
91 | +++ b/accel/tcg/cputlb.c | ||
92 | @@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, | ||
93 | env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; | ||
94 | } | ||
95 | |||
96 | +static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent, | ||
97 | + target_ulong address, int flags, | ||
98 | + MMUAccessType access_type, bool enable) | ||
99 | +{ | ||
100 | + if (enable) { | ||
101 | + address |= flags & TLB_FLAGS_MASK; | ||
102 | + flags &= TLB_SLOW_FLAGS_MASK; | ||
103 | + if (flags) { | ||
104 | + address |= TLB_FORCE_SLOW; | ||
105 | + } | ||
106 | + } else { | ||
107 | + address = -1; | ||
108 | + flags = 0; | ||
109 | + } | ||
110 | + ent->addr_idx[access_type] = address; | ||
111 | + full->slow_flags[access_type] = flags; | ||
112 | +} | ||
113 | + | ||
114 | /* | ||
115 | * Add a new TLB entry. At most one entry for a given virtual address | ||
116 | * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the | ||
117 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
118 | CPUTLB *tlb = env_tlb(env); | ||
119 | CPUTLBDesc *desc = &tlb->d[mmu_idx]; | ||
120 | MemoryRegionSection *section; | ||
121 | - unsigned int index; | ||
122 | - vaddr address; | ||
123 | - vaddr write_address; | ||
124 | + unsigned int index, read_flags, write_flags; | ||
125 | uintptr_t addend; | ||
126 | CPUTLBEntry *te, tn; | ||
127 | hwaddr iotlb, xlat, sz, paddr_page; | ||
128 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
129 | " prot=%x idx=%d\n", | ||
130 | addr, full->phys_addr, prot, mmu_idx); | ||
131 | |||
132 | - address = addr_page; | ||
133 | + read_flags = 0; | ||
134 | if (full->lg_page_size < TARGET_PAGE_BITS) { | ||
135 | /* Repeat the MMU check and TLB fill on every access. */ | ||
136 | - address |= TLB_INVALID_MASK; | ||
137 | + read_flags |= TLB_INVALID_MASK; | ||
138 | } | ||
139 | if (full->attrs.byte_swap) { | ||
140 | - address |= TLB_BSWAP; | ||
141 | + read_flags |= TLB_BSWAP; | ||
142 | } | ||
143 | |||
144 | is_ram = memory_region_is_ram(section->mr); | ||
145 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
146 | addend = 0; | ||
147 | } | ||
148 | |||
149 | - write_address = address; | ||
150 | + write_flags = read_flags; | ||
151 | if (is_ram) { | ||
152 | iotlb = memory_region_get_ram_addr(section->mr) + xlat; | ||
153 | /* | ||
154 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
155 | */ | ||
156 | if (prot & PAGE_WRITE) { | ||
157 | if (section->readonly) { | ||
158 | - write_address |= TLB_DISCARD_WRITE; | ||
159 | + write_flags |= TLB_DISCARD_WRITE; | ||
160 | } else if (cpu_physical_memory_is_clean(iotlb)) { | ||
161 | - write_address |= TLB_NOTDIRTY; | ||
162 | + write_flags |= TLB_NOTDIRTY; | ||
163 | } | ||
22 | } | 164 | } |
23 | } else { | 165 | } else { |
24 | uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; | 166 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, |
25 | - if (!HAVE_ISA_2_06 && insn == LDBRX) { | 167 | * Reads to romd devices go through the ram_ptr found above, |
26 | + if (!have_isa_2_06 && insn == LDBRX) { | 168 | * but of course reads to I/O must go through MMIO. |
27 | tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); | 169 | */ |
28 | tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); | 170 | - write_address |= TLB_MMIO; |
29 | tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); | 171 | + write_flags |= TLB_MMIO; |
30 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) | 172 | if (!is_romd) { |
173 | - address = write_address; | ||
174 | + read_flags = write_flags; | ||
31 | } | 175 | } |
32 | } else { | 176 | } |
33 | uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; | 177 | |
34 | - if (!HAVE_ISA_2_06 && insn == STDBRX) { | 178 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, |
35 | + if (!have_isa_2_06 && insn == STDBRX) { | 179 | * TARGET_PAGE_BITS, and either |
36 | tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); | 180 | * + the ram_addr_t of the page base of the target RAM (RAM) |
37 | tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); | 181 | * + the offset within section->mr of the page base (I/O, ROMD) |
38 | tcg_out_shri64(s, TCG_REG_R0, datalo, 32); | 182 | - * We subtract the vaddr_page (which is page aligned and thus won't |
183 | + * We subtract addr_page (which is page aligned and thus won't | ||
184 | * disturb the low bits) to give an offset which can be added to the | ||
185 | * (non-page-aligned) vaddr of the eventual memory access to get | ||
186 | * the MemoryRegion offset for the access. Note that the vaddr we | ||
187 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
188 | * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). | ||
189 | */ | ||
190 | desc->fulltlb[index] = *full; | ||
191 | - desc->fulltlb[index].xlat_section = iotlb - addr_page; | ||
192 | - desc->fulltlb[index].phys_addr = paddr_page; | ||
193 | + full = &desc->fulltlb[index]; | ||
194 | + full->xlat_section = iotlb - addr_page; | ||
195 | + full->phys_addr = paddr_page; | ||
196 | |||
197 | /* Now calculate the new entry */ | ||
198 | tn.addend = addend - addr_page; | ||
199 | - if (prot & PAGE_READ) { | ||
200 | - tn.addr_read = address; | ||
201 | - if (wp_flags & BP_MEM_READ) { | ||
202 | - tn.addr_read |= TLB_WATCHPOINT; | ||
203 | - } | ||
204 | - } else { | ||
205 | - tn.addr_read = -1; | ||
206 | - } | ||
207 | |||
208 | - if (prot & PAGE_EXEC) { | ||
209 | - tn.addr_code = address; | ||
210 | - } else { | ||
211 | - tn.addr_code = -1; | ||
212 | - } | ||
213 | + tlb_set_compare(full, &tn, addr_page, read_flags, | ||
214 | + MMU_INST_FETCH, prot & PAGE_EXEC); | ||
215 | |||
216 | - tn.addr_write = -1; | ||
217 | - if (prot & PAGE_WRITE) { | ||
218 | - tn.addr_write = write_address; | ||
219 | - if (prot & PAGE_WRITE_INV) { | ||
220 | - tn.addr_write |= TLB_INVALID_MASK; | ||
221 | - } | ||
222 | - if (wp_flags & BP_MEM_WRITE) { | ||
223 | - tn.addr_write |= TLB_WATCHPOINT; | ||
224 | - } | ||
225 | + if (wp_flags & BP_MEM_READ) { | ||
226 | + read_flags |= TLB_WATCHPOINT; | ||
227 | } | ||
228 | + tlb_set_compare(full, &tn, addr_page, read_flags, | ||
229 | + MMU_DATA_LOAD, prot & PAGE_READ); | ||
230 | + | ||
231 | + if (prot & PAGE_WRITE_INV) { | ||
232 | + write_flags |= TLB_INVALID_MASK; | ||
233 | + } | ||
234 | + if (wp_flags & BP_MEM_WRITE) { | ||
235 | + write_flags |= TLB_WATCHPOINT; | ||
236 | + } | ||
237 | + tlb_set_compare(full, &tn, addr_page, write_flags, | ||
238 | + MMU_DATA_STORE, prot & PAGE_WRITE); | ||
239 | |||
240 | copy_tlb_helper_locked(te, &tn); | ||
241 | tlb_n_used_entries_inc(env, mmu_idx); | ||
242 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, vaddr addr, | ||
243 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
244 | uint64_t tlb_addr = tlb_read_idx(entry, access_type); | ||
245 | vaddr page_addr = addr & TARGET_PAGE_MASK; | ||
246 | - int flags = TLB_FLAGS_MASK; | ||
247 | + int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW; | ||
248 | + CPUTLBEntryFull *full; | ||
249 | |||
250 | if (!tlb_hit_page(tlb_addr, page_addr)) { | ||
251 | if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) { | ||
252 | @@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, vaddr addr, | ||
253 | } | ||
254 | flags &= tlb_addr; | ||
255 | |||
256 | - *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index]; | ||
257 | + *pfull = full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; | ||
258 | + flags |= full->slow_flags[access_type]; | ||
259 | |||
260 | /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ | ||
261 | if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { | ||
262 | @@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, | ||
263 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
264 | uint64_t tlb_addr = tlb_read_idx(entry, access_type); | ||
265 | bool maybe_resized = false; | ||
266 | + CPUTLBEntryFull *full; | ||
267 | + int flags; | ||
268 | |||
269 | /* If the TLB entry is for a different page, reload and try again. */ | ||
270 | if (!tlb_hit(tlb_addr, addr)) { | ||
271 | @@ -XXX,XX +XXX,XX @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, | ||
272 | tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK; | ||
273 | } | ||
274 | |||
275 | - data->flags = tlb_addr & TLB_FLAGS_MASK; | ||
276 | - data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; | ||
277 | + full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; | ||
278 | + flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW); | ||
279 | + flags |= full->slow_flags[access_type]; | ||
280 | + | ||
281 | + data->full = full; | ||
282 | + data->flags = flags; | ||
283 | /* Compute haddr speculatively; depending on flags it might be invalid. */ | ||
284 | data->haddr = (void *)((uintptr_t)addr + entry->addend); | ||
285 | |||
39 | -- | 286 | -- |
40 | 2.17.1 | 287 | 2.34.1 |
41 | 288 | ||
42 | 289 | diff view generated by jsdifflib |
1 | The VSX instruction set includes double-word loads and | 1 | This frees up one bit of the primary tlb flags without |
---|---|---|---|
2 | stores, double-word load and splat, double-word permute, and bit | 2 | impacting the TLB_NOTDIRTY logic. |
3 | select, all of which require multiple operations in the Altivec | 3 | |
4 | instruction set. | ||
5 | 3 | ||
6 | Because the VSX registers map %vsr32 to %vr0, and we have no current | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | intention or need to use vector registers outside %vr0-%vr19, force | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't | 6 | --- |
9 | have to otherwise modify the VR[TABC] macros. | 7 | include/exec/cpu-all.h | 8 ++++---- |
8 | accel/tcg/cputlb.c | 18 ++++++++++++++---- | ||
9 | 2 files changed, 18 insertions(+), 8 deletions(-) | ||
10 | 10 | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h |
12 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
13 | --- | ||
14 | tcg/ppc/tcg-target.h | 5 ++-- | ||
15 | tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++---- | ||
16 | 2 files changed, 51 insertions(+), 6 deletions(-) | ||
17 | |||
18 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/tcg/ppc/tcg-target.h | 13 | --- a/include/exec/cpu-all.h |
21 | +++ b/tcg/ppc/tcg-target.h | 14 | +++ b/include/exec/cpu-all.h |
22 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 15 | @@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env); |
23 | 16 | #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) | |
24 | extern TCGPowerISA have_isa; | 17 | /* Set if TLB entry is an IO callback. */ |
25 | extern bool have_altivec; | 18 | #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) |
26 | +extern bool have_vsx; | 19 | -/* Set if TLB entry contains a watchpoint. */ |
27 | 20 | -#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) | |
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | 21 | /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ |
29 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | 22 | #define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) |
30 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 23 | /* Set if TLB entry writes ignored. */ |
31 | * instruction and substituting two 32-bit stores makes the generated | 24 | @@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env); |
32 | * code quite large. | ||
33 | */ | 25 | */ |
34 | -#define TCG_TARGET_HAS_v64 0 | 26 | #define TLB_FLAGS_MASK \ |
35 | +#define TCG_TARGET_HAS_v64 have_vsx | 27 | (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ |
36 | #define TCG_TARGET_HAS_v128 have_altivec | 28 | - | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) |
37 | #define TCG_TARGET_HAS_v256 0 | 29 | + | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) |
38 | 30 | ||
39 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 31 | /* |
40 | #define TCG_TARGET_HAS_mul_vec 1 | 32 | * Flags stored in CPUTLBEntryFull.slow_flags[x]. |
41 | #define TCG_TARGET_HAS_sat_vec 1 | 33 | @@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env); |
42 | #define TCG_TARGET_HAS_minmax_vec 1 | 34 | */ |
43 | -#define TCG_TARGET_HAS_bitsel_vec 0 | 35 | /* Set if TLB entry requires byte swap. */ |
44 | +#define TCG_TARGET_HAS_bitsel_vec have_vsx | 36 | #define TLB_BSWAP (1 << 0) |
45 | #define TCG_TARGET_HAS_cmpsel_vec 0 | 37 | +/* Set if TLB entry contains a watchpoint. */ |
46 | 38 | +#define TLB_WATCHPOINT (1 << 1) | |
47 | void flush_icache_range(uintptr_t start, uintptr_t stop); | 39 | |
48 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 40 | -#define TLB_SLOW_FLAGS_MASK TLB_BSWAP |
41 | +#define TLB_SLOW_FLAGS_MASK (TLB_BSWAP | TLB_WATCHPOINT) | ||
42 | |||
43 | /* The two sets of flags must not overlap. */ | ||
44 | QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK); | ||
45 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/tcg/ppc/tcg-target.inc.c | 47 | --- a/accel/tcg/cputlb.c |
51 | +++ b/tcg/ppc/tcg-target.inc.c | 48 | +++ b/accel/tcg/cputlb.c |
52 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 49 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
53 | TCGPowerISA have_isa; | 50 | */ |
54 | static bool have_isel; | 51 | goto stop_the_world; |
55 | bool have_altivec; | 52 | } |
56 | +bool have_vsx; | 53 | - /* Collect TLB_WATCHPOINT for read. */ |
57 | 54 | + /* Collect tlb flags for read. */ | |
58 | #ifndef CONFIG_SOFTMMU | 55 | tlb_addr |= tlbe->addr_read; |
59 | #define TCG_GUEST_BASE_REG 30 | 56 | |
60 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 57 | /* Notice an IO access or a needs-MMU-lookup access */ |
61 | #define LVEBX XO31(7) | 58 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, |
62 | #define LVEHX XO31(39) | 59 | notdirty_write(env_cpu(env), addr, size, full, retaddr); |
63 | #define LVEWX XO31(71) | 60 | } |
64 | +#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | 61 | |
65 | +#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | 62 | - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { |
66 | 63 | - cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, | |
67 | #define STVX XO31(231) | 64 | - BP_MEM_READ | BP_MEM_WRITE, retaddr); |
68 | #define STVEWX XO31(199) | 65 | + if (unlikely(tlb_addr & TLB_FORCE_SLOW)) { |
69 | +#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | 66 | + int wp_flags = 0; |
70 | |||
71 | #define VADDSBS VX4(768) | ||
72 | #define VADDUBS VX4(512) | ||
73 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
74 | |||
75 | #define VSLDOI VX4(44) | ||
76 | |||
77 | +#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | ||
78 | +#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | ||
79 | + | 67 | + |
80 | #define RT(r) ((r)<<21) | 68 | + if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) { |
81 | #define RS(r) ((r)<<21) | 69 | + wp_flags |= BP_MEM_WRITE; |
82 | #define RA(r) ((r)<<16) | ||
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
84 | add = 0; | ||
85 | } | ||
86 | |||
87 | - load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
88 | - if (TCG_TARGET_REG_BITS == 64) { | ||
89 | - new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
90 | + if (have_vsx) { | ||
91 | + load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; | ||
92 | + load_insn |= VRT(ret) | RB(TCG_REG_TMP1); | ||
93 | + if (TCG_TARGET_REG_BITS == 64) { | ||
94 | + new_pool_label(s, val, rel, s->code_ptr, add); | ||
95 | + } else { | ||
96 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
97 | + } | 70 | + } |
98 | } else { | 71 | + if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) { |
99 | - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | 72 | + wp_flags |= BP_MEM_READ; |
100 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | 73 | + } |
101 | + if (TCG_TARGET_REG_BITS == 64) { | 74 | + if (wp_flags) { |
102 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | 75 | + cpu_check_watchpoint(env_cpu(env), addr, size, |
103 | + } else { | 76 | + full->attrs, wp_flags, retaddr); |
104 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
105 | + } | 77 | + } |
106 | } | 78 | } |
107 | 79 | ||
108 | if (USE_REG_TB) { | 80 | return hostaddr; |
109 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
110 | /* fallthru */ | ||
111 | case TCG_TYPE_V64: | ||
112 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
113 | + if (have_vsx) { | ||
114 | + tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | ||
115 | + break; | ||
116 | + } | ||
117 | tcg_debug_assert((offset & 7) == 0); | ||
118 | tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
119 | if (offset & 8) { | ||
120 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
121 | /* fallthru */ | ||
122 | case TCG_TYPE_V64: | ||
123 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
124 | + if (have_vsx) { | ||
125 | + tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | ||
126 | + break; | ||
127 | + } | ||
128 | tcg_debug_assert((offset & 7) == 0); | ||
129 | if (offset & 8) { | ||
130 | tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
131 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
132 | case INDEX_op_shri_vec: | ||
133 | case INDEX_op_sari_vec: | ||
134 | return vece <= MO_32 ? -1 : 0; | ||
135 | + case INDEX_op_bitsel_vec: | ||
136 | + return have_vsx; | ||
137 | default: | ||
138 | return 0; | ||
139 | } | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
141 | tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
142 | break; | ||
143 | case MO_64: | ||
144 | + if (have_vsx) { | ||
145 | + tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); | ||
146 | + break; | ||
147 | + } | ||
148 | tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
149 | tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
150 | break; | ||
151 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
152 | tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
153 | break; | ||
154 | case MO_64: | ||
155 | + if (have_vsx) { | ||
156 | + tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); | ||
157 | + break; | ||
158 | + } | ||
159 | tcg_debug_assert((offset & 7) == 0); | ||
160 | tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
161 | tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
162 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
163 | } | ||
164 | break; | ||
165 | |||
166 | + case INDEX_op_bitsel_vec: | ||
167 | + tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); | ||
168 | + return; | ||
169 | + | ||
170 | case INDEX_op_dup2_vec: | ||
171 | assert(TCG_TARGET_REG_BITS == 32); | ||
172 | /* With inputs a1 = xLxx, a2 = xHxx */ | ||
173 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
174 | case INDEX_op_st_vec: | ||
175 | case INDEX_op_dupm_vec: | ||
176 | return &v_r; | ||
177 | + case INDEX_op_bitsel_vec: | ||
178 | case INDEX_op_ppc_msum_vec: | ||
179 | return &v_v_v_v; | ||
180 | |||
181 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
182 | |||
183 | if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | ||
184 | have_altivec = true; | ||
185 | + /* We only care about the portion of VSX that overlaps Altivec. */ | ||
186 | + if (hwcap & PPC_FEATURE_HAS_VSX) { | ||
187 | + have_vsx = true; | ||
188 | + } | ||
189 | } | ||
190 | |||
191 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
192 | -- | 81 | -- |
193 | 2.17.1 | 82 | 2.34.1 |
194 | 83 | ||
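The left-hand series gates every new VSX path above on a single have_vsx flag, and sets it only when Altivec itself is present. A minimal standalone sketch of that detection, assuming Linux getauxval() and the hwcap bits from <asm/cputable.h> (the numeric fallbacks below are illustrative assumptions, not an ABI reference):

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/auxv.h>

    /* Fallbacks matching Linux <asm/cputable.h>; treat the values as
     * assumptions for this sketch rather than authoritative. */
    #ifndef PPC_FEATURE_HAS_ALTIVEC
    #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
    #endif
    #ifndef PPC_FEATURE_HAS_VSX
    #define PPC_FEATURE_HAS_VSX     0x00000080
    #endif

    static bool have_altivec, have_vsx;

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
            have_altivec = true;
            /* Only the portion of VSX that overlaps the Altivec
             * registers is used, so VSX is never enabled alone. */
            if (hwcap & PPC_FEATURE_HAS_VSX) {
                have_vsx = true;
            }
        }
        printf("altivec=%d vsx=%d\n", have_altivec, have_vsx);
        return 0;
    }

Nesting the VSX test inside the Altivec one keeps the invariant "have_vsx implies have_altivec" in one place instead of at every use site.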
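On the right-hand side, the fast path now tests only TLB_FORCE_SLOW and defers the directional watchpoint bits to the slow_flags[] array in CPUTLBEntryFull. A simplified, compilable sketch of that folding step, with stand-in constants and types (the real definitions live in QEMU's cputlb code):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in values; only the relationships between them matter. */
    #define TLB_FORCE_SLOW  (1u << 7)
    #define TLB_WATCHPOINT  (1u << 8)   /* kept only in slow_flags[] */
    #define BP_MEM_READ     1
    #define BP_MEM_WRITE    2

    enum { MMU_DATA_LOAD, MMU_DATA_STORE, MMU_ACCESS_COUNT };

    typedef struct {
        uint32_t slow_flags[MMU_ACCESS_COUNT];  /* per-direction flags */
    } EntryFullSketch;

    /* One cheap test on the comparator bits decides whether the
     * out-of-line, per-direction flags need to be consulted at all. */
    static int watchpoint_bp_flags(uint32_t tlb_addr,
                                   const EntryFullSketch *full)
    {
        int wp_flags = 0;

        if (tlb_addr & TLB_FORCE_SLOW) {
            if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
                wp_flags |= BP_MEM_WRITE;
            }
            if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
                wp_flags |= BP_MEM_READ;
            }
        }
        return wp_flags;
    }

    int main(void)
    {
        EntryFullSketch f = {
            .slow_flags = { [MMU_DATA_LOAD] = TLB_WATCHPOINT }
        };
        printf("bp flags: %d\n", watchpoint_bp_flags(TLB_FORCE_SLOW, &f));
        return 0;
    }

Splitting the flags this way also lets a page be watched for reads and writes independently, which the single fast-path TLB_WATCHPOINT bit could not express.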
1 | Introduce an enum that orders the ISA levels base < 2.06 < 3.00.  Use macros to | 1 | Move TLB_DISCARD_WRITE to fill a hole in the set of bits. |
---|---|---|---|
2 | preserve the existing have_isa_2_06 and have_isa_3_00 predicates. | 2 | Reduce the total number of TLB flag bits by 1. |
3 | 3 | ||
4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 6 | --- |
7 | tcg/ppc/tcg-target.h | 12 ++++++++++-- | 7 | include/exec/cpu-all.h | 4 ++-- |
8 | tcg/ppc/tcg-target.inc.c | 8 ++++---- | 8 | tcg/tcg-op-ldst.c | 2 +- |
9 | 2 files changed, 14 insertions(+), 6 deletions(-) | 9 | 2 files changed, 3 insertions(+), 3 deletions(-) |
10 | 10 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 11 | diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h |
12 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 13 | --- a/include/exec/cpu-all.h |
14 | +++ b/tcg/ppc/tcg-target.h | 14 | +++ b/include/exec/cpu-all.h |
15 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 15 | @@ -XXX,XX +XXX,XX @@ CPUArchState *cpu_copy(CPUArchState *env); |
16 | TCG_AREG0 = TCG_REG_R27 | 16 | #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) |
17 | } TCGReg; | 17 | /* Set if TLB entry is an IO callback. */ |
18 | 18 | #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) | |
19 | -extern bool have_isa_2_06; | 19 | +/* Set if TLB entry writes ignored. */ |
20 | -extern bool have_isa_3_00; | 20 | +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 4)) |
21 | +typedef enum { | 21 | /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ |
22 | + tcg_isa_base, | 22 | #define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) |
23 | + tcg_isa_2_06, | 23 | -/* Set if TLB entry writes ignored. */ |
24 | + tcg_isa_3_00, | 24 | -#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) |
25 | +} TCGPowerISA; | 25 | |
26 | + | 26 | /* |
27 | +extern TCGPowerISA have_isa; | 27 | * Use this mask to check interception with an alignment mask |
28 | + | 28 | diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c |
29 | +#define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
30 | +#define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ | ||
34 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/tcg/ppc/tcg-target.inc.c | 30 | --- a/tcg/tcg-op-ldst.c |
37 | +++ b/tcg/ppc/tcg-target.inc.c | 31 | +++ b/tcg/tcg-op-ldst.c |
38 | @@ -XXX,XX +XXX,XX @@ | 32 | @@ -XXX,XX +XXX,XX @@ static void check_max_alignment(unsigned a_bits) |
39 | 33 | * The requested alignment cannot overlap the TLB flags. | |
40 | static tcg_insn_unit *tb_ret_addr; | 34 | * FIXME: Must keep the count up-to-date with "exec/cpu-all.h". |
41 | 35 | */ | |
42 | -bool have_isa_2_06; | 36 | - tcg_debug_assert(a_bits + 6 <= tcg_ctx->page_bits); |
43 | -bool have_isa_3_00; | 37 | + tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits); |
44 | +TCGPowerISA have_isa; | ||
45 | |||
46 | #define HAVE_ISA_2_06 have_isa_2_06 | ||
47 | #define HAVE_ISEL have_isa_2_06 | ||
48 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
49 | unsigned long hwcap = qemu_getauxval(AT_HWCAP); | ||
50 | unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); | ||
51 | |||
52 | + have_isa = tcg_isa_base; | ||
53 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
54 | - have_isa_2_06 = true; | ||
55 | + have_isa = tcg_isa_2_06; | ||
56 | } | ||
57 | #ifdef PPC_FEATURE2_ARCH_3_00 | ||
58 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | ||
59 | - have_isa_3_00 = true; | ||
60 | + have_isa = tcg_isa_3_00; | ||
61 | } | ||
62 | #endif | 38 | #endif |
39 | } | ||
63 | 40 | ||
64 | -- | 41 | -- |
65 | 2.17.1 | 42 | 2.34.1 |
66 | 43 | ||
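The TCGPowerISA patch on the left works because the enumerators are declared in ascending ISA order, so each "have at least this level" predicate collapses to a single comparison. A compilable sketch of the idiom, condensed from the diff above (the main() usage is illustrative only):

    #include <stdio.h>

    typedef enum {
        tcg_isa_base,
        tcg_isa_2_06,
        tcg_isa_3_00,   /* must stay in ascending ISA order */
    } TCGPowerISA;

    static TCGPowerISA have_isa = tcg_isa_base;

    /* Each predicate is one ordered comparison, so existing users of
     * the old have_isa_2_06 / have_isa_3_00 booleans compile unchanged. */
    #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
    #define have_isa_3_00  (have_isa >= tcg_isa_3_00)

    int main(void)
    {
        have_isa = tcg_isa_2_06;   /* as if set from getauxval(AT_HWCAP) */
        printf("2.06: %d  3.00: %d\n", have_isa_2_06, have_isa_3_00);
        return 0;
    }

Adding a future ISA level then means appending one enumerator and one macro, with no changes to the detection logic's callers.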
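For the renumbering on the right, a worked layout makes the "hole" visible. Assuming TARGET_PAGE_BITS_MIN = 12 and TLB_INVALID_MASK at bit -1 (not shown in the hunk, so an assumption here), the flags previously sat in bits 11, 10, 9, 7 and 6 with bit 8 unused; moving TLB_DISCARD_WRITE up to bit 8 packs them into bits 11..7, which is exactly why the alignment assertion in tcg-op-ldst.c relaxes from a_bits + 6 to a_bits + 5. A small sketch:

    #include <stdio.h>

    #define TARGET_PAGE_BITS_MIN 12   /* assumed example value */

    /* Layout after the patch; TLB_INVALID_MASK is assumed from context. */
    #define TLB_INVALID_MASK   (1 << (TARGET_PAGE_BITS_MIN - 1))
    #define TLB_NOTDIRTY       (1 << (TARGET_PAGE_BITS_MIN - 2))
    #define TLB_MMIO           (1 << (TARGET_PAGE_BITS_MIN - 3))
    #define TLB_DISCARD_WRITE  (1 << (TARGET_PAGE_BITS_MIN - 4))  /* moved */
    #define TLB_FORCE_SLOW     (1 << (TARGET_PAGE_BITS_MIN - 5))

    int main(void)
    {
        /* The lowest flag bit is now TARGET_PAGE_BITS_MIN - 5, so an
         * access alignment of up to a_bits + 5 fits below the flags. */
        printf("flags occupy bits %d..%d\n",
               TARGET_PAGE_BITS_MIN - 5, TARGET_PAGE_BITS_MIN - 1);
        return 0;
    }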