1 | The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb: | 1 | v3: One more try to fix macos issues. |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100) | 3 | |
4 | r~ | ||
5 | |||
6 | |||
7 | |||
8 | The following changes since commit e0209297cddd5e10a07e15fac5cca7aa1a8e0e59: | ||
9 | |||
10 | Merge tag 'pull-ufs-20250217' of https://gitlab.com/jeuk20.kim/qemu into staging (2025-02-18 10:58:48 +0800) | ||
4 | 11 | ||
5 | are available in the Git repository at: | 12 | are available in the Git repository at: |
6 | 13 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20191013 | 14 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250215-3 |
8 | 15 | ||
9 | for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328: | 16 | for you to fetch changes up to e726f65867087d86436de05e9f372a86ec1381a6: |
10 | 17 | ||
11 | cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400) | 18 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 (2025-02-18 08:29:03 -0800) |
12 | 19 | ||
13 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
14 | Host vector support for tcg/ppc. | 21 | tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS |
15 | Fix thread=single cpu kicking. | 22 | tcg: Cleanups after disallowing 64-on-32 |
23 | tcg: Introduce constraint for zero register | ||
24 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 | ||
25 | tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2 | ||
26 | linux-user: Move TARGET_SA_RESTORER out of generic/signal.h | ||
27 | linux-user: Fix alignment when unmapping excess reservation | ||
28 | target/sparc: Fix register selection for all F*TOx and FxTO* instructions | ||
29 | target/sparc: Fix gdbstub incorrectly handling registers f32-f62 | ||
30 | target/sparc: fake UltraSPARC T1 PCR and PIC registers | ||
16 | 31 | ||
17 | ---------------------------------------------------------------- | 32 | ---------------------------------------------------------------- |
18 | Alex Bennée (1): | 33 | Andreas Schwab (1): |
19 | cpus: kick all vCPUs when running thread=single | 34 | linux-user: Move TARGET_SA_RESTORER out of generic/signal.h |
20 | 35 | ||
21 | Richard Henderson (22): | 36 | Artyom Tarasenko (1): |
22 | tcg/ppc: Introduce Altivec registers | 37 | target/sparc: fake UltraSPARC T1 PCR and PIC registers |
23 | tcg/ppc: Introduce macro VX4() | ||
24 | tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC() | ||
25 | tcg/ppc: Create TCGPowerISA and have_isa | ||
26 | tcg/ppc: Replace HAVE_ISA_2_06 | ||
27 | tcg/ppc: Replace HAVE_ISEL macro with a variable | ||
28 | tcg/ppc: Enable tcg backend vector compilation | ||
29 | tcg/ppc: Add support for load/store/logic/comparison | ||
30 | tcg/ppc: Add support for vector maximum/minimum | ||
31 | tcg/ppc: Add support for vector add/subtract | ||
32 | tcg/ppc: Add support for vector saturated add/subtract | ||
33 | tcg/ppc: Support vector shift by immediate | ||
34 | tcg/ppc: Support vector multiply | ||
35 | tcg/ppc: Support vector dup2 | ||
36 | tcg/ppc: Enable Altivec detection | ||
37 | tcg/ppc: Update vector support for VSX | ||
38 | tcg/ppc: Update vector support for v2.07 Altivec | ||
39 | tcg/ppc: Update vector support for v2.07 VSX | ||
40 | tcg/ppc: Update vector support for v2.07 FP | ||
41 | tcg/ppc: Update vector support for v3.00 Altivec | ||
42 | tcg/ppc: Update vector support for v3.00 load/store | ||
43 | tcg/ppc: Update vector support for v3.00 dup/dupi | ||
44 | 38 | ||
45 | tcg/ppc/tcg-target.h | 51 ++- | 39 | Fabiano Rosas (1): |
46 | tcg/ppc/tcg-target.opc.h | 13 + | 40 | elfload: Fix alignment when unmapping excess reservation |
47 | cpus.c | 24 +- | ||
48 | tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++---- | ||
49 | 4 files changed, 1119 insertions(+), 87 deletions(-) | ||
50 | create mode 100644 tcg/ppc/tcg-target.opc.h | ||
51 | 41 | ||
42 | Mikael Szreder (2): | ||
43 | target/sparc: Fix register selection for all F*TOx and FxTO* instructions | ||
44 | target/sparc: Fix gdbstub incorrectly handling registers f32-f62 | ||
45 | |||
46 | Richard Henderson (23): | ||
47 | tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS | ||
48 | tcg: Remove TCG_OVERSIZED_GUEST | ||
49 | tcg: Drop support for two address registers in gen_ldst | ||
50 | tcg: Merge INDEX_op_qemu_*_{a32,a64}_* | ||
51 | tcg/arm: Drop addrhi from prepare_host_addr | ||
52 | tcg/i386: Drop addrhi from prepare_host_addr | ||
53 | tcg/mips: Drop addrhi from prepare_host_addr | ||
54 | tcg/ppc: Drop addrhi from prepare_host_addr | ||
55 | tcg: Replace addr{lo,hi}_reg with addr_reg in TCGLabelQemuLdst | ||
56 | plugins: Fix qemu_plugin_read_memory_vaddr parameters | ||
57 | accel/tcg: Fix tlb_set_page_with_attrs, tlb_set_page | ||
58 | target/loongarch: Use VADDR_PRIx for logging pc_next | ||
59 | target/mips: Use VADDR_PRIx for logging pc_next | ||
60 | include/exec: Change vaddr to uintptr_t | ||
61 | include/exec: Use uintptr_t in CPUTLBEntry | ||
62 | tcg: Introduce the 'z' constraint for a hardware zero register | ||
63 | tcg/aarch64: Use 'z' constraint | ||
64 | tcg/loongarch64: Use 'z' constraint | ||
65 | tcg/mips: Use 'z' constraint | ||
66 | tcg/riscv: Use 'z' constraint | ||
67 | tcg/sparc64: Use 'z' constraint | ||
68 | tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2 | ||
69 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 | ||
70 | |||
71 | include/exec/tlb-common.h | 10 +- | ||
72 | include/exec/vaddr.h | 16 +- | ||
73 | include/qemu/atomic.h | 18 +- | ||
74 | include/tcg/oversized-guest.h | 23 --- | ||
75 | include/tcg/tcg-opc.h | 28 +-- | ||
76 | include/tcg/tcg.h | 3 +- | ||
77 | linux-user/aarch64/target_signal.h | 2 + | ||
78 | linux-user/arm/target_signal.h | 2 + | ||
79 | linux-user/generic/signal.h | 1 - | ||
80 | linux-user/i386/target_signal.h | 2 + | ||
81 | linux-user/m68k/target_signal.h | 1 + | ||
82 | linux-user/microblaze/target_signal.h | 2 + | ||
83 | linux-user/ppc/target_signal.h | 2 + | ||
84 | linux-user/s390x/target_signal.h | 2 + | ||
85 | linux-user/sh4/target_signal.h | 2 + | ||
86 | linux-user/x86_64/target_signal.h | 2 + | ||
87 | linux-user/xtensa/target_signal.h | 2 + | ||
88 | tcg/aarch64/tcg-target-con-set.h | 12 +- | ||
89 | tcg/aarch64/tcg-target.h | 2 + | ||
90 | tcg/loongarch64/tcg-target-con-set.h | 15 +- | ||
91 | tcg/loongarch64/tcg-target-con-str.h | 1 - | ||
92 | tcg/loongarch64/tcg-target-has.h | 2 - | ||
93 | tcg/loongarch64/tcg-target.h | 2 + | ||
94 | tcg/mips/tcg-target-con-set.h | 26 +-- | ||
95 | tcg/mips/tcg-target-con-str.h | 1 - | ||
96 | tcg/mips/tcg-target.h | 2 + | ||
97 | tcg/riscv/tcg-target-con-set.h | 10 +- | ||
98 | tcg/riscv/tcg-target-con-str.h | 1 - | ||
99 | tcg/riscv/tcg-target-has.h | 2 - | ||
100 | tcg/riscv/tcg-target.h | 2 + | ||
101 | tcg/sparc64/tcg-target-con-set.h | 12 +- | ||
102 | tcg/sparc64/tcg-target-con-str.h | 1 - | ||
103 | tcg/sparc64/tcg-target.h | 3 +- | ||
104 | tcg/tci/tcg-target.h | 1 - | ||
105 | accel/tcg/cputlb.c | 32 +--- | ||
106 | accel/tcg/tcg-all.c | 9 +- | ||
107 | linux-user/elfload.c | 4 +- | ||
108 | plugins/api.c | 2 +- | ||
109 | target/arm/ptw.c | 34 ---- | ||
110 | target/loongarch/tcg/translate.c | 2 +- | ||
111 | target/mips/tcg/octeon_translate.c | 4 +- | ||
112 | target/riscv/cpu_helper.c | 13 +- | ||
113 | target/sparc/gdbstub.c | 18 +- | ||
114 | target/sparc/translate.c | 19 +++ | ||
115 | tcg/optimize.c | 21 +-- | ||
116 | tcg/tcg-op-ldst.c | 103 +++-------- | ||
117 | tcg/tcg.c | 97 +++++------ | ||
118 | tcg/tci.c | 119 +++---------- | ||
119 | docs/devel/multi-thread-tcg.rst | 1 - | ||
120 | docs/devel/tcg-ops.rst | 4 +- | ||
121 | target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 2 +- | ||
122 | target/sparc/insns.decode | 19 ++- | ||
123 | tcg/aarch64/tcg-target.c.inc | 86 ++++------ | ||
124 | tcg/arm/tcg-target.c.inc | 114 ++++--------- | ||
125 | tcg/i386/tcg-target.c.inc | 190 +++++---------------- | ||
126 | tcg/loongarch64/tcg-target.c.inc | 72 +++----- | ||
127 | tcg/mips/tcg-target.c.inc | 169 ++++++------------ | ||
128 | tcg/ppc/tcg-target.c.inc | 164 +++++------------- | ||
129 | tcg/riscv/tcg-target.c.inc | 56 +++--- | ||
130 | tcg/s390x/tcg-target.c.inc | 40 ++--- | ||
131 | tcg/sparc64/tcg-target.c.inc | 45 ++--- | ||
132 | tcg/tci/tcg-target.c.inc | 60 ++----- | ||
133 | 62 files changed, 550 insertions(+), 1162 deletions(-) | ||
134 | delete mode 100644 include/tcg/oversized-guest.h | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Altivec supports 32 128-bit vector registers, whose names are | ||
2 | by convention v0 through v31. | ||
3 | 1 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.h | 11 ++++- | ||
8 | tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++--------------- | ||
9 | 2 files changed, 65 insertions(+), 34 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.h | ||
14 | +++ b/tcg/ppc/tcg-target.h | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | # define TCG_TARGET_REG_BITS 32 | ||
17 | #endif | ||
18 | |||
19 | -#define TCG_TARGET_NB_REGS 32 | ||
20 | +#define TCG_TARGET_NB_REGS 64 | ||
21 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
22 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
23 | |||
24 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
25 | TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27, | ||
26 | TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, | ||
27 | |||
28 | + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, | ||
29 | + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, | ||
30 | + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, | ||
31 | + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, | ||
32 | + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, | ||
33 | + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, | ||
34 | + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, | ||
35 | + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, | ||
36 | + | ||
37 | TCG_REG_CALL_STACK = TCG_REG_R1, | ||
38 | TCG_AREG0 = TCG_REG_R27 | ||
39 | } TCGReg; | ||
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/ppc/tcg-target.inc.c | ||
43 | +++ b/tcg/ppc/tcg-target.inc.c | ||
44 | @@ -XXX,XX +XXX,XX @@ | ||
45 | # define TCG_REG_TMP1 TCG_REG_R12 | ||
46 | #endif | ||
47 | |||
48 | +#define TCG_VEC_TMP1 TCG_REG_V0 | ||
49 | +#define TCG_VEC_TMP2 TCG_REG_V1 | ||
50 | + | ||
51 | #define TCG_REG_TB TCG_REG_R31 | ||
52 | #define USE_REG_TB (TCG_TARGET_REG_BITS == 64) | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ bool have_isa_3_00; | ||
55 | #endif | ||
56 | |||
57 | #ifdef CONFIG_DEBUG_TCG | ||
58 | -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | ||
59 | - "r0", | ||
60 | - "r1", | ||
61 | - "r2", | ||
62 | - "r3", | ||
63 | - "r4", | ||
64 | - "r5", | ||
65 | - "r6", | ||
66 | - "r7", | ||
67 | - "r8", | ||
68 | - "r9", | ||
69 | - "r10", | ||
70 | - "r11", | ||
71 | - "r12", | ||
72 | - "r13", | ||
73 | - "r14", | ||
74 | - "r15", | ||
75 | - "r16", | ||
76 | - "r17", | ||
77 | - "r18", | ||
78 | - "r19", | ||
79 | - "r20", | ||
80 | - "r21", | ||
81 | - "r22", | ||
82 | - "r23", | ||
83 | - "r24", | ||
84 | - "r25", | ||
85 | - "r26", | ||
86 | - "r27", | ||
87 | - "r28", | ||
88 | - "r29", | ||
89 | - "r30", | ||
90 | - "r31" | ||
91 | +static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { | ||
92 | + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", | ||
93 | + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", | ||
94 | + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", | ||
95 | + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", | ||
96 | + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | ||
97 | + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", | ||
98 | + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | ||
99 | + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", | ||
100 | }; | ||
101 | #endif | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
104 | TCG_REG_R5, | ||
105 | TCG_REG_R4, | ||
106 | TCG_REG_R3, | ||
107 | + | ||
108 | + /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ | ||
109 | + TCG_REG_V2, /* call clobbered, vectors */ | ||
110 | + TCG_REG_V3, | ||
111 | + TCG_REG_V4, | ||
112 | + TCG_REG_V5, | ||
113 | + TCG_REG_V6, | ||
114 | + TCG_REG_V7, | ||
115 | + TCG_REG_V8, | ||
116 | + TCG_REG_V9, | ||
117 | + TCG_REG_V10, | ||
118 | + TCG_REG_V11, | ||
119 | + TCG_REG_V12, | ||
120 | + TCG_REG_V13, | ||
121 | + TCG_REG_V14, | ||
122 | + TCG_REG_V15, | ||
123 | + TCG_REG_V16, | ||
124 | + TCG_REG_V17, | ||
125 | + TCG_REG_V18, | ||
126 | + TCG_REG_V19, | ||
127 | }; | ||
128 | |||
129 | static const int tcg_target_call_iarg_regs[] = { | ||
130 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
131 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); | ||
132 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); | ||
133 | |||
134 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); | ||
135 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); | ||
136 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); | ||
137 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); | ||
138 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); | ||
139 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); | ||
140 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); | ||
141 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); | ||
142 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); | ||
143 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); | ||
144 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); | ||
145 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); | ||
146 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); | ||
147 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); | ||
148 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); | ||
149 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); | ||
150 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); | ||
151 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); | ||
152 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); | ||
153 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); | ||
154 | + | ||
155 | s->reserved_regs = 0; | ||
156 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ | ||
157 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ | ||
158 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
159 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ | ||
160 | #endif | ||
161 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ | ||
162 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); | ||
163 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); | ||
164 | if (USE_REG_TB) { | ||
165 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ | ||
166 | } | ||
167 | -- | ||
168 | 2.17.1 | ||
169 | |||
170 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Introduce macro VX4() used for encoding Altivec instructions. | ||
2 | 1 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | ||
6 | tcg/ppc/tcg-target.inc.c | 1 + | ||
7 | 1 file changed, 1 insertion(+) | ||
8 | |||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/ppc/tcg-target.inc.c | ||
12 | +++ b/tcg/ppc/tcg-target.inc.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
14 | #define XO31(opc) (OPCD(31)|((opc)<<1)) | ||
15 | #define XO58(opc) (OPCD(58)|(opc)) | ||
16 | #define XO62(opc) (OPCD(62)|(opc)) | ||
17 | +#define VX4(opc) (OPCD(4)|(opc)) | ||
18 | |||
19 | #define B OPCD( 18) | ||
20 | #define BC OPCD( 16) | ||
21 | -- | ||
22 | 2.17.1 | ||
23 | |||
24 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding | ||
2 | elements of Altivec instructions. | ||
3 | 1 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.inc.c | 5 +++++ | ||
8 | 1 file changed, 5 insertions(+) | ||
9 | |||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/ppc/tcg-target.inc.c | ||
13 | +++ b/tcg/ppc/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
15 | #define MB64(b) ((b)<<5) | ||
16 | #define FXM(b) (1 << (19 - (b))) | ||
17 | |||
18 | +#define VRT(r) (((r) & 31) << 21) | ||
19 | +#define VRA(r) (((r) & 31) << 16) | ||
20 | +#define VRB(r) (((r) & 31) << 11) | ||
21 | +#define VRC(r) (((r) & 31) << 6) | ||
22 | + | ||
23 | #define LK 1 | ||
24 | |||
25 | #define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) | ||
26 | -- | ||
27 | 2.17.1 | ||
28 | |||
29 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Introduce an enum to hold base < 2.06 < 3.00. Use macros to | ||
2 | preserve the existing have_isa_2_06 and have_isa_3_00 predicates. | ||
3 | 1 | ||
4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.h | 12 ++++++++++-- | ||
8 | tcg/ppc/tcg-target.inc.c | 8 ++++---- | ||
9 | 2 files changed, 14 insertions(+), 6 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.h | ||
14 | +++ b/tcg/ppc/tcg-target.h | ||
15 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
16 | TCG_AREG0 = TCG_REG_R27 | ||
17 | } TCGReg; | ||
18 | |||
19 | -extern bool have_isa_2_06; | ||
20 | -extern bool have_isa_3_00; | ||
21 | +typedef enum { | ||
22 | + tcg_isa_base, | ||
23 | + tcg_isa_2_06, | ||
24 | + tcg_isa_3_00, | ||
25 | +} TCGPowerISA; | ||
26 | + | ||
27 | +extern TCGPowerISA have_isa; | ||
28 | + | ||
29 | +#define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
30 | +#define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ | ||
34 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/tcg/ppc/tcg-target.inc.c | ||
37 | +++ b/tcg/ppc/tcg-target.inc.c | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | |||
40 | static tcg_insn_unit *tb_ret_addr; | ||
41 | |||
42 | -bool have_isa_2_06; | ||
43 | -bool have_isa_3_00; | ||
44 | +TCGPowerISA have_isa; | ||
45 | |||
46 | #define HAVE_ISA_2_06 have_isa_2_06 | ||
47 | #define HAVE_ISEL have_isa_2_06 | ||
48 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
49 | unsigned long hwcap = qemu_getauxval(AT_HWCAP); | ||
50 | unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); | ||
51 | |||
52 | + have_isa = tcg_isa_base; | ||
53 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
54 | - have_isa_2_06 = true; | ||
55 | + have_isa = tcg_isa_2_06; | ||
56 | } | ||
57 | #ifdef PPC_FEATURE2_ARCH_3_00 | ||
58 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | ||
59 | - have_isa_3_00 = true; | ||
60 | + have_isa = tcg_isa_3_00; | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | -- | ||
65 | 2.17.1 | ||
66 | |||
67 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This is identical to have_isa_2_06, so replace it. | ||
2 | 1 | ||
3 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/ppc/tcg-target.inc.c | 5 ++--- | ||
7 | 1 file changed, 2 insertions(+), 3 deletions(-) | ||
8 | |||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/ppc/tcg-target.inc.c | ||
12 | +++ b/tcg/ppc/tcg-target.inc.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | ||
14 | |||
15 | TCGPowerISA have_isa; | ||
16 | |||
17 | -#define HAVE_ISA_2_06 have_isa_2_06 | ||
18 | #define HAVE_ISEL have_isa_2_06 | ||
19 | |||
20 | #ifndef CONFIG_SOFTMMU | ||
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) | ||
22 | } | ||
23 | } else { | ||
24 | uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; | ||
25 | - if (!HAVE_ISA_2_06 && insn == LDBRX) { | ||
26 | + if (!have_isa_2_06 && insn == LDBRX) { | ||
27 | tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); | ||
28 | tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); | ||
29 | tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); | ||
30 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) | ||
31 | } | ||
32 | } else { | ||
33 | uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; | ||
34 | - if (!HAVE_ISA_2_06 && insn == STDBRX) { | ||
35 | + if (!have_isa_2_06 && insn == STDBRX) { | ||
36 | tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); | ||
37 | tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); | ||
38 | tcg_out_shri64(s, TCG_REG_R0, datalo, 32); | ||
39 | -- | ||
40 | 2.17.1 | ||
41 | |||
42 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Previously we've been hard-coding knowledge that Power7 has ISEL, but | ||
2 | it was an optional instruction before that. Use the AT_HWCAP2 bit, | ||
3 | when present, to properly determine support. | ||
4 | 1 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/ppc/tcg-target.inc.c | 17 ++++++++++++----- | ||
9 | 1 file changed, 12 insertions(+), 5 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.inc.c | ||
14 | +++ b/tcg/ppc/tcg-target.inc.c | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | static tcg_insn_unit *tb_ret_addr; | ||
17 | |||
18 | TCGPowerISA have_isa; | ||
19 | - | ||
20 | -#define HAVE_ISEL have_isa_2_06 | ||
21 | +static bool have_isel; | ||
22 | |||
23 | #ifndef CONFIG_SOFTMMU | ||
24 | #define TCG_GUEST_BASE_REG 30 | ||
25 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, | ||
26 | /* If we have ISEL, we can implement everything with 3 or 4 insns. | ||
27 | All other cases below are also at least 3 insns, so speed up the | ||
28 | code generator by not considering them and always using ISEL. */ | ||
29 | - if (HAVE_ISEL) { | ||
30 | + if (have_isel) { | ||
31 | int isel, tab; | ||
32 | |||
33 | tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); | ||
34 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, | ||
35 | |||
36 | tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); | ||
37 | |||
38 | - if (HAVE_ISEL) { | ||
39 | + if (have_isel) { | ||
40 | int isel = tcg_to_isel[cond]; | ||
41 | |||
42 | /* Swap the V operands if the operation indicates inversion. */ | ||
43 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, | ||
44 | } else { | ||
45 | tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); | ||
46 | /* Note that the only other valid constant for a2 is 0. */ | ||
47 | - if (HAVE_ISEL) { | ||
48 | + if (have_isel) { | ||
49 | tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); | ||
50 | tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); | ||
51 | } else if (!const_a2 && a0 == a2) { | ||
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
53 | } | ||
54 | #endif | ||
55 | |||
56 | +#ifdef PPC_FEATURE2_HAS_ISEL | ||
57 | + /* Prefer explicit instruction from the kernel. */ | ||
58 | + have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; | ||
59 | +#else | ||
60 | + /* Fall back to knowing Power7 (2.06) has ISEL. */ | ||
61 | + have_isel = have_isa_2_06; | ||
62 | +#endif | ||
63 | + | ||
64 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
65 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | ||
66 | |||
67 | -- | ||
68 | 2.17.1 | ||
69 | |||
70 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Introduce all of the flags required to enable tcg backend vector support, | ||
2 | and a runtime flag to indicate the host supports Altivec instructions. | ||
3 | 1 | ||
4 | For now, do not actually set have_altivec to true, because we have not | ||
5 | yet added all of the code to actually generate all of the required insns. | ||
6 | However, we must define these flags in order to disable ifndefs that create | ||
7 | stub versions of the functions added here. | ||
8 | |||
9 | The change to tcg_out_movi works around a buglet in tcg.c wherein if we | ||
10 | do not define tcg_out_dupi_vec we get a declared but not defined Werror, | ||
11 | but if we only declare it we get a defined but not used Werror. We need | ||
12 | to make this change to tcg_out_movi eventually anyway, so it's no biggie. | ||
13 | |||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
16 | --- | ||
17 | tcg/ppc/tcg-target.h | 25 ++++++++++++++++ | ||
18 | tcg/ppc/tcg-target.opc.h | 5 ++++ | ||
19 | tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++-- | ||
20 | 3 files changed, 89 insertions(+), 3 deletions(-) | ||
21 | create mode 100644 tcg/ppc/tcg-target.opc.h | ||
22 | |||
23 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/tcg/ppc/tcg-target.h | ||
26 | +++ b/tcg/ppc/tcg-target.h | ||
27 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
28 | } TCGPowerISA; | ||
29 | |||
30 | extern TCGPowerISA have_isa; | ||
31 | +extern bool have_altivec; | ||
32 | |||
33 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
34 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
35 | @@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa; | ||
36 | #define TCG_TARGET_HAS_mulsh_i64 1 | ||
37 | #endif | ||
38 | |||
39 | +/* | ||
40 | + * While technically Altivec could support V64, it has no 64-bit store | ||
41 | + * instruction and substituting two 32-bit stores makes the generated | ||
42 | + * code quite large. | ||
43 | + */ | ||
44 | +#define TCG_TARGET_HAS_v64 0 | ||
45 | +#define TCG_TARGET_HAS_v128 have_altivec | ||
46 | +#define TCG_TARGET_HAS_v256 0 | ||
47 | + | ||
48 | +#define TCG_TARGET_HAS_andc_vec 0 | ||
49 | +#define TCG_TARGET_HAS_orc_vec 0 | ||
50 | +#define TCG_TARGET_HAS_not_vec 0 | ||
51 | +#define TCG_TARGET_HAS_neg_vec 0 | ||
52 | +#define TCG_TARGET_HAS_abs_vec 0 | ||
53 | +#define TCG_TARGET_HAS_shi_vec 0 | ||
54 | +#define TCG_TARGET_HAS_shs_vec 0 | ||
55 | +#define TCG_TARGET_HAS_shv_vec 0 | ||
56 | +#define TCG_TARGET_HAS_cmp_vec 0 | ||
57 | +#define TCG_TARGET_HAS_mul_vec 0 | ||
58 | +#define TCG_TARGET_HAS_sat_vec 0 | ||
59 | +#define TCG_TARGET_HAS_minmax_vec 0 | ||
60 | +#define TCG_TARGET_HAS_bitsel_vec 0 | ||
61 | +#define TCG_TARGET_HAS_cmpsel_vec 0 | ||
62 | + | ||
63 | void flush_icache_range(uintptr_t start, uintptr_t stop); | ||
64 | void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); | ||
65 | |||
66 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | ||
67 | new file mode 100644 | ||
68 | index XXXXXXX..XXXXXXX | ||
69 | --- /dev/null | ||
70 | +++ b/tcg/ppc/tcg-target.opc.h | ||
71 | @@ -XXX,XX +XXX,XX @@ | ||
72 | +/* | ||
73 | + * Target-specific opcodes for host vector expansion. These will be | ||
74 | + * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | ||
75 | + * consider these to be UNSPEC with names. | ||
76 | + */ | ||
77 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/tcg/ppc/tcg-target.inc.c | ||
80 | +++ b/tcg/ppc/tcg-target.inc.c | ||
81 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | ||
82 | |||
83 | TCGPowerISA have_isa; | ||
84 | static bool have_isel; | ||
85 | +bool have_altivec; | ||
86 | |||
87 | #ifndef CONFIG_SOFTMMU | ||
88 | #define TCG_GUEST_BASE_REG 30 | ||
89 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | ||
90 | } | ||
91 | } | ||
92 | |||
93 | -static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
94 | - tcg_target_long arg) | ||
95 | +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
96 | + tcg_target_long val) | ||
97 | { | ||
98 | - tcg_out_movi_int(s, type, ret, arg, false); | ||
99 | + g_assert_not_reached(); | ||
100 | +} | ||
101 | + | ||
102 | +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
103 | + tcg_target_long arg) | ||
104 | +{ | ||
105 | + switch (type) { | ||
106 | + case TCG_TYPE_I32: | ||
107 | + case TCG_TYPE_I64: | ||
108 | + tcg_debug_assert(ret < TCG_REG_V0); | ||
109 | + tcg_out_movi_int(s, type, ret, arg, false); | ||
110 | + break; | ||
111 | + | ||
112 | + case TCG_TYPE_V64: | ||
113 | + case TCG_TYPE_V128: | ||
114 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
115 | + tcg_out_dupi_vec(s, type, ret, arg); | ||
116 | + break; | ||
117 | + | ||
118 | + default: | ||
119 | + g_assert_not_reached(); | ||
120 | + } | ||
121 | } | ||
122 | |||
123 | static bool mask_operand(uint32_t c, int *mb, int *me) | ||
124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | ||
125 | } | ||
126 | } | ||
127 | |||
128 | +int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
129 | +{ | ||
130 | + g_assert_not_reached(); | ||
131 | +} | ||
132 | + | ||
133 | +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
134 | + TCGReg dst, TCGReg src) | ||
135 | +{ | ||
136 | + g_assert_not_reached(); | ||
137 | +} | ||
138 | + | ||
139 | +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
140 | + TCGReg out, TCGReg base, intptr_t offset) | ||
141 | +{ | ||
142 | + g_assert_not_reached(); | ||
143 | +} | ||
144 | + | ||
145 | +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
146 | + unsigned vecl, unsigned vece, | ||
147 | + const TCGArg *args, const int *const_args) | ||
148 | +{ | ||
149 | + g_assert_not_reached(); | ||
150 | +} | ||
151 | + | ||
152 | +void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
153 | + TCGArg a0, ...) | ||
154 | +{ | ||
155 | + g_assert_not_reached(); | ||
156 | +} | ||
157 | + | ||
158 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
159 | { | ||
160 | static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; | ||
161 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
162 | |||
163 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
164 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | ||
165 | + if (have_altivec) { | ||
166 | + tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; | ||
167 | + tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; | ||
168 | + } | ||
169 | |||
170 | tcg_target_call_clobber_regs = 0; | ||
171 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); | ||
172 | -- | ||
173 | 2.17.1 | ||
174 | |||
175 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add various bits and pieces related mostly to load and store | ||
2 | operations. In that context, logic, compare, and splat Altivec | ||
3 | instructions are used, and, therefore, the support for emitting | ||
4 | them is included in this patch too. | ||
5 | 1 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | ||
9 | tcg/ppc/tcg-target.h | 6 +- | ||
10 | tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++--- | ||
11 | 2 files changed, 442 insertions(+), 36 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/ppc/tcg-target.h | ||
16 | +++ b/tcg/ppc/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
18 | #define TCG_TARGET_HAS_v128 have_altivec | ||
19 | #define TCG_TARGET_HAS_v256 0 | ||
20 | |||
21 | -#define TCG_TARGET_HAS_andc_vec 0 | ||
22 | +#define TCG_TARGET_HAS_andc_vec 1 | ||
23 | #define TCG_TARGET_HAS_orc_vec 0 | ||
24 | -#define TCG_TARGET_HAS_not_vec 0 | ||
25 | +#define TCG_TARGET_HAS_not_vec 1 | ||
26 | #define TCG_TARGET_HAS_neg_vec 0 | ||
27 | #define TCG_TARGET_HAS_abs_vec 0 | ||
28 | #define TCG_TARGET_HAS_shi_vec 0 | ||
29 | #define TCG_TARGET_HAS_shs_vec 0 | ||
30 | #define TCG_TARGET_HAS_shv_vec 0 | ||
31 | -#define TCG_TARGET_HAS_cmp_vec 0 | ||
32 | +#define TCG_TARGET_HAS_cmp_vec 1 | ||
33 | #define TCG_TARGET_HAS_mul_vec 0 | ||
34 | #define TCG_TARGET_HAS_sat_vec 0 | ||
35 | #define TCG_TARGET_HAS_minmax_vec 0 | ||
36 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tcg/ppc/tcg-target.inc.c | ||
39 | +++ b/tcg/ppc/tcg-target.inc.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
41 | ct->ct |= TCG_CT_REG; | ||
42 | ct->u.regs = 0xffffffff; | ||
43 | break; | ||
44 | + case 'v': | ||
45 | + ct->ct |= TCG_CT_REG; | ||
46 | + ct->u.regs = 0xffffffff00000000ull; | ||
47 | + break; | ||
48 | case 'L': /* qemu_ld constraint */ | ||
49 | ct->ct |= TCG_CT_REG; | ||
50 | ct->u.regs = 0xffffffff; | ||
51 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
52 | |||
53 | #define NOP ORI /* ori 0,0,0 */ | ||
54 | |||
55 | +#define LVX XO31(103) | ||
56 | +#define LVEBX XO31(7) | ||
57 | +#define LVEHX XO31(39) | ||
58 | +#define LVEWX XO31(71) | ||
59 | + | ||
60 | +#define STVX XO31(231) | ||
61 | +#define STVEWX XO31(199) | ||
62 | + | ||
63 | +#define VCMPEQUB VX4(6) | ||
64 | +#define VCMPEQUH VX4(70) | ||
65 | +#define VCMPEQUW VX4(134) | ||
66 | +#define VCMPGTSB VX4(774) | ||
67 | +#define VCMPGTSH VX4(838) | ||
68 | +#define VCMPGTSW VX4(902) | ||
69 | +#define VCMPGTUB VX4(518) | ||
70 | +#define VCMPGTUH VX4(582) | ||
71 | +#define VCMPGTUW VX4(646) | ||
72 | + | ||
73 | +#define VAND VX4(1028) | ||
74 | +#define VANDC VX4(1092) | ||
75 | +#define VNOR VX4(1284) | ||
76 | +#define VOR VX4(1156) | ||
77 | +#define VXOR VX4(1220) | ||
78 | + | ||
79 | +#define VSPLTB VX4(524) | ||
80 | +#define VSPLTH VX4(588) | ||
81 | +#define VSPLTW VX4(652) | ||
82 | +#define VSPLTISB VX4(780) | ||
83 | +#define VSPLTISH VX4(844) | ||
84 | +#define VSPLTISW VX4(908) | ||
85 | + | ||
86 | +#define VSLDOI VX4(44) | ||
87 | + | ||
88 | #define RT(r) ((r)<<21) | ||
89 | #define RS(r) ((r)<<21) | ||
90 | #define RA(r) ((r)<<16) | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
92 | intptr_t value, intptr_t addend) | ||
93 | { | ||
94 | tcg_insn_unit *target; | ||
95 | + int16_t lo; | ||
96 | + int32_t hi; | ||
97 | |||
98 | value += addend; | ||
99 | target = (tcg_insn_unit *)value; | ||
100 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
101 | } | ||
102 | *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); | ||
103 | break; | ||
104 | + case R_PPC_ADDR32: | ||
105 | + /* | ||
106 | + * We are abusing this relocation type. Again, this points to | ||
107 | + * a pair of insns, lis + load. This is an absolute address | ||
108 | + * relocation for PPC32 so the lis cannot be removed. | ||
109 | + */ | ||
110 | + lo = value; | ||
111 | + hi = value - lo; | ||
112 | + if (hi + lo != value) { | ||
113 | + return false; | ||
114 | + } | ||
115 | + code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); | ||
116 | + code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); | ||
117 | + break; | ||
118 | default: | ||
119 | g_assert_not_reached(); | ||
120 | } | ||
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
122 | |||
123 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | ||
124 | { | ||
125 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
126 | - if (ret != arg) { | ||
127 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
128 | + if (ret == arg) { | ||
129 | + return true; | ||
130 | + } | ||
131 | + switch (type) { | ||
132 | + case TCG_TYPE_I64: | ||
133 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
134 | + /* fallthru */ | ||
135 | + case TCG_TYPE_I32: | ||
136 | + if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | ||
137 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
138 | + break; | ||
139 | + } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | ||
140 | + /* Altivec does not support vector/integer moves. */ | ||
141 | + return false; | ||
142 | + } | ||
143 | + /* fallthru */ | ||
144 | + case TCG_TYPE_V64: | ||
145 | + case TCG_TYPE_V128: | ||
146 | + tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); | ||
147 | + tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); | ||
148 | + break; | ||
149 | + default: | ||
150 | + g_assert_not_reached(); | ||
151 | } | ||
152 | return true; | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | ||
155 | static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
156 | tcg_target_long val) | ||
157 | { | ||
158 | - g_assert_not_reached(); | ||
159 | + uint32_t load_insn; | ||
160 | + int rel, low; | ||
161 | + intptr_t add; | ||
162 | + | ||
163 | + low = (int8_t)val; | ||
164 | + if (low >= -16 && low < 16) { | ||
165 | + if (val == (tcg_target_long)dup_const(MO_8, low)) { | ||
166 | + tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); | ||
167 | + return; | ||
168 | + } | ||
169 | + if (val == (tcg_target_long)dup_const(MO_16, low)) { | ||
170 | + tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); | ||
171 | + return; | ||
172 | + } | ||
173 | + if (val == (tcg_target_long)dup_const(MO_32, low)) { | ||
174 | + tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); | ||
175 | + return; | ||
176 | + } | ||
177 | + } | ||
178 | + | ||
179 | + /* | ||
180 | + * Otherwise we must load the value from the constant pool. | ||
181 | + */ | ||
182 | + if (USE_REG_TB) { | ||
183 | + rel = R_PPC_ADDR16; | ||
184 | + add = -(intptr_t)s->code_gen_ptr; | ||
185 | + } else { | ||
186 | + rel = R_PPC_ADDR32; | ||
187 | + add = 0; | ||
188 | + } | ||
189 | + | ||
190 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
191 | + if (TCG_TARGET_REG_BITS == 64) { | ||
192 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
193 | + } else { | ||
194 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
195 | + } | ||
196 | + | ||
197 | + if (USE_REG_TB) { | ||
198 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); | ||
199 | + load_insn |= RA(TCG_REG_TB); | ||
200 | + } else { | ||
201 | + tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); | ||
202 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); | ||
203 | + } | ||
204 | + tcg_out32(s, load_insn); | ||
205 | } | ||
206 | |||
207 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
208 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
209 | align = 3; | ||
210 | /* FALLTHRU */ | ||
211 | default: | ||
212 | - if (rt != TCG_REG_R0) { | ||
213 | + if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { | ||
214 | rs = rt; | ||
215 | break; | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
218 | } | ||
219 | |||
220 | /* For unaligned, or very large offsets, use the indexed form. */ | ||
221 | - if (offset & align || offset != (int32_t)offset) { | ||
222 | + if (offset & align || offset != (int32_t)offset || opi == 0) { | ||
223 | if (rs == base) { | ||
224 | rs = TCG_REG_R0; | ||
225 | } | ||
226 | tcg_debug_assert(!is_store || rs != rt); | ||
227 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | ||
228 | - tcg_out32(s, opx | TAB(rt, base, rs)); | ||
229 | + tcg_out32(s, opx | TAB(rt & 31, base, rs)); | ||
230 | return; | ||
231 | } | ||
232 | |||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
234 | base = rs; | ||
235 | } | ||
236 | if (opi != ADDI || base != rt || l0 != 0) { | ||
237 | - tcg_out32(s, opi | TAI(rt, base, l0)); | ||
238 | + tcg_out32(s, opi | TAI(rt & 31, base, l0)); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
243 | - TCGReg arg1, intptr_t arg2) | ||
244 | +static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, | ||
245 | + TCGReg va, TCGReg vb, int shb) | ||
246 | { | ||
247 | - int opi, opx; | ||
248 | - | ||
249 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
250 | - if (type == TCG_TYPE_I32) { | ||
251 | - opi = LWZ, opx = LWZX; | ||
252 | - } else { | ||
253 | - opi = LD, opx = LDX; | ||
254 | - } | ||
255 | - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); | ||
256 | + tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); | ||
257 | } | ||
258 | |||
259 | -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
260 | - TCGReg arg1, intptr_t arg2) | ||
261 | +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
262 | + TCGReg base, intptr_t offset) | ||
263 | { | ||
264 | - int opi, opx; | ||
265 | + int shift; | ||
266 | |||
267 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
268 | - if (type == TCG_TYPE_I32) { | ||
269 | - opi = STW, opx = STWX; | ||
270 | - } else { | ||
271 | - opi = STD, opx = STDX; | ||
272 | + switch (type) { | ||
273 | + case TCG_TYPE_I32: | ||
274 | + if (ret < TCG_REG_V0) { | ||
275 | + tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | ||
276 | + break; | ||
277 | + } | ||
278 | + tcg_debug_assert((offset & 3) == 0); | ||
279 | + tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
280 | + shift = (offset - 4) & 0xc; | ||
281 | + if (shift) { | ||
282 | + tcg_out_vsldoi(s, ret, ret, ret, shift); | ||
283 | + } | ||
284 | + break; | ||
285 | + case TCG_TYPE_I64: | ||
286 | + if (ret < TCG_REG_V0) { | ||
287 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
288 | + tcg_out_mem_long(s, LD, LDX, ret, base, offset); | ||
289 | + break; | ||
290 | + } | ||
291 | + /* fallthru */ | ||
292 | + case TCG_TYPE_V64: | ||
293 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
294 | + tcg_debug_assert((offset & 7) == 0); | ||
295 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
296 | + if (offset & 8) { | ||
297 | + tcg_out_vsldoi(s, ret, ret, ret, 8); | ||
298 | + } | ||
299 | + break; | ||
300 | + case TCG_TYPE_V128: | ||
301 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
302 | + tcg_debug_assert((offset & 15) == 0); | ||
303 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
304 | + break; | ||
305 | + default: | ||
306 | + g_assert_not_reached(); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
311 | + TCGReg base, intptr_t offset) | ||
312 | +{ | ||
313 | + int shift; | ||
314 | + | ||
315 | + switch (type) { | ||
316 | + case TCG_TYPE_I32: | ||
317 | + if (arg < TCG_REG_V0) { | ||
318 | + tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
319 | + break; | ||
320 | + } | ||
321 | + tcg_debug_assert((offset & 3) == 0); | ||
322 | + shift = (offset - 4) & 0xc; | ||
323 | + if (shift) { | ||
324 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); | ||
325 | + arg = TCG_VEC_TMP1; | ||
326 | + } | ||
327 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
328 | + break; | ||
329 | + case TCG_TYPE_I64: | ||
330 | + if (arg < TCG_REG_V0) { | ||
331 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
332 | + tcg_out_mem_long(s, STD, STDX, arg, base, offset); | ||
333 | + break; | ||
334 | + } | ||
335 | + /* fallthru */ | ||
336 | + case TCG_TYPE_V64: | ||
337 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
338 | + tcg_debug_assert((offset & 7) == 0); | ||
339 | + if (offset & 8) { | ||
340 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
341 | + arg = TCG_VEC_TMP1; | ||
342 | + } | ||
343 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
344 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); | ||
345 | + break; | ||
346 | + case TCG_TYPE_V128: | ||
347 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
348 | + tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
349 | + break; | ||
350 | + default: | ||
351 | + g_assert_not_reached(); | ||
352 | } | ||
353 | - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); | ||
354 | } | ||
355 | |||
356 | static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, | ||
357 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | ||
358 | |||
359 | int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
360 | { | ||
361 | - g_assert_not_reached(); | ||
362 | + switch (opc) { | ||
363 | + case INDEX_op_and_vec: | ||
364 | + case INDEX_op_or_vec: | ||
365 | + case INDEX_op_xor_vec: | ||
366 | + case INDEX_op_andc_vec: | ||
367 | + case INDEX_op_not_vec: | ||
368 | + return 1; | ||
369 | + case INDEX_op_cmp_vec: | ||
370 | + return vece <= MO_32 ? -1 : 0; | ||
371 | + default: | ||
372 | + return 0; | ||
373 | + } | ||
374 | } | ||
375 | |||
376 | static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
377 | TCGReg dst, TCGReg src) | ||
378 | { | ||
379 | - g_assert_not_reached(); | ||
380 | + tcg_debug_assert(dst >= TCG_REG_V0); | ||
381 | + tcg_debug_assert(src >= TCG_REG_V0); | ||
382 | + | ||
383 | + /* | ||
384 | + * Recall we use (or emulate) VSX integer loads, so the integer is | ||
385 | + * right justified within the left (zero-index) double-word. | ||
386 | + */ | ||
387 | + switch (vece) { | ||
388 | + case MO_8: | ||
389 | + tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); | ||
390 | + break; | ||
391 | + case MO_16: | ||
392 | + tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); | ||
393 | + break; | ||
394 | + case MO_32: | ||
395 | + tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
396 | + break; | ||
397 | + case MO_64: | ||
398 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
399 | + tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
400 | + break; | ||
401 | + default: | ||
402 | + g_assert_not_reached(); | ||
403 | + } | ||
404 | + return true; | ||
405 | } | ||
406 | |||
407 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
408 | TCGReg out, TCGReg base, intptr_t offset) | ||
409 | { | ||
410 | - g_assert_not_reached(); | ||
411 | + int elt; | ||
412 | + | ||
413 | + tcg_debug_assert(out >= TCG_REG_V0); | ||
414 | + switch (vece) { | ||
415 | + case MO_8: | ||
416 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
417 | + elt = extract32(offset, 0, 4); | ||
418 | +#ifndef HOST_WORDS_BIGENDIAN | ||
419 | + elt ^= 15; | ||
420 | +#endif | ||
421 | + tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); | ||
422 | + break; | ||
423 | + case MO_16: | ||
424 | + tcg_debug_assert((offset & 1) == 0); | ||
425 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
426 | + elt = extract32(offset, 1, 3); | ||
427 | +#ifndef HOST_WORDS_BIGENDIAN | ||
428 | + elt ^= 7; | ||
429 | +#endif | ||
430 | + tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
431 | + break; | ||
432 | + case MO_32: | ||
433 | + tcg_debug_assert((offset & 3) == 0); | ||
434 | + tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
435 | + elt = extract32(offset, 2, 2); | ||
436 | +#ifndef HOST_WORDS_BIGENDIAN | ||
437 | + elt ^= 3; | ||
438 | +#endif | ||
439 | + tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
440 | + break; | ||
441 | + case MO_64: | ||
442 | + tcg_debug_assert((offset & 7) == 0); | ||
443 | + tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
444 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
445 | + elt = extract32(offset, 3, 1); | ||
446 | +#ifndef HOST_WORDS_BIGENDIAN | ||
447 | + elt = !elt; | ||
448 | +#endif | ||
449 | + if (elt) { | ||
450 | + tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); | ||
451 | + } else { | ||
452 | + tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); | ||
453 | + } | ||
454 | + break; | ||
455 | + default: | ||
456 | + g_assert_not_reached(); | ||
457 | + } | ||
458 | + return true; | ||
459 | } | ||
460 | |||
461 | static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
462 | unsigned vecl, unsigned vece, | ||
463 | const TCGArg *args, const int *const_args) | ||
464 | { | ||
465 | - g_assert_not_reached(); | ||
466 | + static const uint32_t | ||
467 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
468 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
469 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
470 | + | ||
471 | + TCGType type = vecl + TCG_TYPE_V64; | ||
472 | + TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
473 | + uint32_t insn; | ||
474 | + | ||
475 | + switch (opc) { | ||
476 | + case INDEX_op_ld_vec: | ||
477 | + tcg_out_ld(s, type, a0, a1, a2); | ||
478 | + return; | ||
479 | + case INDEX_op_st_vec: | ||
480 | + tcg_out_st(s, type, a0, a1, a2); | ||
481 | + return; | ||
482 | + case INDEX_op_dupm_vec: | ||
483 | + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
484 | + return; | ||
485 | + | ||
486 | + case INDEX_op_and_vec: | ||
487 | + insn = VAND; | ||
488 | + break; | ||
489 | + case INDEX_op_or_vec: | ||
490 | + insn = VOR; | ||
491 | + break; | ||
492 | + case INDEX_op_xor_vec: | ||
493 | + insn = VXOR; | ||
494 | + break; | ||
495 | + case INDEX_op_andc_vec: | ||
496 | + insn = VANDC; | ||
497 | + break; | ||
498 | + case INDEX_op_not_vec: | ||
499 | + insn = VNOR; | ||
500 | + a2 = a1; | ||
501 | + break; | ||
502 | + | ||
503 | + case INDEX_op_cmp_vec: | ||
504 | + switch (args[3]) { | ||
505 | + case TCG_COND_EQ: | ||
506 | + insn = eq_op[vece]; | ||
507 | + break; | ||
508 | + case TCG_COND_GT: | ||
509 | + insn = gts_op[vece]; | ||
510 | + break; | ||
511 | + case TCG_COND_GTU: | ||
512 | + insn = gtu_op[vece]; | ||
513 | + break; | ||
514 | + default: | ||
515 | + g_assert_not_reached(); | ||
516 | + } | ||
517 | + break; | ||
518 | + | ||
519 | + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
520 | + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
521 | + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
522 | + default: | ||
523 | + g_assert_not_reached(); | ||
524 | + } | ||
525 | + | ||
526 | + tcg_debug_assert(insn != 0); | ||
527 | + tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
528 | +} | ||
529 | + | ||
530 | +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
531 | + TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
532 | +{ | ||
533 | + bool need_swap = false, need_inv = false; | ||
534 | + | ||
535 | + tcg_debug_assert(vece <= MO_32); | ||
536 | + | ||
537 | + switch (cond) { | ||
538 | + case TCG_COND_EQ: | ||
539 | + case TCG_COND_GT: | ||
540 | + case TCG_COND_GTU: | ||
541 | + break; | ||
542 | + case TCG_COND_NE: | ||
543 | + case TCG_COND_LE: | ||
544 | + case TCG_COND_LEU: | ||
545 | + need_inv = true; | ||
546 | + break; | ||
547 | + case TCG_COND_LT: | ||
548 | + case TCG_COND_LTU: | ||
549 | + need_swap = true; | ||
550 | + break; | ||
551 | + case TCG_COND_GE: | ||
552 | + case TCG_COND_GEU: | ||
553 | + need_swap = need_inv = true; | ||
554 | + break; | ||
555 | + default: | ||
556 | + g_assert_not_reached(); | ||
557 | + } | ||
558 | + | ||
559 | + if (need_inv) { | ||
560 | + cond = tcg_invert_cond(cond); | ||
561 | + } | ||
562 | + if (need_swap) { | ||
563 | + TCGv_vec t1; | ||
564 | + t1 = v1, v1 = v2, v2 = t1; | ||
565 | + cond = tcg_swap_cond(cond); | ||
566 | + } | ||
567 | + | ||
568 | + vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), | ||
569 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); | ||
570 | + | ||
571 | + if (need_inv) { | ||
572 | + tcg_gen_not_vec(vece, v0, v0); | ||
573 | + } | ||
574 | } | ||
575 | |||
576 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
577 | TCGArg a0, ...) | ||
578 | { | ||
579 | - g_assert_not_reached(); | ||
580 | + va_list va; | ||
581 | + TCGv_vec v0, v1, v2; | ||
582 | + | ||
583 | + va_start(va, a0); | ||
584 | + v0 = temp_tcgv_vec(arg_temp(a0)); | ||
585 | + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
586 | + v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
587 | + | ||
588 | + switch (opc) { | ||
589 | + case INDEX_op_cmp_vec: | ||
590 | + expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
591 | + break; | ||
592 | + default: | ||
593 | + g_assert_not_reached(); | ||
594 | + } | ||
595 | + va_end(va); | ||
596 | } | ||
597 | |||
598 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
599 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
600 | = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } }; | ||
601 | static const TCGTargetOpDef sub2 | ||
602 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
603 | + static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
604 | + static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
605 | + static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
606 | |||
607 | switch (op) { | ||
608 | case INDEX_op_goto_ptr: | ||
609 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
610 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
611 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
612 | |||
613 | + case INDEX_op_and_vec: | ||
614 | + case INDEX_op_or_vec: | ||
615 | + case INDEX_op_xor_vec: | ||
616 | + case INDEX_op_andc_vec: | ||
617 | + case INDEX_op_orc_vec: | ||
618 | + case INDEX_op_cmp_vec: | ||
619 | + return &v_v_v; | ||
620 | + case INDEX_op_not_vec: | ||
621 | + case INDEX_op_dup_vec: | ||
622 | + return &v_v; | ||
623 | + case INDEX_op_ld_vec: | ||
624 | + case INDEX_op_st_vec: | ||
625 | + case INDEX_op_dupm_vec: | ||
626 | + return &v_r; | ||
627 | + | ||
628 | default: | ||
629 | return NULL; | ||
630 | } | ||
631 | -- | ||
632 | 2.17.1 | ||
633 | |||
634 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add support for vector maximum/minimum using Altivec instructions | ||
2 | VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and | ||
3 | VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW. | ||
4 | 1 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
7 | --- | ||
8 | tcg/ppc/tcg-target.h | 2 +- | ||
9 | tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++- | ||
10 | 2 files changed, 40 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/ppc/tcg-target.h | ||
15 | +++ b/tcg/ppc/tcg-target.h | ||
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
17 | #define TCG_TARGET_HAS_cmp_vec 1 | ||
18 | #define TCG_TARGET_HAS_mul_vec 0 | ||
19 | #define TCG_TARGET_HAS_sat_vec 0 | ||
20 | -#define TCG_TARGET_HAS_minmax_vec 0 | ||
21 | +#define TCG_TARGET_HAS_minmax_vec 1 | ||
22 | #define TCG_TARGET_HAS_bitsel_vec 0 | ||
23 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
24 | |||
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/ppc/tcg-target.inc.c | ||
28 | +++ b/tcg/ppc/tcg-target.inc.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
30 | #define STVX XO31(231) | ||
31 | #define STVEWX XO31(199) | ||
32 | |||
33 | +#define VMAXSB VX4(258) | ||
34 | +#define VMAXSH VX4(322) | ||
35 | +#define VMAXSW VX4(386) | ||
36 | +#define VMAXUB VX4(2) | ||
37 | +#define VMAXUH VX4(66) | ||
38 | +#define VMAXUW VX4(130) | ||
39 | +#define VMINSB VX4(770) | ||
40 | +#define VMINSH VX4(834) | ||
41 | +#define VMINSW VX4(898) | ||
42 | +#define VMINUB VX4(514) | ||
43 | +#define VMINUH VX4(578) | ||
44 | +#define VMINUW VX4(642) | ||
45 | + | ||
46 | #define VCMPEQUB VX4(6) | ||
47 | #define VCMPEQUH VX4(70) | ||
48 | #define VCMPEQUW VX4(134) | ||
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
50 | case INDEX_op_andc_vec: | ||
51 | case INDEX_op_not_vec: | ||
52 | return 1; | ||
53 | + case INDEX_op_smax_vec: | ||
54 | + case INDEX_op_smin_vec: | ||
55 | + case INDEX_op_umax_vec: | ||
56 | + case INDEX_op_umin_vec: | ||
57 | + return vece <= MO_32; | ||
58 | case INDEX_op_cmp_vec: | ||
59 | return vece <= MO_32 ? -1 : 0; | ||
60 | default: | ||
61 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
62 | static const uint32_t | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
64 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
65 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
66 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
67 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
68 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
69 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
70 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
71 | |||
72 | TCGType type = vecl + TCG_TYPE_V64; | ||
73 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
75 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
76 | return; | ||
77 | |||
78 | + case INDEX_op_smin_vec: | ||
79 | + insn = smin_op[vece]; | ||
80 | + break; | ||
81 | + case INDEX_op_umin_vec: | ||
82 | + insn = umin_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_smax_vec: | ||
85 | + insn = smax_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_umax_vec: | ||
88 | + insn = umax_op[vece]; | ||
89 | + break; | ||
90 | case INDEX_op_and_vec: | ||
91 | insn = VAND; | ||
92 | break; | ||
93 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
94 | case INDEX_op_andc_vec: | ||
95 | case INDEX_op_orc_vec: | ||
96 | case INDEX_op_cmp_vec: | ||
97 | + case INDEX_op_smax_vec: | ||
98 | + case INDEX_op_smin_vec: | ||
99 | + case INDEX_op_umax_vec: | ||
100 | + case INDEX_op_umin_vec: | ||
101 | return &v_v_v; | ||
102 | case INDEX_op_not_vec: | ||
103 | case INDEX_op_dup_vec: | ||
104 | -- | ||
105 | 2.17.1 | ||
106 | |||
107 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add support for vector add/subtract using Altivec instructions: | ||
2 | VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM. | ||
3 | 1 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++ | ||
8 | 1 file changed, 20 insertions(+) | ||
9 | |||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/ppc/tcg-target.inc.c | ||
13 | +++ b/tcg/ppc/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
15 | #define STVX XO31(231) | ||
16 | #define STVEWX XO31(199) | ||
17 | |||
18 | +#define VADDUBM VX4(0) | ||
19 | +#define VADDUHM VX4(64) | ||
20 | +#define VADDUWM VX4(128) | ||
21 | + | ||
22 | +#define VSUBUBM VX4(1024) | ||
23 | +#define VSUBUHM VX4(1088) | ||
24 | +#define VSUBUWM VX4(1152) | ||
25 | + | ||
26 | #define VMAXSB VX4(258) | ||
27 | #define VMAXSH VX4(322) | ||
28 | #define VMAXSW VX4(386) | ||
29 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
30 | case INDEX_op_andc_vec: | ||
31 | case INDEX_op_not_vec: | ||
32 | return 1; | ||
33 | + case INDEX_op_add_vec: | ||
34 | + case INDEX_op_sub_vec: | ||
35 | case INDEX_op_smax_vec: | ||
36 | case INDEX_op_smin_vec: | ||
37 | case INDEX_op_umax_vec: | ||
38 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
39 | const TCGArg *args, const int *const_args) | ||
40 | { | ||
41 | static const uint32_t | ||
42 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | ||
43 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | ||
44 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
45 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
46 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
47 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
48 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
49 | return; | ||
50 | |||
51 | + case INDEX_op_add_vec: | ||
52 | + insn = add_op[vece]; | ||
53 | + break; | ||
54 | + case INDEX_op_sub_vec: | ||
55 | + insn = sub_op[vece]; | ||
56 | + break; | ||
57 | case INDEX_op_smin_vec: | ||
58 | insn = smin_op[vece]; | ||
59 | break; | ||
60 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
61 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
62 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
63 | |||
64 | + case INDEX_op_add_vec: | ||
65 | + case INDEX_op_sub_vec: | ||
66 | case INDEX_op_and_vec: | ||
67 | case INDEX_op_or_vec: | ||
68 | case INDEX_op_xor_vec: | ||
69 | -- | ||
70 | 2.17.1 | ||
71 | |||
72 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add support for vector saturated add/subtract using Altivec | ||
2 | instructions: | ||
3 | VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and | ||
4 | VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS. | ||
5 | 1 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | ||
9 | tcg/ppc/tcg-target.h | 2 +- | ||
10 | tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++ | ||
11 | 2 files changed, 37 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/ppc/tcg-target.h | ||
16 | +++ b/tcg/ppc/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
18 | #define TCG_TARGET_HAS_shv_vec 0 | ||
19 | #define TCG_TARGET_HAS_cmp_vec 1 | ||
20 | #define TCG_TARGET_HAS_mul_vec 0 | ||
21 | -#define TCG_TARGET_HAS_sat_vec 0 | ||
22 | +#define TCG_TARGET_HAS_sat_vec 1 | ||
23 | #define TCG_TARGET_HAS_minmax_vec 1 | ||
24 | #define TCG_TARGET_HAS_bitsel_vec 0 | ||
25 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
26 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/tcg/ppc/tcg-target.inc.c | ||
29 | +++ b/tcg/ppc/tcg-target.inc.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
31 | #define STVX XO31(231) | ||
32 | #define STVEWX XO31(199) | ||
33 | |||
34 | +#define VADDSBS VX4(768) | ||
35 | +#define VADDUBS VX4(512) | ||
36 | #define VADDUBM VX4(0) | ||
37 | +#define VADDSHS VX4(832) | ||
38 | +#define VADDUHS VX4(576) | ||
39 | #define VADDUHM VX4(64) | ||
40 | +#define VADDSWS VX4(896) | ||
41 | +#define VADDUWS VX4(640) | ||
42 | #define VADDUWM VX4(128) | ||
43 | |||
44 | +#define VSUBSBS VX4(1792) | ||
45 | +#define VSUBUBS VX4(1536) | ||
46 | #define VSUBUBM VX4(1024) | ||
47 | +#define VSUBSHS VX4(1856) | ||
48 | +#define VSUBUHS VX4(1600) | ||
49 | #define VSUBUHM VX4(1088) | ||
50 | +#define VSUBSWS VX4(1920) | ||
51 | +#define VSUBUWS VX4(1664) | ||
52 | #define VSUBUWM VX4(1152) | ||
53 | |||
54 | #define VMAXSB VX4(258) | ||
55 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
56 | case INDEX_op_smin_vec: | ||
57 | case INDEX_op_umax_vec: | ||
58 | case INDEX_op_umin_vec: | ||
59 | + case INDEX_op_ssadd_vec: | ||
60 | + case INDEX_op_sssub_vec: | ||
61 | + case INDEX_op_usadd_vec: | ||
62 | + case INDEX_op_ussub_vec: | ||
63 | return vece <= MO_32; | ||
64 | case INDEX_op_cmp_vec: | ||
65 | return vece <= MO_32 ? -1 : 0; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
67 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
68 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
69 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
70 | + ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
71 | + usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
72 | + sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
73 | + ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
74 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
75 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
76 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
78 | case INDEX_op_sub_vec: | ||
79 | insn = sub_op[vece]; | ||
80 | break; | ||
81 | + case INDEX_op_ssadd_vec: | ||
82 | + insn = ssadd_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_sssub_vec: | ||
85 | + insn = sssub_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_usadd_vec: | ||
88 | + insn = usadd_op[vece]; | ||
89 | + break; | ||
90 | + case INDEX_op_ussub_vec: | ||
91 | + insn = ussub_op[vece]; | ||
92 | + break; | ||
93 | case INDEX_op_smin_vec: | ||
94 | insn = smin_op[vece]; | ||
95 | break; | ||
96 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
97 | case INDEX_op_andc_vec: | ||
98 | case INDEX_op_orc_vec: | ||
99 | case INDEX_op_cmp_vec: | ||
100 | + case INDEX_op_ssadd_vec: | ||
101 | + case INDEX_op_sssub_vec: | ||
102 | + case INDEX_op_usadd_vec: | ||
103 | + case INDEX_op_ussub_vec: | ||
104 | case INDEX_op_smax_vec: | ||
105 | case INDEX_op_smin_vec: | ||
106 | case INDEX_op_umax_vec: | ||
107 | -- | ||
108 | 2.17.1 | ||
109 | |||
110 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For Altivec, this is done via vector shift by vector, | ||
2 | and loading the immediate into a register. | ||
3 | 1 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.h | 2 +- | ||
8 | tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++-- | ||
9 | 2 files changed, 57 insertions(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.h | ||
14 | +++ b/tcg/ppc/tcg-target.h | ||
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
16 | #define TCG_TARGET_HAS_abs_vec 0 | ||
17 | #define TCG_TARGET_HAS_shi_vec 0 | ||
18 | #define TCG_TARGET_HAS_shs_vec 0 | ||
19 | -#define TCG_TARGET_HAS_shv_vec 0 | ||
20 | +#define TCG_TARGET_HAS_shv_vec 1 | ||
21 | #define TCG_TARGET_HAS_cmp_vec 1 | ||
22 | #define TCG_TARGET_HAS_mul_vec 0 | ||
23 | #define TCG_TARGET_HAS_sat_vec 1 | ||
24 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/tcg/ppc/tcg-target.inc.c | ||
27 | +++ b/tcg/ppc/tcg-target.inc.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
29 | #define VCMPGTUH VX4(582) | ||
30 | #define VCMPGTUW VX4(646) | ||
31 | |||
32 | +#define VSLB VX4(260) | ||
33 | +#define VSLH VX4(324) | ||
34 | +#define VSLW VX4(388) | ||
35 | +#define VSRB VX4(516) | ||
36 | +#define VSRH VX4(580) | ||
37 | +#define VSRW VX4(644) | ||
38 | +#define VSRAB VX4(772) | ||
39 | +#define VSRAH VX4(836) | ||
40 | +#define VSRAW VX4(900) | ||
41 | + | ||
42 | #define VAND VX4(1028) | ||
43 | #define VANDC VX4(1092) | ||
44 | #define VNOR VX4(1284) | ||
45 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
46 | case INDEX_op_sssub_vec: | ||
47 | case INDEX_op_usadd_vec: | ||
48 | case INDEX_op_ussub_vec: | ||
49 | + case INDEX_op_shlv_vec: | ||
50 | + case INDEX_op_shrv_vec: | ||
51 | + case INDEX_op_sarv_vec: | ||
52 | return vece <= MO_32; | ||
53 | case INDEX_op_cmp_vec: | ||
54 | + case INDEX_op_shli_vec: | ||
55 | + case INDEX_op_shri_vec: | ||
56 | + case INDEX_op_sari_vec: | ||
57 | return vece <= MO_32 ? -1 : 0; | ||
58 | default: | ||
59 | return 0; | ||
60 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
61 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
62 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
63 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
64 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
65 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
66 | + shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
67 | + shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
68 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
69 | |||
70 | TCGType type = vecl + TCG_TYPE_V64; | ||
71 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
73 | case INDEX_op_umax_vec: | ||
74 | insn = umax_op[vece]; | ||
75 | break; | ||
76 | + case INDEX_op_shlv_vec: | ||
77 | + insn = shlv_op[vece]; | ||
78 | + break; | ||
79 | + case INDEX_op_shrv_vec: | ||
80 | + insn = shrv_op[vece]; | ||
81 | + break; | ||
82 | + case INDEX_op_sarv_vec: | ||
83 | + insn = sarv_op[vece]; | ||
84 | + break; | ||
85 | case INDEX_op_and_vec: | ||
86 | insn = VAND; | ||
87 | break; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
89 | tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
90 | } | ||
91 | |||
92 | +static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, | ||
93 | + TCGv_vec v1, TCGArg imm, TCGOpcode opci) | ||
94 | +{ | ||
95 | + TCGv_vec t1 = tcg_temp_new_vec(type); | ||
96 | + | ||
97 | + /* Splat w/bytes for xxspltib. */ | ||
98 | + tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1)); | ||
99 | + vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), | ||
100 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); | ||
101 | + tcg_temp_free_vec(t1); | ||
102 | +} | ||
103 | + | ||
104 | static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
105 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
106 | { | ||
107 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
108 | { | ||
109 | va_list va; | ||
110 | TCGv_vec v0, v1, v2; | ||
111 | + TCGArg a2; | ||
112 | |||
113 | va_start(va, a0); | ||
114 | v0 = temp_tcgv_vec(arg_temp(a0)); | ||
115 | v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
116 | - v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
117 | + a2 = va_arg(va, TCGArg); | ||
118 | |||
119 | switch (opc) { | ||
120 | + case INDEX_op_shli_vec: | ||
121 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); | ||
122 | + break; | ||
123 | + case INDEX_op_shri_vec: | ||
124 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); | ||
125 | + break; | ||
126 | + case INDEX_op_sari_vec: | ||
127 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); | ||
128 | + break; | ||
129 | case INDEX_op_cmp_vec: | ||
130 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
131 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
132 | break; | ||
133 | default: | ||
134 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
135 | case INDEX_op_smin_vec: | ||
136 | case INDEX_op_umax_vec: | ||
137 | case INDEX_op_umin_vec: | ||
138 | + case INDEX_op_shlv_vec: | ||
139 | + case INDEX_op_shrv_vec: | ||
140 | + case INDEX_op_sarv_vec: | ||
141 | return &v_v_v; | ||
142 | case INDEX_op_not_vec: | ||
143 | case INDEX_op_dup_vec: | ||
144 | -- | ||
145 | 2.17.1 | ||
146 | |||
147 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For Altivec, this is always an expansion. | ||
2 | 1 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | ||
6 | tcg/ppc/tcg-target.h | 2 +- | ||
7 | tcg/ppc/tcg-target.opc.h | 8 +++ | ||
8 | tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++- | ||
9 | 3 files changed, 121 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.h | ||
14 | +++ b/tcg/ppc/tcg-target.h | ||
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
16 | #define TCG_TARGET_HAS_shs_vec 0 | ||
17 | #define TCG_TARGET_HAS_shv_vec 1 | ||
18 | #define TCG_TARGET_HAS_cmp_vec 1 | ||
19 | -#define TCG_TARGET_HAS_mul_vec 0 | ||
20 | +#define TCG_TARGET_HAS_mul_vec 1 | ||
21 | #define TCG_TARGET_HAS_sat_vec 1 | ||
22 | #define TCG_TARGET_HAS_minmax_vec 1 | ||
23 | #define TCG_TARGET_HAS_bitsel_vec 0 | ||
24 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/tcg/ppc/tcg-target.opc.h | ||
27 | +++ b/tcg/ppc/tcg-target.opc.h | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | ||
30 | * consider these to be UNSPEC with names. | ||
31 | */ | ||
32 | + | ||
33 | +DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC) | ||
34 | +DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC) | ||
35 | +DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) | ||
36 | +DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) | ||
37 | +DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) | ||
38 | +DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) | ||
39 | +DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) | ||
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/ppc/tcg-target.inc.c | ||
43 | +++ b/tcg/ppc/tcg-target.inc.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
45 | #define VSRAB VX4(772) | ||
46 | #define VSRAH VX4(836) | ||
47 | #define VSRAW VX4(900) | ||
48 | +#define VRLB VX4(4) | ||
49 | +#define VRLH VX4(68) | ||
50 | +#define VRLW VX4(132) | ||
51 | + | ||
52 | +#define VMULEUB VX4(520) | ||
53 | +#define VMULEUH VX4(584) | ||
54 | +#define VMULOUB VX4(8) | ||
55 | +#define VMULOUH VX4(72) | ||
56 | +#define VMSUMUHM VX4(38) | ||
57 | + | ||
58 | +#define VMRGHB VX4(12) | ||
59 | +#define VMRGHH VX4(76) | ||
60 | +#define VMRGHW VX4(140) | ||
61 | +#define VMRGLB VX4(268) | ||
62 | +#define VMRGLH VX4(332) | ||
63 | +#define VMRGLW VX4(396) | ||
64 | + | ||
65 | +#define VPKUHUM VX4(14) | ||
66 | +#define VPKUWUM VX4(78) | ||
67 | |||
68 | #define VAND VX4(1028) | ||
69 | #define VANDC VX4(1092) | ||
70 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
71 | case INDEX_op_sarv_vec: | ||
72 | return vece <= MO_32; | ||
73 | case INDEX_op_cmp_vec: | ||
74 | + case INDEX_op_mul_vec: | ||
75 | case INDEX_op_shli_vec: | ||
76 | case INDEX_op_shri_vec: | ||
77 | case INDEX_op_sari_vec: | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
79 | smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
80 | shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
81 | shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
82 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
83 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
84 | + mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
85 | + mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
86 | + muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
87 | + mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
88 | + pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
89 | + rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
90 | |||
91 | TCGType type = vecl + TCG_TYPE_V64; | ||
92 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
94 | } | ||
95 | break; | ||
96 | |||
97 | + case INDEX_op_ppc_mrgh_vec: | ||
98 | + insn = mrgh_op[vece]; | ||
99 | + break; | ||
100 | + case INDEX_op_ppc_mrgl_vec: | ||
101 | + insn = mrgl_op[vece]; | ||
102 | + break; | ||
103 | + case INDEX_op_ppc_muleu_vec: | ||
104 | + insn = muleu_op[vece]; | ||
105 | + break; | ||
106 | + case INDEX_op_ppc_mulou_vec: | ||
107 | + insn = mulou_op[vece]; | ||
108 | + break; | ||
109 | + case INDEX_op_ppc_pkum_vec: | ||
110 | + insn = pkum_op[vece]; | ||
111 | + break; | ||
112 | + case INDEX_op_ppc_rotl_vec: | ||
113 | + insn = rotl_op[vece]; | ||
114 | + break; | ||
115 | + case INDEX_op_ppc_msum_vec: | ||
116 | + tcg_debug_assert(vece == MO_16); | ||
117 | + tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); | ||
118 | + return; | ||
119 | + | ||
120 | case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
121 | case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
122 | case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
123 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
124 | } | ||
125 | } | ||
126 | |||
127 | +static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
128 | + TCGv_vec v1, TCGv_vec v2) | ||
129 | +{ | ||
130 | + TCGv_vec t1 = tcg_temp_new_vec(type); | ||
131 | + TCGv_vec t2 = tcg_temp_new_vec(type); | ||
132 | + TCGv_vec t3, t4; | ||
133 | + | ||
134 | + switch (vece) { | ||
135 | + case MO_8: | ||
136 | + case MO_16: | ||
137 | + vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), | ||
138 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
139 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), | ||
140 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
141 | + vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), | ||
142 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | ||
143 | + vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), | ||
144 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | ||
145 | + vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), | ||
146 | + tcgv_vec_arg(v0), tcgv_vec_arg(t1)); | ||
147 | + break; | ||
148 | + | ||
149 | + case MO_32: | ||
150 | + t3 = tcg_temp_new_vec(type); | ||
151 | + t4 = tcg_temp_new_vec(type); | ||
152 | + tcg_gen_dupi_vec(MO_8, t4, -16); | ||
153 | + vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1), | ||
154 | + tcgv_vec_arg(v2), tcgv_vec_arg(t4)); | ||
155 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), | ||
156 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
157 | + tcg_gen_dupi_vec(MO_8, t3, 0); | ||
158 | + vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3), | ||
159 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); | ||
160 | + vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3), | ||
161 | + tcgv_vec_arg(t3), tcgv_vec_arg(t4)); | ||
162 | + tcg_gen_add_vec(MO_32, v0, t2, t3); | ||
163 | + tcg_temp_free_vec(t3); | ||
164 | + tcg_temp_free_vec(t4); | ||
165 | + break; | ||
166 | + | ||
167 | + default: | ||
168 | + g_assert_not_reached(); | ||
169 | + } | ||
170 | + tcg_temp_free_vec(t1); | ||
171 | + tcg_temp_free_vec(t2); | ||
172 | +} | ||
173 | + | ||
174 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
175 | TCGArg a0, ...) | ||
176 | { | ||
177 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
178 | v2 = temp_tcgv_vec(arg_temp(a2)); | ||
179 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
180 | break; | ||
181 | + case INDEX_op_mul_vec: | ||
182 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
183 | + expand_vec_mul(type, vece, v0, v1, v2); | ||
184 | + break; | ||
185 | default: | ||
186 | g_assert_not_reached(); | ||
187 | } | ||
188 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
189 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
190 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
191 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
192 | + static const TCGTargetOpDef v_v_v_v | ||
193 | + = { .args_ct_str = { "v", "v", "v", "v" } }; | ||
194 | |||
195 | switch (op) { | ||
196 | case INDEX_op_goto_ptr: | ||
197 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
198 | |||
199 | case INDEX_op_add_vec: | ||
200 | case INDEX_op_sub_vec: | ||
201 | + case INDEX_op_mul_vec: | ||
202 | case INDEX_op_and_vec: | ||
203 | case INDEX_op_or_vec: | ||
204 | case INDEX_op_xor_vec: | ||
205 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
206 | case INDEX_op_shlv_vec: | ||
207 | case INDEX_op_shrv_vec: | ||
208 | case INDEX_op_sarv_vec: | ||
209 | + case INDEX_op_ppc_mrgh_vec: | ||
210 | + case INDEX_op_ppc_mrgl_vec: | ||
211 | + case INDEX_op_ppc_muleu_vec: | ||
212 | + case INDEX_op_ppc_mulou_vec: | ||
213 | + case INDEX_op_ppc_pkum_vec: | ||
214 | + case INDEX_op_ppc_rotl_vec: | ||
215 | return &v_v_v; | ||
216 | case INDEX_op_not_vec: | ||
217 | case INDEX_op_dup_vec: | ||
218 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
219 | case INDEX_op_st_vec: | ||
220 | case INDEX_op_dupm_vec: | ||
221 | return &v_r; | ||
222 | + case INDEX_op_ppc_msum_vec: | ||
223 | + return &v_v_v_v; | ||
224 | |||
225 | default: | ||
226 | return NULL; | ||
227 | -- | ||
228 | 2.17.1 | ||
229 | |||
230 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This is only used for 32-bit hosts. | ||
2 | 1 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | ||
6 | tcg/ppc/tcg-target.inc.c | 9 +++++++++ | ||
7 | 1 file changed, 9 insertions(+) | ||
8 | |||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/ppc/tcg-target.inc.c | ||
12 | +++ b/tcg/ppc/tcg-target.inc.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
14 | } | ||
15 | break; | ||
16 | |||
17 | + case INDEX_op_dup2_vec: | ||
18 | + assert(TCG_TARGET_REG_BITS == 32); | ||
19 | + /* With inputs a1 = xLxx, a2 = xHxx */ | ||
20 | + tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ | ||
21 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ | ||
22 | + tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ | ||
23 | + return; | ||
24 | + | ||
25 | case INDEX_op_ppc_mrgh_vec: | ||
26 | insn = mrgh_op[vece]; | ||
27 | break; | ||
28 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
29 | case INDEX_op_ppc_mulou_vec: | ||
30 | case INDEX_op_ppc_pkum_vec: | ||
31 | case INDEX_op_ppc_rotl_vec: | ||
32 | + case INDEX_op_dup2_vec: | ||
33 | return &v_v_v; | ||
34 | case INDEX_op_not_vec: | ||
35 | case INDEX_op_dup_vec: | ||
36 | -- | ||
37 | 2.17.1 | ||
38 | |||
39 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Now that we have implemented the required tcg operations, | ||
2 | we can enable detection of host vector support. | ||
3 | 1 | ||
4 | Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32) | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/ppc/tcg-target.inc.c | 4 ++++ | ||
9 | 1 file changed, 4 insertions(+) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.inc.c | ||
14 | +++ b/tcg/ppc/tcg-target.inc.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
16 | have_isel = have_isa_2_06; | ||
17 | #endif | ||
18 | |||
19 | + if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | ||
20 | + have_altivec = true; | ||
21 | + } | ||
22 | + | ||
23 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
24 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | ||
25 | if (have_altivec) { | ||
26 | -- | ||
27 | 2.17.1 | ||
28 | |||
29 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The VSX instruction set includes double-word loads and | ||
2 | stores, double-word load and splat, double-word permute, and bit | ||
3 | select, all of which require multiple operations in the Altivec | ||
4 | instruction set. | ||
5 | 1 | ||
6 | Because the VSX registers map %vsr32 to %vr0, and we have no current | ||
7 | intention or need to use vector registers outside %vr0-%vr19, force | ||
8 | on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't | ||
9 | have to otherwise modify the VR[TABC] macros. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
13 | --- | ||
14 | tcg/ppc/tcg-target.h | 5 ++-- | ||
15 | tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++---- | ||
16 | 2 files changed, 51 insertions(+), 6 deletions(-) | ||
17 | |||
18 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/tcg/ppc/tcg-target.h | ||
21 | +++ b/tcg/ppc/tcg-target.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
23 | |||
24 | extern TCGPowerISA have_isa; | ||
25 | extern bool have_altivec; | ||
26 | +extern bool have_vsx; | ||
27 | |||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
29 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
30 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
31 | * instruction and substituting two 32-bit stores makes the generated | ||
32 | * code quite large. | ||
33 | */ | ||
34 | -#define TCG_TARGET_HAS_v64 0 | ||
35 | +#define TCG_TARGET_HAS_v64 have_vsx | ||
36 | #define TCG_TARGET_HAS_v128 have_altivec | ||
37 | #define TCG_TARGET_HAS_v256 0 | ||
38 | |||
39 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
40 | #define TCG_TARGET_HAS_mul_vec 1 | ||
41 | #define TCG_TARGET_HAS_sat_vec 1 | ||
42 | #define TCG_TARGET_HAS_minmax_vec 1 | ||
43 | -#define TCG_TARGET_HAS_bitsel_vec 0 | ||
44 | +#define TCG_TARGET_HAS_bitsel_vec have_vsx | ||
45 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
46 | |||
47 | void flush_icache_range(uintptr_t start, uintptr_t stop); | ||
48 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/tcg/ppc/tcg-target.inc.c | ||
51 | +++ b/tcg/ppc/tcg-target.inc.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | ||
53 | TCGPowerISA have_isa; | ||
54 | static bool have_isel; | ||
55 | bool have_altivec; | ||
56 | +bool have_vsx; | ||
57 | |||
58 | #ifndef CONFIG_SOFTMMU | ||
59 | #define TCG_GUEST_BASE_REG 30 | ||
60 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
61 | #define LVEBX XO31(7) | ||
62 | #define LVEHX XO31(39) | ||
63 | #define LVEWX XO31(71) | ||
64 | +#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | ||
65 | +#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | ||
66 | |||
67 | #define STVX XO31(231) | ||
68 | #define STVEWX XO31(199) | ||
69 | +#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | ||
70 | |||
71 | #define VADDSBS VX4(768) | ||
72 | #define VADDUBS VX4(512) | ||
73 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
74 | |||
75 | #define VSLDOI VX4(44) | ||
76 | |||
77 | +#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | ||
78 | +#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | ||
79 | + | ||
80 | #define RT(r) ((r)<<21) | ||
81 | #define RS(r) ((r)<<21) | ||
82 | #define RA(r) ((r)<<16) | ||
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
84 | add = 0; | ||
85 | } | ||
86 | |||
87 | - load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
88 | - if (TCG_TARGET_REG_BITS == 64) { | ||
89 | - new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
90 | + if (have_vsx) { | ||
91 | + load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; | ||
92 | + load_insn |= VRT(ret) | RB(TCG_REG_TMP1); | ||
93 | + if (TCG_TARGET_REG_BITS == 64) { | ||
94 | + new_pool_label(s, val, rel, s->code_ptr, add); | ||
95 | + } else { | ||
96 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
97 | + } | ||
98 | } else { | ||
99 | - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
100 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
101 | + if (TCG_TARGET_REG_BITS == 64) { | ||
102 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
103 | + } else { | ||
104 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
105 | + } | ||
106 | } | ||
107 | |||
108 | if (USE_REG_TB) { | ||
109 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
110 | /* fallthru */ | ||
111 | case TCG_TYPE_V64: | ||
112 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
113 | + if (have_vsx) { | ||
114 | + tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | ||
115 | + break; | ||
116 | + } | ||
117 | tcg_debug_assert((offset & 7) == 0); | ||
118 | tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
119 | if (offset & 8) { | ||
120 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
121 | /* fallthru */ | ||
122 | case TCG_TYPE_V64: | ||
123 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
124 | + if (have_vsx) { | ||
125 | + tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | ||
126 | + break; | ||
127 | + } | ||
128 | tcg_debug_assert((offset & 7) == 0); | ||
129 | if (offset & 8) { | ||
130 | tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
131 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
132 | case INDEX_op_shri_vec: | ||
133 | case INDEX_op_sari_vec: | ||
134 | return vece <= MO_32 ? -1 : 0; | ||
135 | + case INDEX_op_bitsel_vec: | ||
136 | + return have_vsx; | ||
137 | default: | ||
138 | return 0; | ||
139 | } | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
141 | tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
142 | break; | ||
143 | case MO_64: | ||
144 | + if (have_vsx) { | ||
145 | + tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); | ||
146 | + break; | ||
147 | + } | ||
148 | tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
149 | tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
150 | break; | ||
151 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
152 | tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
153 | break; | ||
154 | case MO_64: | ||
155 | + if (have_vsx) { | ||
156 | + tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); | ||
157 | + break; | ||
158 | + } | ||
159 | tcg_debug_assert((offset & 7) == 0); | ||
160 | tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
161 | tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
162 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
163 | } | ||
164 | break; | ||
165 | |||
166 | + case INDEX_op_bitsel_vec: | ||
167 | + tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); | ||
168 | + return; | ||
169 | + | ||
170 | case INDEX_op_dup2_vec: | ||
171 | assert(TCG_TARGET_REG_BITS == 32); | ||
172 | /* With inputs a1 = xLxx, a2 = xHxx */ | ||
173 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
174 | case INDEX_op_st_vec: | ||
175 | case INDEX_op_dupm_vec: | ||
176 | return &v_r; | ||
177 | + case INDEX_op_bitsel_vec: | ||
178 | case INDEX_op_ppc_msum_vec: | ||
179 | return &v_v_v_v; | ||
180 | |||
181 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
182 | |||
183 | if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | ||
184 | have_altivec = true; | ||
185 | + /* We only care about the portion of VSX that overlaps Altivec. */ | ||
186 | + if (hwcap & PPC_FEATURE_HAS_VSX) { | ||
187 | + have_vsx = true; | ||
188 | + } | ||
189 | } | ||
190 | |||
191 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
192 | -- | ||
193 | 2.17.1 | ||
194 | |||
195 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are conditional only on MSR.VEC and | ||
2 | are thus part of the Altivec instruction set, and not VSX. | ||
3 | This includes lots of double-word arithmetic and a few extra | ||
4 | logical operations. | ||
5 | 1 | ||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/ppc/tcg-target.h | 4 +- | ||
10 | tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++---------- | ||
11 | 2 files changed, 67 insertions(+), 22 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/ppc/tcg-target.h | ||
16 | +++ b/tcg/ppc/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
18 | typedef enum { | ||
19 | tcg_isa_base, | ||
20 | tcg_isa_2_06, | ||
21 | + tcg_isa_2_07, | ||
22 | tcg_isa_3_00, | ||
23 | } TCGPowerISA; | ||
24 | |||
25 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
26 | extern bool have_vsx; | ||
27 | |||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
29 | +#define have_isa_2_07 (have_isa >= tcg_isa_2_07) | ||
30 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | ||
34 | #define TCG_TARGET_HAS_v256 0 | ||
35 | |||
36 | #define TCG_TARGET_HAS_andc_vec 1 | ||
37 | -#define TCG_TARGET_HAS_orc_vec 0 | ||
38 | +#define TCG_TARGET_HAS_orc_vec have_isa_2_07 | ||
39 | #define TCG_TARGET_HAS_not_vec 1 | ||
40 | #define TCG_TARGET_HAS_neg_vec 0 | ||
41 | #define TCG_TARGET_HAS_abs_vec 0 | ||
42 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/tcg/ppc/tcg-target.inc.c | ||
45 | +++ b/tcg/ppc/tcg-target.inc.c | ||
46 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
47 | #define VADDSWS VX4(896) | ||
48 | #define VADDUWS VX4(640) | ||
49 | #define VADDUWM VX4(128) | ||
50 | +#define VADDUDM VX4(192) /* v2.07 */ | ||
51 | |||
52 | #define VSUBSBS VX4(1792) | ||
53 | #define VSUBUBS VX4(1536) | ||
54 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
55 | #define VSUBSWS VX4(1920) | ||
56 | #define VSUBUWS VX4(1664) | ||
57 | #define VSUBUWM VX4(1152) | ||
58 | +#define VSUBUDM VX4(1216) /* v2.07 */ | ||
59 | |||
60 | #define VMAXSB VX4(258) | ||
61 | #define VMAXSH VX4(322) | ||
62 | #define VMAXSW VX4(386) | ||
63 | +#define VMAXSD VX4(450) /* v2.07 */ | ||
64 | #define VMAXUB VX4(2) | ||
65 | #define VMAXUH VX4(66) | ||
66 | #define VMAXUW VX4(130) | ||
67 | +#define VMAXUD VX4(194) /* v2.07 */ | ||
68 | #define VMINSB VX4(770) | ||
69 | #define VMINSH VX4(834) | ||
70 | #define VMINSW VX4(898) | ||
71 | +#define VMINSD VX4(962) /* v2.07 */ | ||
72 | #define VMINUB VX4(514) | ||
73 | #define VMINUH VX4(578) | ||
74 | #define VMINUW VX4(642) | ||
75 | +#define VMINUD VX4(706) /* v2.07 */ | ||
76 | |||
77 | #define VCMPEQUB VX4(6) | ||
78 | #define VCMPEQUH VX4(70) | ||
79 | #define VCMPEQUW VX4(134) | ||
80 | +#define VCMPEQUD VX4(199) /* v2.07 */ | ||
81 | #define VCMPGTSB VX4(774) | ||
82 | #define VCMPGTSH VX4(838) | ||
83 | #define VCMPGTSW VX4(902) | ||
84 | +#define VCMPGTSD VX4(967) /* v2.07 */ | ||
85 | #define VCMPGTUB VX4(518) | ||
86 | #define VCMPGTUH VX4(582) | ||
87 | #define VCMPGTUW VX4(646) | ||
88 | +#define VCMPGTUD VX4(711) /* v2.07 */ | ||
89 | |||
90 | #define VSLB VX4(260) | ||
91 | #define VSLH VX4(324) | ||
92 | #define VSLW VX4(388) | ||
93 | +#define VSLD VX4(1476) /* v2.07 */ | ||
94 | #define VSRB VX4(516) | ||
95 | #define VSRH VX4(580) | ||
96 | #define VSRW VX4(644) | ||
97 | +#define VSRD VX4(1732) /* v2.07 */ | ||
98 | #define VSRAB VX4(772) | ||
99 | #define VSRAH VX4(836) | ||
100 | #define VSRAW VX4(900) | ||
101 | +#define VSRAD VX4(964) /* v2.07 */ | ||
102 | #define VRLB VX4(4) | ||
103 | #define VRLH VX4(68) | ||
104 | #define VRLW VX4(132) | ||
105 | +#define VRLD VX4(196) /* v2.07 */ | ||
106 | |||
107 | #define VMULEUB VX4(520) | ||
108 | #define VMULEUH VX4(584) | ||
109 | +#define VMULEUW VX4(648) /* v2.07 */ | ||
110 | #define VMULOUB VX4(8) | ||
111 | #define VMULOUH VX4(72) | ||
112 | +#define VMULOUW VX4(136) /* v2.07 */ | ||
113 | +#define VMULUWM VX4(137) /* v2.07 */ | ||
114 | #define VMSUMUHM VX4(38) | ||
115 | |||
116 | #define VMRGHB VX4(12) | ||
117 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
118 | #define VNOR VX4(1284) | ||
119 | #define VOR VX4(1156) | ||
120 | #define VXOR VX4(1220) | ||
121 | +#define VEQV VX4(1668) /* v2.07 */ | ||
122 | +#define VNAND VX4(1412) /* v2.07 */ | ||
123 | +#define VORC VX4(1348) /* v2.07 */ | ||
124 | |||
125 | #define VSPLTB VX4(524) | ||
126 | #define VSPLTH VX4(588) | ||
127 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
128 | case INDEX_op_andc_vec: | ||
129 | case INDEX_op_not_vec: | ||
130 | return 1; | ||
131 | + case INDEX_op_orc_vec: | ||
132 | + return have_isa_2_07; | ||
133 | case INDEX_op_add_vec: | ||
134 | case INDEX_op_sub_vec: | ||
135 | case INDEX_op_smax_vec: | ||
136 | case INDEX_op_smin_vec: | ||
137 | case INDEX_op_umax_vec: | ||
138 | case INDEX_op_umin_vec: | ||
139 | + case INDEX_op_shlv_vec: | ||
140 | + case INDEX_op_shrv_vec: | ||
141 | + case INDEX_op_sarv_vec: | ||
142 | + return vece <= MO_32 || have_isa_2_07; | ||
143 | case INDEX_op_ssadd_vec: | ||
144 | case INDEX_op_sssub_vec: | ||
145 | case INDEX_op_usadd_vec: | ||
146 | case INDEX_op_ussub_vec: | ||
147 | - case INDEX_op_shlv_vec: | ||
148 | - case INDEX_op_shrv_vec: | ||
149 | - case INDEX_op_sarv_vec: | ||
150 | return vece <= MO_32; | ||
151 | case INDEX_op_cmp_vec: | ||
152 | - case INDEX_op_mul_vec: | ||
153 | case INDEX_op_shli_vec: | ||
154 | case INDEX_op_shri_vec: | ||
155 | case INDEX_op_sari_vec: | ||
156 | - return vece <= MO_32 ? -1 : 0; | ||
157 | + return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
158 | + case INDEX_op_mul_vec: | ||
159 | + switch (vece) { | ||
160 | + case MO_8: | ||
161 | + case MO_16: | ||
162 | + return -1; | ||
163 | + case MO_32: | ||
164 | + return have_isa_2_07 ? 1 : -1; | ||
165 | + } | ||
166 | + return 0; | ||
167 | case INDEX_op_bitsel_vec: | ||
168 | return have_vsx; | ||
169 | default: | ||
170 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
171 | const TCGArg *args, const int *const_args) | ||
172 | { | ||
173 | static const uint32_t | ||
174 | - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | ||
175 | - sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | ||
176 | - eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
177 | - gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
178 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
179 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
180 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
181 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
182 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
183 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
184 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
185 | usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
186 | sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
187 | ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
188 | - umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
189 | - smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
190 | - umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
191 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
192 | - shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
193 | - shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
194 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
195 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, | ||
196 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, | ||
197 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, | ||
198 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, | ||
199 | + shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, | ||
200 | + shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, | ||
201 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, | ||
202 | mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
203 | mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
204 | - muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
205 | - mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
206 | + muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, | ||
207 | + mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, | ||
208 | pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
209 | - rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
210 | + rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; | ||
211 | |||
212 | TCGType type = vecl + TCG_TYPE_V64; | ||
213 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
214 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
215 | case INDEX_op_sub_vec: | ||
216 | insn = sub_op[vece]; | ||
217 | break; | ||
218 | + case INDEX_op_mul_vec: | ||
219 | + tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
220 | + insn = VMULUWM; | ||
221 | + break; | ||
222 | case INDEX_op_ssadd_vec: | ||
223 | insn = ssadd_op[vece]; | ||
224 | break; | ||
225 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
226 | insn = VNOR; | ||
227 | a2 = a1; | ||
228 | break; | ||
229 | + case INDEX_op_orc_vec: | ||
230 | + insn = VORC; | ||
231 | + break; | ||
232 | |||
233 | case INDEX_op_cmp_vec: | ||
234 | switch (args[3]) { | ||
235 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
236 | { | ||
237 | bool need_swap = false, need_inv = false; | ||
238 | |||
239 | - tcg_debug_assert(vece <= MO_32); | ||
240 | + tcg_debug_assert(vece <= MO_32 || have_isa_2_07); | ||
241 | |||
242 | switch (cond) { | ||
243 | case TCG_COND_EQ: | ||
244 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
245 | break; | ||
246 | |||
247 | case MO_32: | ||
248 | + tcg_debug_assert(!have_isa_2_07); | ||
249 | t3 = tcg_temp_new_vec(type); | ||
250 | t4 = tcg_temp_new_vec(type); | ||
251 | tcg_gen_dupi_vec(MO_8, t4, -16); | ||
252 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
253 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
254 | have_isa = tcg_isa_2_06; | ||
255 | } | ||
256 | +#ifdef PPC_FEATURE2_ARCH_2_07 | ||
257 | + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { | ||
258 | + have_isa = tcg_isa_2_07; | ||
259 | + } | ||
260 | +#endif | ||
261 | #ifdef PPC_FEATURE2_ARCH_3_00 | ||
262 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | ||
263 | have_isa = tcg_isa_3_00; | ||
264 | -- | ||
265 | 2.17.1 | ||
266 | |||
267 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are conditional only on MSR.VSX and | ||
2 | are thus part of the VSX instruction set, and not Altivec. | ||
3 | This includes double-word loads and stores. | ||
4 | 1 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/ppc/tcg-target.inc.c | 11 +++++++++++ | ||
9 | 1 file changed, 11 insertions(+) | ||
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/ppc/tcg-target.inc.c | ||
14 | +++ b/tcg/ppc/tcg-target.inc.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
16 | #define LVEWX XO31(71) | ||
17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | ||
18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | ||
19 | +#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ | ||
20 | |||
21 | #define STVX XO31(231) | ||
22 | #define STVEWX XO31(199) | ||
23 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | ||
24 | +#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | ||
25 | |||
26 | #define VADDSBS VX4(768) | ||
27 | #define VADDUBS VX4(512) | ||
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
29 | tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | ||
30 | break; | ||
31 | } | ||
32 | + if (have_isa_2_07 && have_vsx) { | ||
33 | + tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); | ||
34 | + break; | ||
35 | + } | ||
36 | tcg_debug_assert((offset & 3) == 0); | ||
37 | tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
38 | shift = (offset - 4) & 0xc; | ||
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
40 | tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
41 | break; | ||
42 | } | ||
43 | + if (have_isa_2_07 && have_vsx) { | ||
44 | + tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); | ||
45 | + break; | ||
46 | + } | ||
47 | + assert((offset & 3) == 0); | ||
48 | tcg_debug_assert((offset & 3) == 0); | ||
49 | shift = (offset - 4) & 0xc; | ||
50 | if (shift) { | ||
51 | -- | ||
52 | 2.17.1 | ||
53 | |||
54 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are conditional on MSR.FP when TX=0 and | ||
2 | MSR.VEC when TX=1. Since we only care about the Altivec registers, | ||
3 | and force TX=1, we can consider these to be Altivec instructions. | ||
4 | Since Altivec is true for any use of vector types, we only need | ||
5 | to test have_isa_2_07. | ||
6 | 1 | ||
7 | This includes moves to and from the integer registers. | ||
8 | |||
9 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------ | ||
13 | 1 file changed, 26 insertions(+), 6 deletions(-) | ||
14 | |||
15 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/tcg/ppc/tcg-target.inc.c | ||
18 | +++ b/tcg/ppc/tcg-target.inc.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
20 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | ||
21 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | ||
22 | |||
23 | +#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ | ||
24 | +#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ | ||
25 | +#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ | ||
26 | +#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ | ||
27 | + | ||
28 | #define RT(r) ((r)<<21) | ||
29 | #define RS(r) ((r)<<21) | ||
30 | #define RA(r) ((r)<<16) | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | ||
32 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
33 | /* fallthru */ | ||
34 | case TCG_TYPE_I32: | ||
35 | - if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | ||
36 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
37 | - break; | ||
38 | - } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | ||
39 | - /* Altivec does not support vector/integer moves. */ | ||
40 | - return false; | ||
41 | + if (ret < TCG_REG_V0) { | ||
42 | + if (arg < TCG_REG_V0) { | ||
43 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
44 | + break; | ||
45 | + } else if (have_isa_2_07) { | ||
46 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) | ||
47 | + | VRT(arg) | RA(ret)); | ||
48 | + break; | ||
49 | + } else { | ||
50 | + /* Altivec does not support vector->integer moves. */ | ||
51 | + return false; | ||
52 | + } | ||
53 | + } else if (arg < TCG_REG_V0) { | ||
54 | + if (have_isa_2_07) { | ||
55 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) | ||
56 | + | VRT(ret) | RA(arg)); | ||
57 | + break; | ||
58 | + } else { | ||
59 | + /* Altivec does not support integer->vector moves. */ | ||
60 | + return false; | ||
61 | + } | ||
62 | } | ||
63 | /* fallthru */ | ||
64 | case TCG_TYPE_V64: | ||
65 | -- | ||
66 | 2.17.1 | ||
67 | |||
68 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are conditional only on MSR.VEC and | ||
2 | are thus part of the Altivec instruction set, and not VSX. | ||
3 | This includes negation and compare not equal. | ||
4 | 1 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/ppc/tcg-target.h | 2 +- | ||
9 | tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++ | ||
10 | 2 files changed, 24 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/ppc/tcg-target.h | ||
15 | +++ b/tcg/ppc/tcg-target.h | ||
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | ||
17 | #define TCG_TARGET_HAS_andc_vec 1 | ||
18 | #define TCG_TARGET_HAS_orc_vec have_isa_2_07 | ||
19 | #define TCG_TARGET_HAS_not_vec 1 | ||
20 | -#define TCG_TARGET_HAS_neg_vec 0 | ||
21 | +#define TCG_TARGET_HAS_neg_vec have_isa_3_00 | ||
22 | #define TCG_TARGET_HAS_abs_vec 0 | ||
23 | #define TCG_TARGET_HAS_shi_vec 0 | ||
24 | #define TCG_TARGET_HAS_shs_vec 0 | ||
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/ppc/tcg-target.inc.c | ||
28 | +++ b/tcg/ppc/tcg-target.inc.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
30 | #define VSUBUWM VX4(1152) | ||
31 | #define VSUBUDM VX4(1216) /* v2.07 */ | ||
32 | |||
33 | +#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ | ||
34 | +#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ | ||
35 | + | ||
36 | #define VMAXSB VX4(258) | ||
37 | #define VMAXSH VX4(322) | ||
38 | #define VMAXSW VX4(386) | ||
39 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
40 | #define VCMPGTUH VX4(582) | ||
41 | #define VCMPGTUW VX4(646) | ||
42 | #define VCMPGTUD VX4(711) /* v2.07 */ | ||
43 | +#define VCMPNEB VX4(7) /* v3.00 */ | ||
44 | +#define VCMPNEH VX4(71) /* v3.00 */ | ||
45 | +#define VCMPNEW VX4(135) /* v3.00 */ | ||
46 | |||
47 | #define VSLB VX4(260) | ||
48 | #define VSLH VX4(324) | ||
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
50 | case INDEX_op_shri_vec: | ||
51 | case INDEX_op_sari_vec: | ||
52 | return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
53 | + case INDEX_op_neg_vec: | ||
54 | + return vece >= MO_32 && have_isa_3_00; | ||
55 | case INDEX_op_mul_vec: | ||
56 | switch (vece) { | ||
57 | case MO_8: | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
59 | static const uint32_t | ||
60 | add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
61 | sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
62 | + neg_op[4] = { 0, 0, VNEGW, VNEGD }, | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
64 | + ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, | ||
65 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
66 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
67 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
69 | case INDEX_op_sub_vec: | ||
70 | insn = sub_op[vece]; | ||
71 | break; | ||
72 | + case INDEX_op_neg_vec: | ||
73 | + insn = neg_op[vece]; | ||
74 | + a2 = a1; | ||
75 | + a1 = 0; | ||
76 | + break; | ||
77 | case INDEX_op_mul_vec: | ||
78 | tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
79 | insn = VMULUWM; | ||
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
81 | case TCG_COND_EQ: | ||
82 | insn = eq_op[vece]; | ||
83 | break; | ||
84 | + case TCG_COND_NE: | ||
85 | + insn = ne_op[vece]; | ||
86 | + break; | ||
87 | case TCG_COND_GT: | ||
88 | insn = gts_op[vece]; | ||
89 | break; | ||
90 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
91 | case TCG_COND_GTU: | ||
92 | break; | ||
93 | case TCG_COND_NE: | ||
94 | + if (have_isa_3_00 && vece <= MO_32) { | ||
95 | + break; | ||
96 | + } | ||
97 | + /* fall through */ | ||
98 | case TCG_COND_LE: | ||
99 | case TCG_COND_LEU: | ||
100 | need_inv = true; | ||
101 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
102 | case INDEX_op_dup2_vec: | ||
103 | return &v_v_v; | ||
104 | case INDEX_op_not_vec: | ||
105 | + case INDEX_op_neg_vec: | ||
106 | case INDEX_op_dup_vec: | ||
107 | return &v_v; | ||
108 | case INDEX_op_ld_vec: | ||
109 | -- | ||
110 | 2.17.1 | ||
111 | |||
112 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are a mix of those like LXSD that are | ||
2 | conditional only on MSR.VEC and those like LXV that are | ||
3 | conditional on MSR.VEC for TX=1. Thus, in the end, we can | ||
4 | consider all of these as Altivec instructions. | ||
5 | 1 | ||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++-------- | ||
10 | 1 file changed, 38 insertions(+), 9 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/ppc/tcg-target.inc.c | ||
15 | +++ b/tcg/ppc/tcg-target.inc.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | ||
18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | ||
19 | #define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ | ||
20 | +#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ | ||
21 | +#define LXSD (OPCD(57) | 2) /* v3.00 */ | ||
22 | +#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ | ||
23 | |||
24 | #define STVX XO31(231) | ||
25 | #define STVEWX XO31(199) | ||
26 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | ||
27 | #define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | ||
28 | +#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ | ||
29 | +#define STXSD (OPCD(61) | 2) /* v3.00 */ | ||
30 | |||
31 | #define VADDSBS VX4(768) | ||
32 | #define VADDUBS VX4(512) | ||
33 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
34 | TCGReg base, tcg_target_long offset) | ||
35 | { | ||
36 | tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; | ||
37 | - bool is_store = false; | ||
38 | + bool is_int_store = false; | ||
39 | TCGReg rs = TCG_REG_TMP1; | ||
40 | |||
41 | switch (opi) { | ||
42 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
43 | break; | ||
44 | } | ||
45 | break; | ||
46 | + case LXSD: | ||
47 | + case STXSD: | ||
48 | + align = 3; | ||
49 | + break; | ||
50 | + case LXV: | ||
51 | + case STXV: | ||
52 | + align = 15; | ||
53 | + break; | ||
54 | case STD: | ||
55 | align = 3; | ||
56 | /* FALLTHRU */ | ||
57 | case STB: case STH: case STW: | ||
58 | - is_store = true; | ||
59 | + is_int_store = true; | ||
60 | break; | ||
61 | } | ||
62 | |||
63 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
64 | if (rs == base) { | ||
65 | rs = TCG_REG_R0; | ||
66 | } | ||
67 | - tcg_debug_assert(!is_store || rs != rt); | ||
68 | + tcg_debug_assert(!is_int_store || rs != rt); | ||
69 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | ||
70 | tcg_out32(s, opx | TAB(rt & 31, base, rs)); | ||
71 | return; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
73 | case TCG_TYPE_V64: | ||
74 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
75 | if (have_vsx) { | ||
76 | - tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | ||
77 | + tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, | ||
78 | + ret, base, offset); | ||
79 | break; | ||
80 | } | ||
81 | tcg_debug_assert((offset & 7) == 0); | ||
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
83 | case TCG_TYPE_V128: | ||
84 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
85 | tcg_debug_assert((offset & 15) == 0); | ||
86 | - tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
87 | + tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0, | ||
88 | + LVX, ret, base, offset); | ||
89 | break; | ||
90 | default: | ||
91 | g_assert_not_reached(); | ||
92 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
93 | case TCG_TYPE_V64: | ||
94 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
95 | if (have_vsx) { | ||
96 | - tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | ||
97 | + tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, | ||
98 | + STXSDX, arg, base, offset); | ||
99 | break; | ||
100 | } | ||
101 | tcg_debug_assert((offset & 7) == 0); | ||
102 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
103 | break; | ||
104 | case TCG_TYPE_V128: | ||
105 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
106 | - tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
107 | + tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, | ||
108 | + STVX, arg, base, offset); | ||
109 | break; | ||
110 | default: | ||
111 | g_assert_not_reached(); | ||
112 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
113 | tcg_debug_assert(out >= TCG_REG_V0); | ||
114 | switch (vece) { | ||
115 | case MO_8: | ||
116 | - tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
117 | + if (have_isa_3_00) { | ||
118 | + tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); | ||
119 | + } else { | ||
120 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
121 | + } | ||
122 | elt = extract32(offset, 0, 4); | ||
123 | #ifndef HOST_WORDS_BIGENDIAN | ||
124 | elt ^= 15; | ||
125 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
126 | break; | ||
127 | case MO_16: | ||
128 | tcg_debug_assert((offset & 1) == 0); | ||
129 | - tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
130 | + if (have_isa_3_00) { | ||
131 | + tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); | ||
132 | + } else { | ||
133 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
134 | + } | ||
135 | elt = extract32(offset, 1, 3); | ||
136 | #ifndef HOST_WORDS_BIGENDIAN | ||
137 | elt ^= 7; | ||
138 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
139 | tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
140 | break; | ||
141 | case MO_32: | ||
142 | + if (have_isa_3_00) { | ||
143 | + tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); | ||
144 | + break; | ||
145 | + } | ||
146 | tcg_debug_assert((offset & 3) == 0); | ||
147 | tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
148 | elt = extract32(offset, 2, 2); | ||
149 | -- | ||
150 | 2.17.1 | ||
151 | |||
152 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These new instructions are conditional on MSR.VEC for TX=1, | ||
2 | so we can consider these Altivec instructions. | ||
3 | 1 | ||
4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++++-- | ||
8 | 1 file changed, 26 insertions(+), 2 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/ppc/tcg-target.inc.c | ||
13 | +++ b/tcg/ppc/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
15 | |||
16 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | ||
17 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | ||
18 | +#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ | ||
19 | |||
20 | #define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ | ||
21 | #define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ | ||
22 | #define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ | ||
23 | #define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ | ||
24 | +#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ | ||
25 | +#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ | ||
26 | |||
27 | #define RT(r) ((r)<<21) | ||
28 | #define RS(r) ((r)<<21) | ||
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
30 | return; | ||
31 | } | ||
32 | } | ||
33 | + if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) { | ||
34 | + tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); | ||
35 | + return; | ||
36 | + } | ||
37 | |||
38 | /* | ||
39 | * Otherwise we must load the value from the constant pool. | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
41 | TCGReg dst, TCGReg src) | ||
42 | { | ||
43 | tcg_debug_assert(dst >= TCG_REG_V0); | ||
44 | - tcg_debug_assert(src >= TCG_REG_V0); | ||
45 | + | ||
46 | + /* Splat from integer reg allowed via constraints for v3.00. */ | ||
47 | + if (src < TCG_REG_V0) { | ||
48 | + tcg_debug_assert(have_isa_3_00); | ||
49 | + switch (vece) { | ||
50 | + case MO_64: | ||
51 | + tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); | ||
52 | + return true; | ||
53 | + case MO_32: | ||
54 | + tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); | ||
55 | + return true; | ||
56 | + default: | ||
57 | + /* Fail, so that we fall back on either dupm or mov+dup. */ | ||
58 | + return false; | ||
59 | + } | ||
60 | + } | ||
61 | |||
62 | /* | ||
63 | * Recall we use (or emulate) VSX integer loads, so the integer is | ||
64 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
65 | static const TCGTargetOpDef sub2 | ||
66 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
67 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
68 | + static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } }; | ||
69 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
70 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
71 | static const TCGTargetOpDef v_v_v_v | ||
72 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
73 | return &v_v_v; | ||
74 | case INDEX_op_not_vec: | ||
75 | case INDEX_op_neg_vec: | ||
76 | - case INDEX_op_dup_vec: | ||
77 | return &v_v; | ||
78 | + case INDEX_op_dup_vec: | ||
79 | + return have_isa_3_00 ? &v_vr : &v_v; | ||
80 | case INDEX_op_ld_vec: | ||
81 | case INDEX_op_st_vec: | ||
82 | case INDEX_op_dupm_vec: | ||
83 | -- | ||
84 | 2.17.1 | ||
85 | |||
86 | diff view generated by jsdifflib |
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | DisasContextBase.pc_next has type vaddr; use the correct log format. |
---|---|---|---|
2 | 2 | ||
3 | qemu_cpu_kick is used for a number of reasons including to indicate | 3 | Fixes: 85c19af63e7 ("include/exec: Use vaddr in DisasContextBase for virtual addresses") |
4 | there is work to be done. However when thread=single the old | ||
5 | qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one | ||
6 | which can lead to a hang in the case that: | ||
7 | |||
8 | a) the kick is from outside the vCPUs (e.g. iothread) | ||
9 | b) the timers are paused (i.e. iothread calling run_on_cpu) | ||
10 | |||
11 | To avoid this let's split qemu_cpu_kick_rr_cpu into two functions. One for | ||
12 | the timer which continues to advance to the next timeslice and another | ||
13 | for all other kicks. | ||
14 | |||
15 | Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org> | ||
16 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
20 | --- | 5 | --- |
21 | cpus.c | 24 ++++++++++++++++++------ | 6 | target/mips/tcg/octeon_translate.c | 4 ++-- |
22 | 1 file changed, 18 insertions(+), 6 deletions(-) | 7 | 1 file changed, 2 insertions(+), 2 deletions(-) |
23 | 8 | ||
24 | diff --git a/cpus.c b/cpus.c | 9 | diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c |
25 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/cpus.c | 11 | --- a/target/mips/tcg/octeon_translate.c |
27 | +++ b/cpus.c | 12 | +++ b/target/mips/tcg/octeon_translate.c |
28 | @@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void) | 13 | @@ -XXX,XX +XXX,XX @@ static bool trans_BBIT(DisasContext *ctx, arg_BBIT *a) |
29 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; | 14 | TCGv p; |
30 | } | 15 | |
31 | 16 | if (ctx->hflags & MIPS_HFLAG_BMASK) { | |
32 | -/* Kick the currently round-robin scheduled vCPU */ | 17 | - LOG_DISAS("Branch in delay / forbidden slot at PC 0x" |
33 | -static void qemu_cpu_kick_rr_cpu(void) | 18 | - TARGET_FMT_lx "\n", ctx->base.pc_next); |
34 | +/* Kick the currently round-robin scheduled vCPU to next */ | 19 | + LOG_DISAS("Branch in delay / forbidden slot at PC 0x%" VADDR_PRIx "\n", |
35 | +static void qemu_cpu_kick_rr_next_cpu(void) | 20 | + ctx->base.pc_next); |
36 | { | 21 | generate_exception_end(ctx, EXCP_RI); |
37 | CPUState *cpu; | 22 | return true; |
38 | do { | 23 | } |
39 | @@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void) | ||
40 | } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); | ||
41 | } | ||
42 | |||
43 | +/* Kick all RR vCPUs */ | ||
44 | +static void qemu_cpu_kick_rr_cpus(void) | ||
45 | +{ | ||
46 | + CPUState *cpu; | ||
47 | + | ||
48 | + CPU_FOREACH(cpu) { | ||
49 | + cpu_exit(cpu); | ||
50 | + }; | ||
51 | +} | ||
52 | + | ||
53 | static void do_nothing(CPUState *cpu, run_on_cpu_data unused) | ||
54 | { | ||
55 | } | ||
56 | @@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type) | ||
57 | static void kick_tcg_thread(void *opaque) | ||
58 | { | ||
59 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); | ||
60 | - qemu_cpu_kick_rr_cpu(); | ||
61 | + qemu_cpu_kick_rr_next_cpu(); | ||
62 | } | ||
63 | |||
64 | static void start_tcg_kick_timer(void) | ||
65 | @@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu) | ||
66 | { | ||
67 | qemu_cond_broadcast(cpu->halt_cond); | ||
68 | if (tcg_enabled()) { | ||
69 | - cpu_exit(cpu); | ||
70 | - /* NOP unless doing single-thread RR */ | ||
71 | - qemu_cpu_kick_rr_cpu(); | ||
72 | + if (qemu_tcg_mttcg_enabled()) { | ||
73 | + cpu_exit(cpu); | ||
74 | + } else { | ||
75 | + qemu_cpu_kick_rr_cpus(); | ||
76 | + } | ||
77 | } else { | ||
78 | if (hax_enabled()) { | ||
79 | /* | ||
80 | -- | 24 | -- |
81 | 2.17.1 | 25 | 2.43.0 |
82 | |||
83 | diff view generated by jsdifflib |