1
The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb:
1
v3: One more try to fix macos issues.
2
2
3
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100)
3
4
r~
5
6
7
8
The following changes since commit e0209297cddd5e10a07e15fac5cca7aa1a8e0e59:
9
10
Merge tag 'pull-ufs-20250217' of https://gitlab.com/jeuk20.kim/qemu into staging (2025-02-18 10:58:48 +0800)
4
11
5
are available in the Git repository at:
12
are available in the Git repository at:
6
13
7
https://github.com/rth7680/qemu.git tags/pull-tcg-20191013
14
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250215-3
8
15
9
for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328:
16
for you to fetch changes up to e726f65867087d86436de05e9f372a86ec1381a6:
10
17
11
cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400)
18
tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 (2025-02-18 08:29:03 -0800)
12
19
13
----------------------------------------------------------------
20
----------------------------------------------------------------
14
Host vector support for tcg/ppc.
21
tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS
15
Fix thread=single cpu kicking.
22
tcg: Cleanups after disallowing 64-on-32
23
tcg: Introduce constraint for zero register
24
tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64
25
tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2
26
linux-user: Move TARGET_SA_RESTORER out of generic/signal.h
27
linux-user: Fix alignment when unmapping excess reservation
28
target/sparc: Fix register selection for all F*TOx and FxTO* instructions
29
target/sparc: Fix gdbstub incorrectly handling registers f32-f62
30
target/sparc: fake UltraSPARC T1 PCR and PIC registers
16
31
17
----------------------------------------------------------------
32
----------------------------------------------------------------
18
Alex Bennée (1):
33
Andreas Schwab (1):
19
cpus: kick all vCPUs when running thread=single
34
linux-user: Move TARGET_SA_RESTORER out of generic/signal.h
20
35
21
Richard Henderson (22):
36
Artyom Tarasenko (1):
22
tcg/ppc: Introduce Altivec registers
37
target/sparc: fake UltraSPARC T1 PCR and PIC registers
23
tcg/ppc: Introduce macro VX4()
24
tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()
25
tcg/ppc: Create TCGPowerISA and have_isa
26
tcg/ppc: Replace HAVE_ISA_2_06
27
tcg/ppc: Replace HAVE_ISEL macro with a variable
28
tcg/ppc: Enable tcg backend vector compilation
29
tcg/ppc: Add support for load/store/logic/comparison
30
tcg/ppc: Add support for vector maximum/minimum
31
tcg/ppc: Add support for vector add/subtract
32
tcg/ppc: Add support for vector saturated add/subtract
33
tcg/ppc: Support vector shift by immediate
34
tcg/ppc: Support vector multiply
35
tcg/ppc: Support vector dup2
36
tcg/ppc: Enable Altivec detection
37
tcg/ppc: Update vector support for VSX
38
tcg/ppc: Update vector support for v2.07 Altivec
39
tcg/ppc: Update vector support for v2.07 VSX
40
tcg/ppc: Update vector support for v2.07 FP
41
tcg/ppc: Update vector support for v3.00 Altivec
42
tcg/ppc: Update vector support for v3.00 load/store
43
tcg/ppc: Update vector support for v3.00 dup/dupi
44
38
45
tcg/ppc/tcg-target.h | 51 ++-
39
Fabiano Rosas (1):
46
tcg/ppc/tcg-target.opc.h | 13 +
40
elfload: Fix alignment when unmapping excess reservation
47
cpus.c | 24 +-
48
tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++----
49
4 files changed, 1119 insertions(+), 87 deletions(-)
50
create mode 100644 tcg/ppc/tcg-target.opc.h
51
41
42
Mikael Szreder (2):
43
target/sparc: Fix register selection for all F*TOx and FxTO* instructions
44
target/sparc: Fix gdbstub incorrectly handling registers f32-f62
45
46
Richard Henderson (23):
47
tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS
48
tcg: Remove TCG_OVERSIZED_GUEST
49
tcg: Drop support for two address registers in gen_ldst
50
tcg: Merge INDEX_op_qemu_*_{a32,a64}_*
51
tcg/arm: Drop addrhi from prepare_host_addr
52
tcg/i386: Drop addrhi from prepare_host_addr
53
tcg/mips: Drop addrhi from prepare_host_addr
54
tcg/ppc: Drop addrhi from prepare_host_addr
55
tcg: Replace addr{lo,hi}_reg with addr_reg in TCGLabelQemuLdst
56
plugins: Fix qemu_plugin_read_memory_vaddr parameters
57
accel/tcg: Fix tlb_set_page_with_attrs, tlb_set_page
58
target/loongarch: Use VADDR_PRIx for logging pc_next
59
target/mips: Use VADDR_PRIx for logging pc_next
60
include/exec: Change vaddr to uintptr_t
61
include/exec: Use uintptr_t in CPUTLBEntry
62
tcg: Introduce the 'z' constraint for a hardware zero register
63
tcg/aarch64: Use 'z' constraint
64
tcg/loongarch64: Use 'z' constraint
65
tcg/mips: Use 'z' constraint
66
tcg/riscv: Use 'z' constraint
67
tcg/sparc64: Use 'z' constraint
68
tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2
69
tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64
70
71
include/exec/tlb-common.h | 10 +-
72
include/exec/vaddr.h | 16 +-
73
include/qemu/atomic.h | 18 +-
74
include/tcg/oversized-guest.h | 23 ---
75
include/tcg/tcg-opc.h | 28 +--
76
include/tcg/tcg.h | 3 +-
77
linux-user/aarch64/target_signal.h | 2 +
78
linux-user/arm/target_signal.h | 2 +
79
linux-user/generic/signal.h | 1 -
80
linux-user/i386/target_signal.h | 2 +
81
linux-user/m68k/target_signal.h | 1 +
82
linux-user/microblaze/target_signal.h | 2 +
83
linux-user/ppc/target_signal.h | 2 +
84
linux-user/s390x/target_signal.h | 2 +
85
linux-user/sh4/target_signal.h | 2 +
86
linux-user/x86_64/target_signal.h | 2 +
87
linux-user/xtensa/target_signal.h | 2 +
88
tcg/aarch64/tcg-target-con-set.h | 12 +-
89
tcg/aarch64/tcg-target.h | 2 +
90
tcg/loongarch64/tcg-target-con-set.h | 15 +-
91
tcg/loongarch64/tcg-target-con-str.h | 1 -
92
tcg/loongarch64/tcg-target-has.h | 2 -
93
tcg/loongarch64/tcg-target.h | 2 +
94
tcg/mips/tcg-target-con-set.h | 26 +--
95
tcg/mips/tcg-target-con-str.h | 1 -
96
tcg/mips/tcg-target.h | 2 +
97
tcg/riscv/tcg-target-con-set.h | 10 +-
98
tcg/riscv/tcg-target-con-str.h | 1 -
99
tcg/riscv/tcg-target-has.h | 2 -
100
tcg/riscv/tcg-target.h | 2 +
101
tcg/sparc64/tcg-target-con-set.h | 12 +-
102
tcg/sparc64/tcg-target-con-str.h | 1 -
103
tcg/sparc64/tcg-target.h | 3 +-
104
tcg/tci/tcg-target.h | 1 -
105
accel/tcg/cputlb.c | 32 +---
106
accel/tcg/tcg-all.c | 9 +-
107
linux-user/elfload.c | 4 +-
108
plugins/api.c | 2 +-
109
target/arm/ptw.c | 34 ----
110
target/loongarch/tcg/translate.c | 2 +-
111
target/mips/tcg/octeon_translate.c | 4 +-
112
target/riscv/cpu_helper.c | 13 +-
113
target/sparc/gdbstub.c | 18 +-
114
target/sparc/translate.c | 19 +++
115
tcg/optimize.c | 21 +--
116
tcg/tcg-op-ldst.c | 103 +++--------
117
tcg/tcg.c | 97 +++++------
118
tcg/tci.c | 119 +++----------
119
docs/devel/multi-thread-tcg.rst | 1 -
120
docs/devel/tcg-ops.rst | 4 +-
121
target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 2 +-
122
target/sparc/insns.decode | 19 ++-
123
tcg/aarch64/tcg-target.c.inc | 86 ++++------
124
tcg/arm/tcg-target.c.inc | 114 ++++---------
125
tcg/i386/tcg-target.c.inc | 190 +++++----------------
126
tcg/loongarch64/tcg-target.c.inc | 72 +++-----
127
tcg/mips/tcg-target.c.inc | 169 ++++++------------
128
tcg/ppc/tcg-target.c.inc | 164 +++++-------------
129
tcg/riscv/tcg-target.c.inc | 56 +++---
130
tcg/s390x/tcg-target.c.inc | 40 ++---
131
tcg/sparc64/tcg-target.c.inc | 45 ++---
132
tcg/tci/tcg-target.c.inc | 60 ++-----
133
62 files changed, 550 insertions(+), 1162 deletions(-)
134
delete mode 100644 include/tcg/oversized-guest.h
diff view generated by jsdifflib
Deleted patch
1
Altivec supports 32 128-bit vector registers, whose names are
2
by convention v0 through v31.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
7
tcg/ppc/tcg-target.h | 11 ++++-
8
tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++---------------
9
2 files changed, 65 insertions(+), 34 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
14
+++ b/tcg/ppc/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@
16
# define TCG_TARGET_REG_BITS 32
17
#endif
18
19
-#define TCG_TARGET_NB_REGS 32
20
+#define TCG_TARGET_NB_REGS 64
21
#define TCG_TARGET_INSN_UNIT_SIZE 4
22
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
23
24
@@ -XXX,XX +XXX,XX @@ typedef enum {
25
TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
26
TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
27
28
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
29
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
30
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
31
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
32
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
33
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
34
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
35
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
36
+
37
TCG_REG_CALL_STACK = TCG_REG_R1,
38
TCG_AREG0 = TCG_REG_R27
39
} TCGReg;
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
43
+++ b/tcg/ppc/tcg-target.inc.c
44
@@ -XXX,XX +XXX,XX @@
45
# define TCG_REG_TMP1 TCG_REG_R12
46
#endif
47
48
+#define TCG_VEC_TMP1 TCG_REG_V0
49
+#define TCG_VEC_TMP2 TCG_REG_V1
50
+
51
#define TCG_REG_TB TCG_REG_R31
52
#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
53
54
@@ -XXX,XX +XXX,XX @@ bool have_isa_3_00;
55
#endif
56
57
#ifdef CONFIG_DEBUG_TCG
58
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
59
- "r0",
60
- "r1",
61
- "r2",
62
- "r3",
63
- "r4",
64
- "r5",
65
- "r6",
66
- "r7",
67
- "r8",
68
- "r9",
69
- "r10",
70
- "r11",
71
- "r12",
72
- "r13",
73
- "r14",
74
- "r15",
75
- "r16",
76
- "r17",
77
- "r18",
78
- "r19",
79
- "r20",
80
- "r21",
81
- "r22",
82
- "r23",
83
- "r24",
84
- "r25",
85
- "r26",
86
- "r27",
87
- "r28",
88
- "r29",
89
- "r30",
90
- "r31"
91
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
92
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
93
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
94
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
95
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
96
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
97
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
98
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
99
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
100
};
101
#endif
102
103
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
104
TCG_REG_R5,
105
TCG_REG_R4,
106
TCG_REG_R3,
107
+
108
+ /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
109
+ TCG_REG_V2, /* call clobbered, vectors */
110
+ TCG_REG_V3,
111
+ TCG_REG_V4,
112
+ TCG_REG_V5,
113
+ TCG_REG_V6,
114
+ TCG_REG_V7,
115
+ TCG_REG_V8,
116
+ TCG_REG_V9,
117
+ TCG_REG_V10,
118
+ TCG_REG_V11,
119
+ TCG_REG_V12,
120
+ TCG_REG_V13,
121
+ TCG_REG_V14,
122
+ TCG_REG_V15,
123
+ TCG_REG_V16,
124
+ TCG_REG_V17,
125
+ TCG_REG_V18,
126
+ TCG_REG_V19,
127
};
128
129
static const int tcg_target_call_iarg_regs[] = {
130
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
131
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
132
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
133
134
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
135
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
136
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
137
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
138
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
139
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
140
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
141
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
142
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
143
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
144
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
145
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
146
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
147
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
148
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
149
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
150
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
151
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
152
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
153
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
154
+
155
s->reserved_regs = 0;
156
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
157
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
158
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
159
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
160
#endif
161
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
162
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
163
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
164
if (USE_REG_TB) {
165
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
166
}
167
--
168
2.17.1
169
170
diff view generated by jsdifflib
Deleted patch
1
Introduce macro VX4() used for encoding Altivec instructions.
2
1
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
---
6
tcg/ppc/tcg-target.inc.c | 1 +
7
1 file changed, 1 insertion(+)
8
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
12
+++ b/tcg/ppc/tcg-target.inc.c
13
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
14
#define XO31(opc) (OPCD(31)|((opc)<<1))
15
#define XO58(opc) (OPCD(58)|(opc))
16
#define XO62(opc) (OPCD(62)|(opc))
17
+#define VX4(opc) (OPCD(4)|(opc))
18
19
#define B OPCD( 18)
20
#define BC OPCD( 16)
21
--
22
2.17.1
23
24
diff view generated by jsdifflib
Deleted patch
1
Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding
2
elements of Altivec instructions.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
7
tcg/ppc/tcg-target.inc.c | 5 +++++
8
1 file changed, 5 insertions(+)
9
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
13
+++ b/tcg/ppc/tcg-target.inc.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
15
#define MB64(b) ((b)<<5)
16
#define FXM(b) (1 << (19 - (b)))
17
18
+#define VRT(r) (((r) & 31) << 21)
19
+#define VRA(r) (((r) & 31) << 16)
20
+#define VRB(r) (((r) & 31) << 11)
21
+#define VRC(r) (((r) & 31) << 6)
22
+
23
#define LK 1
24
25
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
26
--
27
2.17.1
28
29
diff view generated by jsdifflib
Deleted patch
1
Introduce an enum to hold base < 2.06 < 3.00. Use macros to
2
preserve the existing have_isa_2_06 and have_isa_3_00 predicates.
3
1
4
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/ppc/tcg-target.h | 12 ++++++++++--
8
tcg/ppc/tcg-target.inc.c | 8 ++++----
9
2 files changed, 14 insertions(+), 6 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
14
+++ b/tcg/ppc/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ typedef enum {
16
TCG_AREG0 = TCG_REG_R27
17
} TCGReg;
18
19
-extern bool have_isa_2_06;
20
-extern bool have_isa_3_00;
21
+typedef enum {
22
+ tcg_isa_base,
23
+ tcg_isa_2_06,
24
+ tcg_isa_3_00,
25
+} TCGPowerISA;
26
+
27
+extern TCGPowerISA have_isa;
28
+
29
+#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
30
+#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
34
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/ppc/tcg-target.inc.c
37
+++ b/tcg/ppc/tcg-target.inc.c
38
@@ -XXX,XX +XXX,XX @@
39
40
static tcg_insn_unit *tb_ret_addr;
41
42
-bool have_isa_2_06;
43
-bool have_isa_3_00;
44
+TCGPowerISA have_isa;
45
46
#define HAVE_ISA_2_06 have_isa_2_06
47
#define HAVE_ISEL have_isa_2_06
48
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
49
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
50
unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
51
52
+ have_isa = tcg_isa_base;
53
if (hwcap & PPC_FEATURE_ARCH_2_06) {
54
- have_isa_2_06 = true;
55
+ have_isa = tcg_isa_2_06;
56
}
57
#ifdef PPC_FEATURE2_ARCH_3_00
58
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
59
- have_isa_3_00 = true;
60
+ have_isa = tcg_isa_3_00;
61
}
62
#endif
63
64
--
65
2.17.1
66
67
diff view generated by jsdifflib
Deleted patch
1
This is identical to have_isa_2_06, so replace it.
2
1
3
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/ppc/tcg-target.inc.c | 5 ++---
7
1 file changed, 2 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
12
+++ b/tcg/ppc/tcg-target.inc.c
13
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
14
15
TCGPowerISA have_isa;
16
17
-#define HAVE_ISA_2_06 have_isa_2_06
18
#define HAVE_ISEL have_isa_2_06
19
20
#ifndef CONFIG_SOFTMMU
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
22
}
23
} else {
24
uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
25
- if (!HAVE_ISA_2_06 && insn == LDBRX) {
26
+ if (!have_isa_2_06 && insn == LDBRX) {
27
tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
28
tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
29
tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
31
}
32
} else {
33
uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
34
- if (!HAVE_ISA_2_06 && insn == STDBRX) {
35
+ if (!have_isa_2_06 && insn == STDBRX) {
36
tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
37
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
38
tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
39
--
40
2.17.1
41
42
diff view generated by jsdifflib
Deleted patch
1
Previously we've been hard-coding knowledge that Power7 has ISEL, but
2
it was an optional instruction before that. Use the AT_HWCAP2 bit,
3
when present, to properly determine support.
4
1
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.inc.c | 17 ++++++++++++-----
9
1 file changed, 12 insertions(+), 5 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
14
+++ b/tcg/ppc/tcg-target.inc.c
15
@@ -XXX,XX +XXX,XX @@
16
static tcg_insn_unit *tb_ret_addr;
17
18
TCGPowerISA have_isa;
19
-
20
-#define HAVE_ISEL have_isa_2_06
21
+static bool have_isel;
22
23
#ifndef CONFIG_SOFTMMU
24
#define TCG_GUEST_BASE_REG 30
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
26
/* If we have ISEL, we can implement everything with 3 or 4 insns.
27
All other cases below are also at least 3 insns, so speed up the
28
code generator by not considering them and always using ISEL. */
29
- if (HAVE_ISEL) {
30
+ if (have_isel) {
31
int isel, tab;
32
33
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
35
36
tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
37
38
- if (HAVE_ISEL) {
39
+ if (have_isel) {
40
int isel = tcg_to_isel[cond];
41
42
/* Swap the V operands if the operation indicates inversion. */
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
44
} else {
45
tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
46
/* Note that the only other valid constant for a2 is 0. */
47
- if (HAVE_ISEL) {
48
+ if (have_isel) {
49
tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
50
tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
51
} else if (!const_a2 && a0 == a2) {
52
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
53
}
54
#endif
55
56
+#ifdef PPC_FEATURE2_HAS_ISEL
57
+ /* Prefer explicit instruction from the kernel. */
58
+ have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
59
+#else
60
+ /* Fall back to knowing Power7 (2.06) has ISEL. */
61
+ have_isel = have_isa_2_06;
62
+#endif
63
+
64
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
65
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
66
67
--
68
2.17.1
69
70
diff view generated by jsdifflib
Deleted patch
1
Introduce all of the flags required to enable tcg backend vector support,
2
and a runtime flag to indicate the host supports Altivec instructions.
3
1
4
For now, do not actually set have_altivec to true, because we have not
5
yet added all of the code to actually generate all of the required insns.
6
However, we must define these flags in order to disable ifndefs that create
7
stub versions of the functions added here.
8
9
The change to tcg_out_movi works around a buglet in tcg.c wherein if we
10
do not define tcg_out_dupi_vec we get a declared but not defined Werror,
11
but if we only declare it we get a defined but not used Werror. We need
12
to make this change to tcg_out_movi eventually anyway, so it's no biggie.
13
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
16
---
17
tcg/ppc/tcg-target.h | 25 ++++++++++++++++
18
tcg/ppc/tcg-target.opc.h | 5 ++++
19
tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++--
20
3 files changed, 89 insertions(+), 3 deletions(-)
21
create mode 100644 tcg/ppc/tcg-target.opc.h
22
23
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.h
26
+++ b/tcg/ppc/tcg-target.h
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
28
} TCGPowerISA;
29
30
extern TCGPowerISA have_isa;
31
+extern bool have_altivec;
32
33
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
34
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
35
@@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa;
36
#define TCG_TARGET_HAS_mulsh_i64 1
37
#endif
38
39
+/*
40
+ * While technically Altivec could support V64, it has no 64-bit store
41
+ * instruction and substituting two 32-bit stores makes the generated
42
+ * code quite large.
43
+ */
44
+#define TCG_TARGET_HAS_v64 0
45
+#define TCG_TARGET_HAS_v128 have_altivec
46
+#define TCG_TARGET_HAS_v256 0
47
+
48
+#define TCG_TARGET_HAS_andc_vec 0
49
+#define TCG_TARGET_HAS_orc_vec 0
50
+#define TCG_TARGET_HAS_not_vec 0
51
+#define TCG_TARGET_HAS_neg_vec 0
52
+#define TCG_TARGET_HAS_abs_vec 0
53
+#define TCG_TARGET_HAS_shi_vec 0
54
+#define TCG_TARGET_HAS_shs_vec 0
55
+#define TCG_TARGET_HAS_shv_vec 0
56
+#define TCG_TARGET_HAS_cmp_vec 0
57
+#define TCG_TARGET_HAS_mul_vec 0
58
+#define TCG_TARGET_HAS_sat_vec 0
59
+#define TCG_TARGET_HAS_minmax_vec 0
60
+#define TCG_TARGET_HAS_bitsel_vec 0
61
+#define TCG_TARGET_HAS_cmpsel_vec 0
62
+
63
void flush_icache_range(uintptr_t start, uintptr_t stop);
64
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
65
66
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
67
new file mode 100644
68
index XXXXXXX..XXXXXXX
69
--- /dev/null
70
+++ b/tcg/ppc/tcg-target.opc.h
71
@@ -XXX,XX +XXX,XX @@
72
+/*
73
+ * Target-specific opcodes for host vector expansion. These will be
74
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
75
+ * consider these to be UNSPEC with names.
76
+ */
77
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/ppc/tcg-target.inc.c
80
+++ b/tcg/ppc/tcg-target.inc.c
81
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
82
83
TCGPowerISA have_isa;
84
static bool have_isel;
85
+bool have_altivec;
86
87
#ifndef CONFIG_SOFTMMU
88
#define TCG_GUEST_BASE_REG 30
89
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
90
}
91
}
92
93
-static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
94
- tcg_target_long arg)
95
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
96
+ tcg_target_long val)
97
{
98
- tcg_out_movi_int(s, type, ret, arg, false);
99
+ g_assert_not_reached();
100
+}
101
+
102
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
103
+ tcg_target_long arg)
104
+{
105
+ switch (type) {
106
+ case TCG_TYPE_I32:
107
+ case TCG_TYPE_I64:
108
+ tcg_debug_assert(ret < TCG_REG_V0);
109
+ tcg_out_movi_int(s, type, ret, arg, false);
110
+ break;
111
+
112
+ case TCG_TYPE_V64:
113
+ case TCG_TYPE_V128:
114
+ tcg_debug_assert(ret >= TCG_REG_V0);
115
+ tcg_out_dupi_vec(s, type, ret, arg);
116
+ break;
117
+
118
+ default:
119
+ g_assert_not_reached();
120
+ }
121
}
122
123
static bool mask_operand(uint32_t c, int *mb, int *me)
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
125
}
126
}
127
128
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
129
+{
130
+ g_assert_not_reached();
131
+}
132
+
133
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134
+ TCGReg dst, TCGReg src)
135
+{
136
+ g_assert_not_reached();
137
+}
138
+
139
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
140
+ TCGReg out, TCGReg base, intptr_t offset)
141
+{
142
+ g_assert_not_reached();
143
+}
144
+
145
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
146
+ unsigned vecl, unsigned vece,
147
+ const TCGArg *args, const int *const_args)
148
+{
149
+ g_assert_not_reached();
150
+}
151
+
152
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
153
+ TCGArg a0, ...)
154
+{
155
+ g_assert_not_reached();
156
+}
157
+
158
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
159
{
160
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
161
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
162
163
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
164
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
165
+ if (have_altivec) {
166
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
167
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
168
+ }
169
170
tcg_target_call_clobber_regs = 0;
171
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
172
--
173
2.17.1
174
175
diff view generated by jsdifflib
Deleted patch
1
Add various bits and pieces related mostly to load and store
2
operations. In that context, logic, compare, and splat Altivec
3
instructions are used, and, therefore, the support for emitting
4
them is included in this patch too.
5
1
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
---
9
tcg/ppc/tcg-target.h | 6 +-
10
tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++---
11
2 files changed, 442 insertions(+), 36 deletions(-)
12
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
16
+++ b/tcg/ppc/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
18
#define TCG_TARGET_HAS_v128 have_altivec
19
#define TCG_TARGET_HAS_v256 0
20
21
-#define TCG_TARGET_HAS_andc_vec 0
22
+#define TCG_TARGET_HAS_andc_vec 1
23
#define TCG_TARGET_HAS_orc_vec 0
24
-#define TCG_TARGET_HAS_not_vec 0
25
+#define TCG_TARGET_HAS_not_vec 1
26
#define TCG_TARGET_HAS_neg_vec 0
27
#define TCG_TARGET_HAS_abs_vec 0
28
#define TCG_TARGET_HAS_shi_vec 0
29
#define TCG_TARGET_HAS_shs_vec 0
30
#define TCG_TARGET_HAS_shv_vec 0
31
-#define TCG_TARGET_HAS_cmp_vec 0
32
+#define TCG_TARGET_HAS_cmp_vec 1
33
#define TCG_TARGET_HAS_mul_vec 0
34
#define TCG_TARGET_HAS_sat_vec 0
35
#define TCG_TARGET_HAS_minmax_vec 0
36
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/ppc/tcg-target.inc.c
39
+++ b/tcg/ppc/tcg-target.inc.c
40
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
41
ct->ct |= TCG_CT_REG;
42
ct->u.regs = 0xffffffff;
43
break;
44
+ case 'v':
45
+ ct->ct |= TCG_CT_REG;
46
+ ct->u.regs = 0xffffffff00000000ull;
47
+ break;
48
case 'L': /* qemu_ld constraint */
49
ct->ct |= TCG_CT_REG;
50
ct->u.regs = 0xffffffff;
51
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
52
53
#define NOP ORI /* ori 0,0,0 */
54
55
+#define LVX XO31(103)
56
+#define LVEBX XO31(7)
57
+#define LVEHX XO31(39)
58
+#define LVEWX XO31(71)
59
+
60
+#define STVX XO31(231)
61
+#define STVEWX XO31(199)
62
+
63
+#define VCMPEQUB VX4(6)
64
+#define VCMPEQUH VX4(70)
65
+#define VCMPEQUW VX4(134)
66
+#define VCMPGTSB VX4(774)
67
+#define VCMPGTSH VX4(838)
68
+#define VCMPGTSW VX4(902)
69
+#define VCMPGTUB VX4(518)
70
+#define VCMPGTUH VX4(582)
71
+#define VCMPGTUW VX4(646)
72
+
73
+#define VAND VX4(1028)
74
+#define VANDC VX4(1092)
75
+#define VNOR VX4(1284)
76
+#define VOR VX4(1156)
77
+#define VXOR VX4(1220)
78
+
79
+#define VSPLTB VX4(524)
80
+#define VSPLTH VX4(588)
81
+#define VSPLTW VX4(652)
82
+#define VSPLTISB VX4(780)
83
+#define VSPLTISH VX4(844)
84
+#define VSPLTISW VX4(908)
85
+
86
+#define VSLDOI VX4(44)
87
+
88
#define RT(r) ((r)<<21)
89
#define RS(r) ((r)<<21)
90
#define RA(r) ((r)<<16)
91
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
92
intptr_t value, intptr_t addend)
93
{
94
tcg_insn_unit *target;
95
+ int16_t lo;
96
+ int32_t hi;
97
98
value += addend;
99
target = (tcg_insn_unit *)value;
100
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
101
}
102
*code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
103
break;
104
+ case R_PPC_ADDR32:
105
+ /*
106
+ * We are abusing this relocation type. Again, this points to
107
+ * a pair of insns, lis + load. This is an absolute address
108
+ * relocation for PPC32 so the lis cannot be removed.
109
+ */
110
+ lo = value;
111
+ hi = value - lo;
112
+ if (hi + lo != value) {
113
+ return false;
114
+ }
115
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
116
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
117
+ break;
118
default:
119
g_assert_not_reached();
120
}
121
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
122
123
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
124
{
125
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
126
- if (ret != arg) {
127
- tcg_out32(s, OR | SAB(arg, ret, arg));
128
+ if (ret == arg) {
129
+ return true;
130
+ }
131
+ switch (type) {
132
+ case TCG_TYPE_I64:
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
134
+ /* fallthru */
135
+ case TCG_TYPE_I32:
136
+ if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
137
+ tcg_out32(s, OR | SAB(arg, ret, arg));
138
+ break;
139
+ } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
140
+ /* Altivec does not support vector/integer moves. */
141
+ return false;
142
+ }
143
+ /* fallthru */
144
+ case TCG_TYPE_V64:
145
+ case TCG_TYPE_V128:
146
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
147
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
148
+ break;
149
+ default:
150
+ g_assert_not_reached();
151
}
152
return true;
153
}
154
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
155
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
156
tcg_target_long val)
157
{
158
- g_assert_not_reached();
159
+ uint32_t load_insn;
160
+ int rel, low;
161
+ intptr_t add;
162
+
163
+ low = (int8_t)val;
164
+ if (low >= -16 && low < 16) {
165
+ if (val == (tcg_target_long)dup_const(MO_8, low)) {
166
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
167
+ return;
168
+ }
169
+ if (val == (tcg_target_long)dup_const(MO_16, low)) {
170
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
171
+ return;
172
+ }
173
+ if (val == (tcg_target_long)dup_const(MO_32, low)) {
174
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
175
+ return;
176
+ }
177
+ }
178
+
179
+ /*
180
+ * Otherwise we must load the value from the constant pool.
181
+ */
182
+ if (USE_REG_TB) {
183
+ rel = R_PPC_ADDR16;
184
+ add = -(intptr_t)s->code_gen_ptr;
185
+ } else {
186
+ rel = R_PPC_ADDR32;
187
+ add = 0;
188
+ }
189
+
190
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
191
+ if (TCG_TARGET_REG_BITS == 64) {
192
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
193
+ } else {
194
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
195
+ }
196
+
197
+ if (USE_REG_TB) {
198
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
199
+ load_insn |= RA(TCG_REG_TB);
200
+ } else {
201
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
202
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
203
+ }
204
+ tcg_out32(s, load_insn);
205
}
206
207
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
208
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
209
align = 3;
210
/* FALLTHRU */
211
default:
212
- if (rt != TCG_REG_R0) {
213
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
214
rs = rt;
215
break;
216
}
217
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
218
}
219
220
/* For unaligned, or very large offsets, use the indexed form. */
221
- if (offset & align || offset != (int32_t)offset) {
222
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
223
if (rs == base) {
224
rs = TCG_REG_R0;
225
}
226
tcg_debug_assert(!is_store || rs != rt);
227
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
228
- tcg_out32(s, opx | TAB(rt, base, rs));
229
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
230
return;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
234
base = rs;
235
}
236
if (opi != ADDI || base != rt || l0 != 0) {
237
- tcg_out32(s, opi | TAI(rt, base, l0));
238
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
239
}
240
}
241
242
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
243
- TCGReg arg1, intptr_t arg2)
244
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
245
+ TCGReg va, TCGReg vb, int shb)
246
{
247
- int opi, opx;
248
-
249
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
250
- if (type == TCG_TYPE_I32) {
251
- opi = LWZ, opx = LWZX;
252
- } else {
253
- opi = LD, opx = LDX;
254
- }
255
- tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
256
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
257
}
258
259
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
260
- TCGReg arg1, intptr_t arg2)
261
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
262
+ TCGReg base, intptr_t offset)
263
{
264
- int opi, opx;
265
+ int shift;
266
267
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
268
- if (type == TCG_TYPE_I32) {
269
- opi = STW, opx = STWX;
270
- } else {
271
- opi = STD, opx = STDX;
272
+ switch (type) {
273
+ case TCG_TYPE_I32:
274
+ if (ret < TCG_REG_V0) {
275
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
276
+ break;
277
+ }
278
+ tcg_debug_assert((offset & 3) == 0);
279
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
280
+ shift = (offset - 4) & 0xc;
281
+ if (shift) {
282
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
283
+ }
284
+ break;
285
+ case TCG_TYPE_I64:
286
+ if (ret < TCG_REG_V0) {
287
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
288
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
289
+ break;
290
+ }
291
+ /* fallthru */
292
+ case TCG_TYPE_V64:
293
+ tcg_debug_assert(ret >= TCG_REG_V0);
294
+ tcg_debug_assert((offset & 7) == 0);
295
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
296
+ if (offset & 8) {
297
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
298
+ }
299
+ break;
300
+ case TCG_TYPE_V128:
301
+ tcg_debug_assert(ret >= TCG_REG_V0);
302
+ tcg_debug_assert((offset & 15) == 0);
303
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset);
304
+ break;
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
+}
309
+
310
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
311
+ TCGReg base, intptr_t offset)
312
+{
313
+ int shift;
314
+
315
+ switch (type) {
316
+ case TCG_TYPE_I32:
317
+ if (arg < TCG_REG_V0) {
318
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
319
+ break;
320
+ }
321
+ tcg_debug_assert((offset & 3) == 0);
322
+ shift = (offset - 4) & 0xc;
323
+ if (shift) {
324
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
325
+ arg = TCG_VEC_TMP1;
326
+ }
327
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
328
+ break;
329
+ case TCG_TYPE_I64:
330
+ if (arg < TCG_REG_V0) {
331
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
332
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
333
+ break;
334
+ }
335
+ /* fallthru */
336
+ case TCG_TYPE_V64:
337
+ tcg_debug_assert(arg >= TCG_REG_V0);
338
+ tcg_debug_assert((offset & 7) == 0);
339
+ if (offset & 8) {
340
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
341
+ arg = TCG_VEC_TMP1;
342
+ }
343
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
344
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
345
+ break;
346
+ case TCG_TYPE_V128:
347
+ tcg_debug_assert(arg >= TCG_REG_V0);
348
+ tcg_out_mem_long(s, 0, STVX, arg, base, offset);
349
+ break;
350
+ default:
351
+ g_assert_not_reached();
352
}
353
- tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
354
}
355
356
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
357
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
358
359
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
360
{
361
- g_assert_not_reached();
362
+ switch (opc) {
363
+ case INDEX_op_and_vec:
364
+ case INDEX_op_or_vec:
365
+ case INDEX_op_xor_vec:
366
+ case INDEX_op_andc_vec:
367
+ case INDEX_op_not_vec:
368
+ return 1;
369
+ case INDEX_op_cmp_vec:
370
+ return vece <= MO_32 ? -1 : 0;
371
+ default:
372
+ return 0;
373
+ }
374
}
375
376
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
377
TCGReg dst, TCGReg src)
378
{
379
- g_assert_not_reached();
380
+ tcg_debug_assert(dst >= TCG_REG_V0);
381
+ tcg_debug_assert(src >= TCG_REG_V0);
382
+
383
+ /*
384
+ * Recall we use (or emulate) VSX integer loads, so the integer is
385
+ * right justified within the left (zero-index) double-word.
386
+ */
387
+ switch (vece) {
388
+ case MO_8:
389
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
390
+ break;
391
+ case MO_16:
392
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
393
+ break;
394
+ case MO_32:
395
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
396
+ break;
397
+ case MO_64:
398
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
399
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
400
+ break;
401
+ default:
402
+ g_assert_not_reached();
403
+ }
404
+ return true;
405
}
406
407
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
408
TCGReg out, TCGReg base, intptr_t offset)
409
{
410
- g_assert_not_reached();
411
+ int elt;
412
+
413
+ tcg_debug_assert(out >= TCG_REG_V0);
414
+ switch (vece) {
415
+ case MO_8:
416
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
417
+ elt = extract32(offset, 0, 4);
418
+#ifndef HOST_WORDS_BIGENDIAN
419
+ elt ^= 15;
420
+#endif
421
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
422
+ break;
423
+ case MO_16:
424
+ tcg_debug_assert((offset & 1) == 0);
425
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
426
+ elt = extract32(offset, 1, 3);
427
+#ifndef HOST_WORDS_BIGENDIAN
428
+ elt ^= 7;
429
+#endif
430
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
431
+ break;
432
+ case MO_32:
433
+ tcg_debug_assert((offset & 3) == 0);
434
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
435
+ elt = extract32(offset, 2, 2);
436
+#ifndef HOST_WORDS_BIGENDIAN
437
+ elt ^= 3;
438
+#endif
439
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
440
+ break;
441
+ case MO_64:
442
+ tcg_debug_assert((offset & 7) == 0);
443
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
444
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
445
+ elt = extract32(offset, 3, 1);
446
+#ifndef HOST_WORDS_BIGENDIAN
447
+ elt = !elt;
448
+#endif
449
+ if (elt) {
450
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
451
+ } else {
452
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
453
+ }
454
+ break;
455
+ default:
456
+ g_assert_not_reached();
457
+ }
458
+ return true;
459
}
460
461
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
462
unsigned vecl, unsigned vece,
463
const TCGArg *args, const int *const_args)
464
{
465
- g_assert_not_reached();
466
+ static const uint32_t
467
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
468
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
469
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
470
+
471
+ TCGType type = vecl + TCG_TYPE_V64;
472
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
473
+ uint32_t insn;
474
+
475
+ switch (opc) {
476
+ case INDEX_op_ld_vec:
477
+ tcg_out_ld(s, type, a0, a1, a2);
478
+ return;
479
+ case INDEX_op_st_vec:
480
+ tcg_out_st(s, type, a0, a1, a2);
481
+ return;
482
+ case INDEX_op_dupm_vec:
483
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
484
+ return;
485
+
486
+ case INDEX_op_and_vec:
487
+ insn = VAND;
488
+ break;
489
+ case INDEX_op_or_vec:
490
+ insn = VOR;
491
+ break;
492
+ case INDEX_op_xor_vec:
493
+ insn = VXOR;
494
+ break;
495
+ case INDEX_op_andc_vec:
496
+ insn = VANDC;
497
+ break;
498
+ case INDEX_op_not_vec:
499
+ insn = VNOR;
500
+ a2 = a1;
501
+ break;
502
+
503
+ case INDEX_op_cmp_vec:
504
+ switch (args[3]) {
505
+ case TCG_COND_EQ:
506
+ insn = eq_op[vece];
507
+ break;
508
+ case TCG_COND_GT:
509
+ insn = gts_op[vece];
510
+ break;
511
+ case TCG_COND_GTU:
512
+ insn = gtu_op[vece];
513
+ break;
514
+ default:
515
+ g_assert_not_reached();
516
+ }
517
+ break;
518
+
519
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
520
+ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
521
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
522
+ default:
523
+ g_assert_not_reached();
524
+ }
525
+
526
+ tcg_debug_assert(insn != 0);
527
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
528
+}
529
+
530
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
531
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
532
+{
533
+ bool need_swap = false, need_inv = false;
534
+
535
+ tcg_debug_assert(vece <= MO_32);
536
+
537
+ switch (cond) {
538
+ case TCG_COND_EQ:
539
+ case TCG_COND_GT:
540
+ case TCG_COND_GTU:
541
+ break;
542
+ case TCG_COND_NE:
543
+ case TCG_COND_LE:
544
+ case TCG_COND_LEU:
545
+ need_inv = true;
546
+ break;
547
+ case TCG_COND_LT:
548
+ case TCG_COND_LTU:
549
+ need_swap = true;
550
+ break;
551
+ case TCG_COND_GE:
552
+ case TCG_COND_GEU:
553
+ need_swap = need_inv = true;
554
+ break;
555
+ default:
556
+ g_assert_not_reached();
557
+ }
558
+
559
+ if (need_inv) {
560
+ cond = tcg_invert_cond(cond);
561
+ }
562
+ if (need_swap) {
563
+ TCGv_vec t1;
564
+ t1 = v1, v1 = v2, v2 = t1;
565
+ cond = tcg_swap_cond(cond);
566
+ }
567
+
568
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
569
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
570
+
571
+ if (need_inv) {
572
+ tcg_gen_not_vec(vece, v0, v0);
573
+ }
574
}
575
576
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
577
TCGArg a0, ...)
578
{
579
- g_assert_not_reached();
580
+ va_list va;
581
+ TCGv_vec v0, v1, v2;
582
+
583
+ va_start(va, a0);
584
+ v0 = temp_tcgv_vec(arg_temp(a0));
585
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
586
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
587
+
588
+ switch (opc) {
589
+ case INDEX_op_cmp_vec:
590
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
591
+ break;
592
+ default:
593
+ g_assert_not_reached();
594
+ }
595
+ va_end(va);
596
}
597
598
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
599
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
600
= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
601
static const TCGTargetOpDef sub2
602
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
603
+ static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
604
+ static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
605
+ static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
606
607
switch (op) {
608
case INDEX_op_goto_ptr:
609
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
610
return (TCG_TARGET_REG_BITS == 64 ? &S_S
611
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
612
613
+ case INDEX_op_and_vec:
614
+ case INDEX_op_or_vec:
615
+ case INDEX_op_xor_vec:
616
+ case INDEX_op_andc_vec:
617
+ case INDEX_op_orc_vec:
618
+ case INDEX_op_cmp_vec:
619
+ return &v_v_v;
620
+ case INDEX_op_not_vec:
621
+ case INDEX_op_dup_vec:
622
+ return &v_v;
623
+ case INDEX_op_ld_vec:
624
+ case INDEX_op_st_vec:
625
+ case INDEX_op_dupm_vec:
626
+ return &v_r;
627
+
628
default:
629
return NULL;
630
}
631
--
632
2.17.1
633
634
diff view generated by jsdifflib
Deleted patch
1
Add support for vector maximum/minimum using Altivec instructions
2
VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and
3
VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW.
4
1
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
---
8
tcg/ppc/tcg-target.h | 2 +-
9
tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++-
10
2 files changed, 40 insertions(+), 2 deletions(-)
11
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
15
+++ b/tcg/ppc/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
17
#define TCG_TARGET_HAS_cmp_vec 1
18
#define TCG_TARGET_HAS_mul_vec 0
19
#define TCG_TARGET_HAS_sat_vec 0
20
-#define TCG_TARGET_HAS_minmax_vec 0
21
+#define TCG_TARGET_HAS_minmax_vec 1
22
#define TCG_TARGET_HAS_bitsel_vec 0
23
#define TCG_TARGET_HAS_cmpsel_vec 0
24
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/ppc/tcg-target.inc.c
28
+++ b/tcg/ppc/tcg-target.inc.c
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
30
#define STVX XO31(231)
31
#define STVEWX XO31(199)
32
33
+#define VMAXSB VX4(258)
34
+#define VMAXSH VX4(322)
35
+#define VMAXSW VX4(386)
36
+#define VMAXUB VX4(2)
37
+#define VMAXUH VX4(66)
38
+#define VMAXUW VX4(130)
39
+#define VMINSB VX4(770)
40
+#define VMINSH VX4(834)
41
+#define VMINSW VX4(898)
42
+#define VMINUB VX4(514)
43
+#define VMINUH VX4(578)
44
+#define VMINUW VX4(642)
45
+
46
#define VCMPEQUB VX4(6)
47
#define VCMPEQUH VX4(70)
48
#define VCMPEQUW VX4(134)
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
50
case INDEX_op_andc_vec:
51
case INDEX_op_not_vec:
52
return 1;
53
+ case INDEX_op_smax_vec:
54
+ case INDEX_op_smin_vec:
55
+ case INDEX_op_umax_vec:
56
+ case INDEX_op_umin_vec:
57
+ return vece <= MO_32;
58
case INDEX_op_cmp_vec:
59
return vece <= MO_32 ? -1 : 0;
60
default:
61
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
62
static const uint32_t
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
64
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
65
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
66
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
67
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
68
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
69
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
70
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
71
72
TCGType type = vecl + TCG_TYPE_V64;
73
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
75
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
76
return;
77
78
+ case INDEX_op_smin_vec:
79
+ insn = smin_op[vece];
80
+ break;
81
+ case INDEX_op_umin_vec:
82
+ insn = umin_op[vece];
83
+ break;
84
+ case INDEX_op_smax_vec:
85
+ insn = smax_op[vece];
86
+ break;
87
+ case INDEX_op_umax_vec:
88
+ insn = umax_op[vece];
89
+ break;
90
case INDEX_op_and_vec:
91
insn = VAND;
92
break;
93
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
94
case INDEX_op_andc_vec:
95
case INDEX_op_orc_vec:
96
case INDEX_op_cmp_vec:
97
+ case INDEX_op_smax_vec:
98
+ case INDEX_op_smin_vec:
99
+ case INDEX_op_umax_vec:
100
+ case INDEX_op_umin_vec:
101
return &v_v_v;
102
case INDEX_op_not_vec:
103
case INDEX_op_dup_vec:
104
--
105
2.17.1
106
107
diff view generated by jsdifflib
Deleted patch
1
Add support for vector add/subtract using Altivec instructions:
2
VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
7
tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++
8
1 file changed, 20 insertions(+)
9
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
13
+++ b/tcg/ppc/tcg-target.inc.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
15
#define STVX XO31(231)
16
#define STVEWX XO31(199)
17
18
+#define VADDUBM VX4(0)
19
+#define VADDUHM VX4(64)
20
+#define VADDUWM VX4(128)
21
+
22
+#define VSUBUBM VX4(1024)
23
+#define VSUBUHM VX4(1088)
24
+#define VSUBUWM VX4(1152)
25
+
26
#define VMAXSB VX4(258)
27
#define VMAXSH VX4(322)
28
#define VMAXSW VX4(386)
29
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
30
case INDEX_op_andc_vec:
31
case INDEX_op_not_vec:
32
return 1;
33
+ case INDEX_op_add_vec:
34
+ case INDEX_op_sub_vec:
35
case INDEX_op_smax_vec:
36
case INDEX_op_smin_vec:
37
case INDEX_op_umax_vec:
38
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
39
const TCGArg *args, const int *const_args)
40
{
41
static const uint32_t
42
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
43
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
44
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
45
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
46
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
47
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
48
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
49
return;
50
51
+ case INDEX_op_add_vec:
52
+ insn = add_op[vece];
53
+ break;
54
+ case INDEX_op_sub_vec:
55
+ insn = sub_op[vece];
56
+ break;
57
case INDEX_op_smin_vec:
58
insn = smin_op[vece];
59
break;
60
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
61
return (TCG_TARGET_REG_BITS == 64 ? &S_S
62
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
63
64
+ case INDEX_op_add_vec:
65
+ case INDEX_op_sub_vec:
66
case INDEX_op_and_vec:
67
case INDEX_op_or_vec:
68
case INDEX_op_xor_vec:
69
--
70
2.17.1
71
72
diff view generated by jsdifflib
Deleted patch
1
Add support for vector saturated add/subtract using Altivec
2
instructions:
3
VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and
4
VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS.
5
1
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
---
9
tcg/ppc/tcg-target.h | 2 +-
10
tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++
11
2 files changed, 37 insertions(+), 1 deletion(-)
12
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
16
+++ b/tcg/ppc/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
18
#define TCG_TARGET_HAS_shv_vec 0
19
#define TCG_TARGET_HAS_cmp_vec 1
20
#define TCG_TARGET_HAS_mul_vec 0
21
-#define TCG_TARGET_HAS_sat_vec 0
22
+#define TCG_TARGET_HAS_sat_vec 1
23
#define TCG_TARGET_HAS_minmax_vec 1
24
#define TCG_TARGET_HAS_bitsel_vec 0
25
#define TCG_TARGET_HAS_cmpsel_vec 0
26
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/ppc/tcg-target.inc.c
29
+++ b/tcg/ppc/tcg-target.inc.c
30
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
31
#define STVX XO31(231)
32
#define STVEWX XO31(199)
33
34
+#define VADDSBS VX4(768)
35
+#define VADDUBS VX4(512)
36
#define VADDUBM VX4(0)
37
+#define VADDSHS VX4(832)
38
+#define VADDUHS VX4(576)
39
#define VADDUHM VX4(64)
40
+#define VADDSWS VX4(896)
41
+#define VADDUWS VX4(640)
42
#define VADDUWM VX4(128)
43
44
+#define VSUBSBS VX4(1792)
45
+#define VSUBUBS VX4(1536)
46
#define VSUBUBM VX4(1024)
47
+#define VSUBSHS VX4(1856)
48
+#define VSUBUHS VX4(1600)
49
#define VSUBUHM VX4(1088)
50
+#define VSUBSWS VX4(1920)
51
+#define VSUBUWS VX4(1664)
52
#define VSUBUWM VX4(1152)
53
54
#define VMAXSB VX4(258)
55
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
56
case INDEX_op_smin_vec:
57
case INDEX_op_umax_vec:
58
case INDEX_op_umin_vec:
59
+ case INDEX_op_ssadd_vec:
60
+ case INDEX_op_sssub_vec:
61
+ case INDEX_op_usadd_vec:
62
+ case INDEX_op_ussub_vec:
63
return vece <= MO_32;
64
case INDEX_op_cmp_vec:
65
return vece <= MO_32 ? -1 : 0;
66
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
67
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
68
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
69
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
70
+ ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
71
+ usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
72
+ sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
73
+ ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
74
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
75
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
76
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
78
case INDEX_op_sub_vec:
79
insn = sub_op[vece];
80
break;
81
+ case INDEX_op_ssadd_vec:
82
+ insn = ssadd_op[vece];
83
+ break;
84
+ case INDEX_op_sssub_vec:
85
+ insn = sssub_op[vece];
86
+ break;
87
+ case INDEX_op_usadd_vec:
88
+ insn = usadd_op[vece];
89
+ break;
90
+ case INDEX_op_ussub_vec:
91
+ insn = ussub_op[vece];
92
+ break;
93
case INDEX_op_smin_vec:
94
insn = smin_op[vece];
95
break;
96
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
97
case INDEX_op_andc_vec:
98
case INDEX_op_orc_vec:
99
case INDEX_op_cmp_vec:
100
+ case INDEX_op_ssadd_vec:
101
+ case INDEX_op_sssub_vec:
102
+ case INDEX_op_usadd_vec:
103
+ case INDEX_op_ussub_vec:
104
case INDEX_op_smax_vec:
105
case INDEX_op_smin_vec:
106
case INDEX_op_umax_vec:
107
--
108
2.17.1
109
110
diff view generated by jsdifflib
Deleted patch
1
For Altivec, shift by immediate is done via vector shift by vector,
2
and loading the immediate into a register.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
7
tcg/ppc/tcg-target.h | 2 +-
8
tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++--
9
2 files changed, 57 insertions(+), 3 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
14
+++ b/tcg/ppc/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
16
#define TCG_TARGET_HAS_abs_vec 0
17
#define TCG_TARGET_HAS_shi_vec 0
18
#define TCG_TARGET_HAS_shs_vec 0
19
-#define TCG_TARGET_HAS_shv_vec 0
20
+#define TCG_TARGET_HAS_shv_vec 1
21
#define TCG_TARGET_HAS_cmp_vec 1
22
#define TCG_TARGET_HAS_mul_vec 0
23
#define TCG_TARGET_HAS_sat_vec 1
24
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.inc.c
27
+++ b/tcg/ppc/tcg-target.inc.c
28
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
29
#define VCMPGTUH VX4(582)
30
#define VCMPGTUW VX4(646)
31
32
+#define VSLB VX4(260)
33
+#define VSLH VX4(324)
34
+#define VSLW VX4(388)
35
+#define VSRB VX4(516)
36
+#define VSRH VX4(580)
37
+#define VSRW VX4(644)
38
+#define VSRAB VX4(772)
39
+#define VSRAH VX4(836)
40
+#define VSRAW VX4(900)
41
+
42
#define VAND VX4(1028)
43
#define VANDC VX4(1092)
44
#define VNOR VX4(1284)
45
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
46
case INDEX_op_sssub_vec:
47
case INDEX_op_usadd_vec:
48
case INDEX_op_ussub_vec:
49
+ case INDEX_op_shlv_vec:
50
+ case INDEX_op_shrv_vec:
51
+ case INDEX_op_sarv_vec:
52
return vece <= MO_32;
53
case INDEX_op_cmp_vec:
54
+ case INDEX_op_shli_vec:
55
+ case INDEX_op_shri_vec:
56
+ case INDEX_op_sari_vec:
57
return vece <= MO_32 ? -1 : 0;
58
default:
59
return 0;
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
61
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
62
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
63
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
64
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
65
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
66
+ shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
67
+ shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
68
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
69
70
TCGType type = vecl + TCG_TYPE_V64;
71
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
73
case INDEX_op_umax_vec:
74
insn = umax_op[vece];
75
break;
76
+ case INDEX_op_shlv_vec:
77
+ insn = shlv_op[vece];
78
+ break;
79
+ case INDEX_op_shrv_vec:
80
+ insn = shrv_op[vece];
81
+ break;
82
+ case INDEX_op_sarv_vec:
83
+ insn = sarv_op[vece];
84
+ break;
85
case INDEX_op_and_vec:
86
insn = VAND;
87
break;
88
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
89
tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
90
}
91
92
+static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
93
+ TCGv_vec v1, TCGArg imm, TCGOpcode opci)
94
+{
95
+ TCGv_vec t1 = tcg_temp_new_vec(type);
96
+
97
+ /* Splat w/bytes for xxspltib. */
98
+ tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
99
+ vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
100
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
101
+ tcg_temp_free_vec(t1);
102
+}
103
+
104
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
105
TCGv_vec v1, TCGv_vec v2, TCGCond cond)
106
{
107
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
108
{
109
va_list va;
110
TCGv_vec v0, v1, v2;
111
+ TCGArg a2;
112
113
va_start(va, a0);
114
v0 = temp_tcgv_vec(arg_temp(a0));
115
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
116
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
117
+ a2 = va_arg(va, TCGArg);
118
119
switch (opc) {
120
+ case INDEX_op_shli_vec:
121
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
122
+ break;
123
+ case INDEX_op_shri_vec:
124
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
125
+ break;
126
+ case INDEX_op_sari_vec:
127
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
128
+ break;
129
case INDEX_op_cmp_vec:
130
+ v2 = temp_tcgv_vec(arg_temp(a2));
131
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
132
break;
133
default:
134
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
135
case INDEX_op_smin_vec:
136
case INDEX_op_umax_vec:
137
case INDEX_op_umin_vec:
138
+ case INDEX_op_shlv_vec:
139
+ case INDEX_op_shrv_vec:
140
+ case INDEX_op_sarv_vec:
141
return &v_v_v;
142
case INDEX_op_not_vec:
143
case INDEX_op_dup_vec:
144
--
145
2.17.1
146
147
diff view generated by jsdifflib
Deleted patch
1
For Altivec, vector multiply is always an expansion.
2
1
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
---
6
tcg/ppc/tcg-target.h | 2 +-
7
tcg/ppc/tcg-target.opc.h | 8 +++
8
tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++-
9
3 files changed, 121 insertions(+), 2 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
14
+++ b/tcg/ppc/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
16
#define TCG_TARGET_HAS_shs_vec 0
17
#define TCG_TARGET_HAS_shv_vec 1
18
#define TCG_TARGET_HAS_cmp_vec 1
19
-#define TCG_TARGET_HAS_mul_vec 0
20
+#define TCG_TARGET_HAS_mul_vec 1
21
#define TCG_TARGET_HAS_sat_vec 1
22
#define TCG_TARGET_HAS_minmax_vec 1
23
#define TCG_TARGET_HAS_bitsel_vec 0
24
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.opc.h
27
+++ b/tcg/ppc/tcg-target.opc.h
28
@@ -XXX,XX +XXX,XX @@
29
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
30
* consider these to be UNSPEC with names.
31
*/
32
+
33
+DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
34
+DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
35
+DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
36
+DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
37
+DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
38
+DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
39
+DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
43
+++ b/tcg/ppc/tcg-target.inc.c
44
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
45
#define VSRAB VX4(772)
46
#define VSRAH VX4(836)
47
#define VSRAW VX4(900)
48
+#define VRLB VX4(4)
49
+#define VRLH VX4(68)
50
+#define VRLW VX4(132)
51
+
52
+#define VMULEUB VX4(520)
53
+#define VMULEUH VX4(584)
54
+#define VMULOUB VX4(8)
55
+#define VMULOUH VX4(72)
56
+#define VMSUMUHM VX4(38)
57
+
58
+#define VMRGHB VX4(12)
59
+#define VMRGHH VX4(76)
60
+#define VMRGHW VX4(140)
61
+#define VMRGLB VX4(268)
62
+#define VMRGLH VX4(332)
63
+#define VMRGLW VX4(396)
64
+
65
+#define VPKUHUM VX4(14)
66
+#define VPKUWUM VX4(78)
67
68
#define VAND VX4(1028)
69
#define VANDC VX4(1092)
70
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
71
case INDEX_op_sarv_vec:
72
return vece <= MO_32;
73
case INDEX_op_cmp_vec:
74
+ case INDEX_op_mul_vec:
75
case INDEX_op_shli_vec:
76
case INDEX_op_shri_vec:
77
case INDEX_op_sari_vec:
78
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
79
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
80
shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
81
shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
82
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
83
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
84
+ mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
85
+ mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
86
+ muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
87
+ mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
88
+ pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
89
+ rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
90
91
TCGType type = vecl + TCG_TYPE_V64;
92
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
93
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
94
}
95
break;
96
97
+ case INDEX_op_ppc_mrgh_vec:
98
+ insn = mrgh_op[vece];
99
+ break;
100
+ case INDEX_op_ppc_mrgl_vec:
101
+ insn = mrgl_op[vece];
102
+ break;
103
+ case INDEX_op_ppc_muleu_vec:
104
+ insn = muleu_op[vece];
105
+ break;
106
+ case INDEX_op_ppc_mulou_vec:
107
+ insn = mulou_op[vece];
108
+ break;
109
+ case INDEX_op_ppc_pkum_vec:
110
+ insn = pkum_op[vece];
111
+ break;
112
+ case INDEX_op_ppc_rotl_vec:
113
+ insn = rotl_op[vece];
114
+ break;
115
+ case INDEX_op_ppc_msum_vec:
116
+ tcg_debug_assert(vece == MO_16);
117
+ tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
118
+ return;
119
+
120
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
121
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
122
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
123
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
124
}
125
}
126
127
+static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
128
+ TCGv_vec v1, TCGv_vec v2)
129
+{
130
+ TCGv_vec t1 = tcg_temp_new_vec(type);
131
+ TCGv_vec t2 = tcg_temp_new_vec(type);
132
+ TCGv_vec t3, t4;
133
+
134
+ switch (vece) {
135
+ case MO_8:
136
+ case MO_16:
137
+ vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
138
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
139
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
140
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
141
+ vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
142
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
143
+ vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
144
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
145
+ vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
146
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1));
147
+    break;
148
+
149
+ case MO_32:
150
+ t3 = tcg_temp_new_vec(type);
151
+ t4 = tcg_temp_new_vec(type);
152
+ tcg_gen_dupi_vec(MO_8, t4, -16);
153
+ vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
154
+ tcgv_vec_arg(v2), tcgv_vec_arg(t4));
155
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
156
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
157
+ tcg_gen_dupi_vec(MO_8, t3, 0);
158
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
159
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
160
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
161
+ tcgv_vec_arg(t3), tcgv_vec_arg(t4));
162
+ tcg_gen_add_vec(MO_32, v0, t2, t3);
163
+ tcg_temp_free_vec(t3);
164
+ tcg_temp_free_vec(t4);
165
+ break;
166
+
167
+ default:
168
+ g_assert_not_reached();
169
+ }
170
+ tcg_temp_free_vec(t1);
171
+ tcg_temp_free_vec(t2);
172
+}
173
+
174
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
175
TCGArg a0, ...)
176
{
177
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
178
v2 = temp_tcgv_vec(arg_temp(a2));
179
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
180
break;
181
+ case INDEX_op_mul_vec:
182
+ v2 = temp_tcgv_vec(arg_temp(a2));
183
+ expand_vec_mul(type, vece, v0, v1, v2);
184
+ break;
185
default:
186
g_assert_not_reached();
187
}
188
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
189
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
190
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
191
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
192
+ static const TCGTargetOpDef v_v_v_v
193
+ = { .args_ct_str = { "v", "v", "v", "v" } };
194
195
switch (op) {
196
case INDEX_op_goto_ptr:
197
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
198
199
case INDEX_op_add_vec:
200
case INDEX_op_sub_vec:
201
+ case INDEX_op_mul_vec:
202
case INDEX_op_and_vec:
203
case INDEX_op_or_vec:
204
case INDEX_op_xor_vec:
205
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
206
case INDEX_op_shlv_vec:
207
case INDEX_op_shrv_vec:
208
case INDEX_op_sarv_vec:
209
+ case INDEX_op_ppc_mrgh_vec:
210
+ case INDEX_op_ppc_mrgl_vec:
211
+ case INDEX_op_ppc_muleu_vec:
212
+ case INDEX_op_ppc_mulou_vec:
213
+ case INDEX_op_ppc_pkum_vec:
214
+ case INDEX_op_ppc_rotl_vec:
215
return &v_v_v;
216
case INDEX_op_not_vec:
217
case INDEX_op_dup_vec:
218
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
219
case INDEX_op_st_vec:
220
case INDEX_op_dupm_vec:
221
return &v_r;
222
+ case INDEX_op_ppc_msum_vec:
223
+ return &v_v_v_v;
224
225
default:
226
return NULL;
227
--
228
2.17.1
229
230
diff view generated by jsdifflib
Deleted patch
1
This is only used for 32-bit hosts.
2
1
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
---
6
tcg/ppc/tcg-target.inc.c | 9 +++++++++
7
1 file changed, 9 insertions(+)
8
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
12
+++ b/tcg/ppc/tcg-target.inc.c
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
14
}
15
break;
16
17
+ case INDEX_op_dup2_vec:
18
+ assert(TCG_TARGET_REG_BITS == 32);
19
+ /* With inputs a1 = xLxx, a2 = xHxx */
20
+ tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
21
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */
22
+ tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */
23
+ return;
24
+
25
case INDEX_op_ppc_mrgh_vec:
26
insn = mrgh_op[vece];
27
break;
28
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
29
case INDEX_op_ppc_mulou_vec:
30
case INDEX_op_ppc_pkum_vec:
31
case INDEX_op_ppc_rotl_vec:
32
+ case INDEX_op_dup2_vec:
33
return &v_v_v;
34
case INDEX_op_not_vec:
35
case INDEX_op_dup_vec:
36
--
37
2.17.1
38
39
diff view generated by jsdifflib
Deleted patch
1
Now that we have implemented the required tcg operations,
2
we can enable detection of host vector support.
3
1
4
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32)
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.inc.c | 4 ++++
9
1 file changed, 4 insertions(+)
10
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
14
+++ b/tcg/ppc/tcg-target.inc.c
15
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
16
have_isel = have_isa_2_06;
17
#endif
18
19
+ if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
20
+ have_altivec = true;
21
+ }
22
+
23
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
24
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
25
if (have_altivec) {
26
--
27
2.17.1
28
29
diff view generated by jsdifflib
Deleted patch
1
The VSX instruction set includes double-word loads and
2
stores, double-word load and splat, double-word permute, and bit
3
select, all of which require multiple operations in the Altivec
4
instruction set.
5
1
6
Because the VSX registers map %vsr32 to %vr0, and we have no current
7
intention or need to use vector registers outside %vr0-%vr19, force
8
on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't
9
have to otherwise modify the VR[TABC] macros.
10
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
13
---
14
tcg/ppc/tcg-target.h | 5 ++--
15
tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++----
16
2 files changed, 51 insertions(+), 6 deletions(-)
17
18
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/tcg/ppc/tcg-target.h
21
+++ b/tcg/ppc/tcg-target.h
22
@@ -XXX,XX +XXX,XX @@ typedef enum {
23
24
extern TCGPowerISA have_isa;
25
extern bool have_altivec;
26
+extern bool have_vsx;
27
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
29
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
30
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
31
* instruction and substituting two 32-bit stores makes the generated
32
* code quite large.
33
*/
34
-#define TCG_TARGET_HAS_v64 0
35
+#define TCG_TARGET_HAS_v64 have_vsx
36
#define TCG_TARGET_HAS_v128 have_altivec
37
#define TCG_TARGET_HAS_v256 0
38
39
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
40
#define TCG_TARGET_HAS_mul_vec 1
41
#define TCG_TARGET_HAS_sat_vec 1
42
#define TCG_TARGET_HAS_minmax_vec 1
43
-#define TCG_TARGET_HAS_bitsel_vec 0
44
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
45
#define TCG_TARGET_HAS_cmpsel_vec 0
46
47
void flush_icache_range(uintptr_t start, uintptr_t stop);
48
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/tcg/ppc/tcg-target.inc.c
51
+++ b/tcg/ppc/tcg-target.inc.c
52
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
53
TCGPowerISA have_isa;
54
static bool have_isel;
55
bool have_altivec;
56
+bool have_vsx;
57
58
#ifndef CONFIG_SOFTMMU
59
#define TCG_GUEST_BASE_REG 30
60
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
61
#define LVEBX XO31(7)
62
#define LVEHX XO31(39)
63
#define LVEWX XO31(71)
64
+#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
65
+#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
66
67
#define STVX XO31(231)
68
#define STVEWX XO31(199)
69
+#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
70
71
#define VADDSBS VX4(768)
72
#define VADDUBS VX4(512)
73
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
74
75
#define VSLDOI VX4(44)
76
77
+#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
78
+#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
79
+
80
#define RT(r) ((r)<<21)
81
#define RS(r) ((r)<<21)
82
#define RA(r) ((r)<<16)
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
84
add = 0;
85
}
86
87
- load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
88
- if (TCG_TARGET_REG_BITS == 64) {
89
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
90
+ if (have_vsx) {
91
+ load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
92
+ load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
93
+ if (TCG_TARGET_REG_BITS == 64) {
94
+ new_pool_label(s, val, rel, s->code_ptr, add);
95
+ } else {
96
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
97
+ }
98
} else {
99
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
100
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
101
+ if (TCG_TARGET_REG_BITS == 64) {
102
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
103
+ } else {
104
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
105
+ }
106
}
107
108
if (USE_REG_TB) {
109
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
110
/* fallthru */
111
case TCG_TYPE_V64:
112
tcg_debug_assert(ret >= TCG_REG_V0);
113
+ if (have_vsx) {
114
+ tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
115
+ break;
116
+ }
117
tcg_debug_assert((offset & 7) == 0);
118
tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
119
if (offset & 8) {
120
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
121
/* fallthru */
122
case TCG_TYPE_V64:
123
tcg_debug_assert(arg >= TCG_REG_V0);
124
+ if (have_vsx) {
125
+ tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
126
+ break;
127
+ }
128
tcg_debug_assert((offset & 7) == 0);
129
if (offset & 8) {
130
tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
131
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
132
case INDEX_op_shri_vec:
133
case INDEX_op_sari_vec:
134
return vece <= MO_32 ? -1 : 0;
135
+ case INDEX_op_bitsel_vec:
136
+ return have_vsx;
137
default:
138
return 0;
139
}
140
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141
tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
142
break;
143
case MO_64:
144
+ if (have_vsx) {
145
+ tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
146
+ break;
147
+ }
148
tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
149
tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
150
break;
151
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
152
tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
153
break;
154
case MO_64:
155
+ if (have_vsx) {
156
+ tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
157
+ break;
158
+ }
159
tcg_debug_assert((offset & 7) == 0);
160
tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
161
tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
162
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
163
}
164
break;
165
166
+ case INDEX_op_bitsel_vec:
167
+ tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
168
+ return;
169
+
170
case INDEX_op_dup2_vec:
171
assert(TCG_TARGET_REG_BITS == 32);
172
/* With inputs a1 = xLxx, a2 = xHxx */
173
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
174
case INDEX_op_st_vec:
175
case INDEX_op_dupm_vec:
176
return &v_r;
177
+ case INDEX_op_bitsel_vec:
178
case INDEX_op_ppc_msum_vec:
179
return &v_v_v_v;
180
181
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
182
183
if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
184
have_altivec = true;
185
+ /* We only care about the portion of VSX that overlaps Altivec. */
186
+ if (hwcap & PPC_FEATURE_HAS_VSX) {
187
+ have_vsx = true;
188
+ }
189
}
190
191
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
192
--
193
2.17.1
194
195
diff view generated by jsdifflib
Deleted patch
1
These new instructions are conditional only on MSR.VEC and
2
are thus part of the Altivec instruction set, and not VSX.
3
This includes lots of double-word arithmetic and a few extra
4
logical operations.
5
1
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/ppc/tcg-target.h | 4 +-
10
tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++----------
11
2 files changed, 67 insertions(+), 22 deletions(-)
12
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
16
+++ b/tcg/ppc/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
18
typedef enum {
19
tcg_isa_base,
20
tcg_isa_2_06,
21
+ tcg_isa_2_07,
22
tcg_isa_3_00,
23
} TCGPowerISA;
24
25
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
26
extern bool have_vsx;
27
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
29
+#define have_isa_2_07 (have_isa >= tcg_isa_2_07)
30
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
34
#define TCG_TARGET_HAS_v256 0
35
36
#define TCG_TARGET_HAS_andc_vec 1
37
-#define TCG_TARGET_HAS_orc_vec 0
38
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
39
#define TCG_TARGET_HAS_not_vec 1
40
#define TCG_TARGET_HAS_neg_vec 0
41
#define TCG_TARGET_HAS_abs_vec 0
42
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/ppc/tcg-target.inc.c
45
+++ b/tcg/ppc/tcg-target.inc.c
46
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
47
#define VADDSWS VX4(896)
48
#define VADDUWS VX4(640)
49
#define VADDUWM VX4(128)
50
+#define VADDUDM VX4(192) /* v2.07 */
51
52
#define VSUBSBS VX4(1792)
53
#define VSUBUBS VX4(1536)
54
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
55
#define VSUBSWS VX4(1920)
56
#define VSUBUWS VX4(1664)
57
#define VSUBUWM VX4(1152)
58
+#define VSUBUDM VX4(1216) /* v2.07 */
59
60
#define VMAXSB VX4(258)
61
#define VMAXSH VX4(322)
62
#define VMAXSW VX4(386)
63
+#define VMAXSD VX4(450) /* v2.07 */
64
#define VMAXUB VX4(2)
65
#define VMAXUH VX4(66)
66
#define VMAXUW VX4(130)
67
+#define VMAXUD VX4(194) /* v2.07 */
68
#define VMINSB VX4(770)
69
#define VMINSH VX4(834)
70
#define VMINSW VX4(898)
71
+#define VMINSD VX4(962) /* v2.07 */
72
#define VMINUB VX4(514)
73
#define VMINUH VX4(578)
74
#define VMINUW VX4(642)
75
+#define VMINUD VX4(706) /* v2.07 */
76
77
#define VCMPEQUB VX4(6)
78
#define VCMPEQUH VX4(70)
79
#define VCMPEQUW VX4(134)
80
+#define VCMPEQUD VX4(199) /* v2.07 */
81
#define VCMPGTSB VX4(774)
82
#define VCMPGTSH VX4(838)
83
#define VCMPGTSW VX4(902)
84
+#define VCMPGTSD VX4(967) /* v2.07 */
85
#define VCMPGTUB VX4(518)
86
#define VCMPGTUH VX4(582)
87
#define VCMPGTUW VX4(646)
88
+#define VCMPGTUD VX4(711) /* v2.07 */
89
90
#define VSLB VX4(260)
91
#define VSLH VX4(324)
92
#define VSLW VX4(388)
93
+#define VSLD VX4(1476) /* v2.07 */
94
#define VSRB VX4(516)
95
#define VSRH VX4(580)
96
#define VSRW VX4(644)
97
+#define VSRD VX4(1732) /* v2.07 */
98
#define VSRAB VX4(772)
99
#define VSRAH VX4(836)
100
#define VSRAW VX4(900)
101
+#define VSRAD VX4(964) /* v2.07 */
102
#define VRLB VX4(4)
103
#define VRLH VX4(68)
104
#define VRLW VX4(132)
105
+#define VRLD VX4(196) /* v2.07 */
106
107
#define VMULEUB VX4(520)
108
#define VMULEUH VX4(584)
109
+#define VMULEUW VX4(648) /* v2.07 */
110
#define VMULOUB VX4(8)
111
#define VMULOUH VX4(72)
112
+#define VMULOUW VX4(136) /* v2.07 */
113
+#define VMULUWM VX4(137) /* v2.07 */
114
#define VMSUMUHM VX4(38)
115
116
#define VMRGHB VX4(12)
117
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
118
#define VNOR VX4(1284)
119
#define VOR VX4(1156)
120
#define VXOR VX4(1220)
121
+#define VEQV VX4(1668) /* v2.07 */
122
+#define VNAND VX4(1412) /* v2.07 */
123
+#define VORC VX4(1348) /* v2.07 */
124
125
#define VSPLTB VX4(524)
126
#define VSPLTH VX4(588)
127
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
128
case INDEX_op_andc_vec:
129
case INDEX_op_not_vec:
130
return 1;
131
+ case INDEX_op_orc_vec:
132
+ return have_isa_2_07;
133
case INDEX_op_add_vec:
134
case INDEX_op_sub_vec:
135
case INDEX_op_smax_vec:
136
case INDEX_op_smin_vec:
137
case INDEX_op_umax_vec:
138
case INDEX_op_umin_vec:
139
+ case INDEX_op_shlv_vec:
140
+ case INDEX_op_shrv_vec:
141
+ case INDEX_op_sarv_vec:
142
+ return vece <= MO_32 || have_isa_2_07;
143
case INDEX_op_ssadd_vec:
144
case INDEX_op_sssub_vec:
145
case INDEX_op_usadd_vec:
146
case INDEX_op_ussub_vec:
147
- case INDEX_op_shlv_vec:
148
- case INDEX_op_shrv_vec:
149
- case INDEX_op_sarv_vec:
150
return vece <= MO_32;
151
case INDEX_op_cmp_vec:
152
- case INDEX_op_mul_vec:
153
case INDEX_op_shli_vec:
154
case INDEX_op_shri_vec:
155
case INDEX_op_sari_vec:
156
- return vece <= MO_32 ? -1 : 0;
157
+ return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
158
+ case INDEX_op_mul_vec:
159
+ switch (vece) {
160
+ case MO_8:
161
+ case MO_16:
162
+ return -1;
163
+ case MO_32:
164
+ return have_isa_2_07 ? 1 : -1;
165
+ }
166
+ return 0;
167
case INDEX_op_bitsel_vec:
168
return have_vsx;
169
default:
170
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
171
const TCGArg *args, const int *const_args)
172
{
173
static const uint32_t
174
- add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
175
- sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
176
- eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
177
- gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
178
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
179
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
180
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
181
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
182
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
183
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
184
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
185
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
186
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
187
ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
188
- umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
189
- smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
190
- umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
191
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
192
- shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
193
- shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
194
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
195
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
196
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
197
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
198
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
199
+ shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
200
+ shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
201
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
202
mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
203
mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
204
- muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
205
- mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
206
+ muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
207
+ mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
208
pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
209
- rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
210
+ rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
211
212
TCGType type = vecl + TCG_TYPE_V64;
213
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
215
case INDEX_op_sub_vec:
216
insn = sub_op[vece];
217
break;
218
+ case INDEX_op_mul_vec:
219
+ tcg_debug_assert(vece == MO_32 && have_isa_2_07);
220
+ insn = VMULUWM;
221
+ break;
222
case INDEX_op_ssadd_vec:
223
insn = ssadd_op[vece];
224
break;
225
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
226
insn = VNOR;
227
a2 = a1;
228
break;
229
+ case INDEX_op_orc_vec:
230
+ insn = VORC;
231
+ break;
232
233
case INDEX_op_cmp_vec:
234
switch (args[3]) {
235
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
236
{
237
bool need_swap = false, need_inv = false;
238
239
- tcg_debug_assert(vece <= MO_32);
240
+ tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
241
242
switch (cond) {
243
case TCG_COND_EQ:
244
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
245
    break;
246
247
case MO_32:
248
+ tcg_debug_assert(!have_isa_2_07);
249
t3 = tcg_temp_new_vec(type);
250
t4 = tcg_temp_new_vec(type);
251
tcg_gen_dupi_vec(MO_8, t4, -16);
252
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
253
if (hwcap & PPC_FEATURE_ARCH_2_06) {
254
have_isa = tcg_isa_2_06;
255
}
256
+#ifdef PPC_FEATURE2_ARCH_2_07
257
+ if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
258
+ have_isa = tcg_isa_2_07;
259
+ }
260
+#endif
261
#ifdef PPC_FEATURE2_ARCH_3_00
262
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
263
have_isa = tcg_isa_3_00;
264
--
265
2.17.1
266
267
diff view generated by jsdifflib
Deleted patch
1
These new instructions are conditional only on MSR.VSX and
2
are thus part of the VSX instruction set, and not Altivec.
3
This includes double-word loads and stores.
4
1
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.inc.c | 11 +++++++++++
9
1 file changed, 11 insertions(+)
10
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
14
+++ b/tcg/ppc/tcg-target.inc.c
15
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
16
#define LVEWX XO31(71)
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
19
+#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
20
21
#define STVX XO31(231)
22
#define STVEWX XO31(199)
23
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
24
+#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
25
26
#define VADDSBS VX4(768)
27
#define VADDUBS VX4(512)
28
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
29
tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
30
break;
31
}
32
+ if (have_isa_2_07 && have_vsx) {
33
+ tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
34
+ break;
35
+ }
36
tcg_debug_assert((offset & 3) == 0);
37
tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
38
shift = (offset - 4) & 0xc;
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
40
tcg_out_mem_long(s, STW, STWX, arg, base, offset);
41
break;
42
}
43
+ if (have_isa_2_07 && have_vsx) {
44
+ tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
45
+ break;
46
+ }
47
+ assert((offset & 3) == 0);
48
tcg_debug_assert((offset & 3) == 0);
49
shift = (offset - 4) & 0xc;
50
if (shift) {
51
--
52
2.17.1
53
54
diff view generated by jsdifflib
Deleted patch
1
These new instructions are conditional on MSR.FP when TX=0 and
2
MSR.VEC when TX=1. Since we only care about the Altivec registers,
3
and force TX=1, we can consider these to be Altivec instructions.
4
Since Altivec is true for any use of vector types, we only need
5
to test have_isa_2_07.
6
1
7
This includes moves to and from the integer registers.
8
9
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------
13
1 file changed, 26 insertions(+), 6 deletions(-)
14
15
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/ppc/tcg-target.inc.c
18
+++ b/tcg/ppc/tcg-target.inc.c
19
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
20
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
21
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
22
23
+#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
24
+#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
25
+#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
26
+#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
27
+
28
#define RT(r) ((r)<<21)
29
#define RS(r) ((r)<<21)
30
#define RA(r) ((r)<<16)
31
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
32
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
33
/* fallthru */
34
case TCG_TYPE_I32:
35
- if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
36
- tcg_out32(s, OR | SAB(arg, ret, arg));
37
- break;
38
- } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
39
- /* Altivec does not support vector/integer moves. */
40
- return false;
41
+ if (ret < TCG_REG_V0) {
42
+ if (arg < TCG_REG_V0) {
43
+ tcg_out32(s, OR | SAB(arg, ret, arg));
44
+ break;
45
+ } else if (have_isa_2_07) {
46
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
47
+ | VRT(arg) | RA(ret));
48
+ break;
49
+ } else {
50
+ /* Altivec does not support vector->integer moves. */
51
+ return false;
52
+ }
53
+ } else if (arg < TCG_REG_V0) {
54
+ if (have_isa_2_07) {
55
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
56
+ | VRT(ret) | RA(arg));
57
+ break;
58
+ } else {
59
+ /* Altivec does not support integer->vector moves. */
60
+ return false;
61
+ }
62
}
63
/* fallthru */
64
case TCG_TYPE_V64:
65
--
66
2.17.1
67
68
diff view generated by jsdifflib
Deleted patch
1
These new instructions are conditional only on MSR.VEC and
2
are thus part of the Altivec instruction set, and not VSX.
3
This includes negation and compare not equal.
4
1
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.h | 2 +-
9
tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++
10
2 files changed, 24 insertions(+), 1 deletion(-)
11
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
15
+++ b/tcg/ppc/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
17
#define TCG_TARGET_HAS_andc_vec 1
18
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
19
#define TCG_TARGET_HAS_not_vec 1
20
-#define TCG_TARGET_HAS_neg_vec 0
21
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
22
#define TCG_TARGET_HAS_abs_vec 0
23
#define TCG_TARGET_HAS_shi_vec 0
24
#define TCG_TARGET_HAS_shs_vec 0
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/ppc/tcg-target.inc.c
28
+++ b/tcg/ppc/tcg-target.inc.c
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
30
#define VSUBUWM VX4(1152)
31
#define VSUBUDM VX4(1216) /* v2.07 */
32
33
+#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */
34
+#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */
35
+
36
#define VMAXSB VX4(258)
37
#define VMAXSH VX4(322)
38
#define VMAXSW VX4(386)
39
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
40
#define VCMPGTUH VX4(582)
41
#define VCMPGTUW VX4(646)
42
#define VCMPGTUD VX4(711) /* v2.07 */
43
+#define VCMPNEB VX4(7) /* v3.00 */
44
+#define VCMPNEH VX4(71) /* v3.00 */
45
+#define VCMPNEW VX4(135) /* v3.00 */
46
47
#define VSLB VX4(260)
48
#define VSLH VX4(324)
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
50
case INDEX_op_shri_vec:
51
case INDEX_op_sari_vec:
52
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
53
+ case INDEX_op_neg_vec:
54
+ return vece >= MO_32 && have_isa_3_00;
55
case INDEX_op_mul_vec:
56
switch (vece) {
57
case MO_8:
58
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
59
static const uint32_t
60
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
61
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
62
+ neg_op[4] = { 0, 0, VNEGW, VNEGD },
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
64
+ ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
65
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
66
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
67
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
69
case INDEX_op_sub_vec:
70
insn = sub_op[vece];
71
break;
72
+ case INDEX_op_neg_vec:
73
+ insn = neg_op[vece];
74
+ a2 = a1;
75
+ a1 = 0;
76
+ break;
77
case INDEX_op_mul_vec:
78
tcg_debug_assert(vece == MO_32 && have_isa_2_07);
79
insn = VMULUWM;
80
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
81
case TCG_COND_EQ:
82
insn = eq_op[vece];
83
break;
84
+ case TCG_COND_NE:
85
+ insn = ne_op[vece];
86
+ break;
87
case TCG_COND_GT:
88
insn = gts_op[vece];
89
break;
90
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
91
case TCG_COND_GTU:
92
break;
93
case TCG_COND_NE:
94
+ if (have_isa_3_00 && vece <= MO_32) {
95
+ break;
96
+ }
97
+ /* fall through */
98
case TCG_COND_LE:
99
case TCG_COND_LEU:
100
need_inv = true;
101
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
102
case INDEX_op_dup2_vec:
103
return &v_v_v;
104
case INDEX_op_not_vec:
105
+ case INDEX_op_neg_vec:
106
case INDEX_op_dup_vec:
107
return &v_v;
108
case INDEX_op_ld_vec:
109
--
110
2.17.1
111
112
diff view generated by jsdifflib
Deleted patch
1
These new instructions are a mix of those like LXSD that are
2
conditional only on MSR.VEC and those like LXV that are
3
conditional on MSR.VEC for TX=1. Thus, in the end, we can
4
consider all of these as Altivec instructions.
5
1
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++--------
10
1 file changed, 38 insertions(+), 9 deletions(-)
11
12
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.inc.c
15
+++ b/tcg/ppc/tcg-target.inc.c
16
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
19
#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
20
+#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */
21
+#define LXSD (OPCD(57) | 2) /* v3.00 */
22
+#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */
23
24
#define STVX XO31(231)
25
#define STVEWX XO31(199)
26
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
27
#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
28
+#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
29
+#define STXSD (OPCD(61) | 2) /* v3.00 */
30
31
#define VADDSBS VX4(768)
32
#define VADDUBS VX4(512)
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
34
TCGReg base, tcg_target_long offset)
35
{
36
tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
37
- bool is_store = false;
38
+ bool is_int_store = false;
39
TCGReg rs = TCG_REG_TMP1;
40
41
switch (opi) {
42
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
43
break;
44
}
45
break;
46
+ case LXSD:
47
+ case STXSD:
48
+ align = 3;
49
+ break;
50
+ case LXV:
51
+ case STXV:
52
+ align = 15;
53
+ break;
54
case STD:
55
align = 3;
56
/* FALLTHRU */
57
case STB: case STH: case STW:
58
- is_store = true;
59
+ is_int_store = true;
60
break;
61
}
62
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
64
if (rs == base) {
65
rs = TCG_REG_R0;
66
}
67
- tcg_debug_assert(!is_store || rs != rt);
68
+ tcg_debug_assert(!is_int_store || rs != rt);
69
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
70
tcg_out32(s, opx | TAB(rt & 31, base, rs));
71
return;
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
73
case TCG_TYPE_V64:
74
tcg_debug_assert(ret >= TCG_REG_V0);
75
if (have_vsx) {
76
- tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
77
+ tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
78
+ ret, base, offset);
79
break;
80
}
81
tcg_debug_assert((offset & 7) == 0);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
83
case TCG_TYPE_V128:
84
tcg_debug_assert(ret >= TCG_REG_V0);
85
tcg_debug_assert((offset & 15) == 0);
86
- tcg_out_mem_long(s, 0, LVX, ret, base, offset);
87
+ tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
88
+ LVX, ret, base, offset);
89
break;
90
default:
91
g_assert_not_reached();
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
93
case TCG_TYPE_V64:
94
tcg_debug_assert(arg >= TCG_REG_V0);
95
if (have_vsx) {
96
- tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
97
+ tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
98
+ STXSDX, arg, base, offset);
99
break;
100
}
101
tcg_debug_assert((offset & 7) == 0);
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
103
break;
104
case TCG_TYPE_V128:
105
tcg_debug_assert(arg >= TCG_REG_V0);
106
- tcg_out_mem_long(s, 0, STVX, arg, base, offset);
107
+ tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
108
+ STVX, arg, base, offset);
109
break;
110
default:
111
g_assert_not_reached();
112
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
113
tcg_debug_assert(out >= TCG_REG_V0);
114
switch (vece) {
115
case MO_8:
116
- tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
117
+ if (have_isa_3_00) {
118
+ tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
119
+ } else {
120
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
121
+ }
122
elt = extract32(offset, 0, 4);
123
#ifndef HOST_WORDS_BIGENDIAN
124
elt ^= 15;
125
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
126
break;
127
case MO_16:
128
tcg_debug_assert((offset & 1) == 0);
129
- tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
130
+ if (have_isa_3_00) {
131
+ tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
132
+ } else {
133
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
134
+ }
135
elt = extract32(offset, 1, 3);
136
#ifndef HOST_WORDS_BIGENDIAN
137
elt ^= 7;
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
139
tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
140
break;
141
case MO_32:
142
+ if (have_isa_3_00) {
143
+ tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
144
+ break;
145
+ }
146
tcg_debug_assert((offset & 3) == 0);
147
tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
148
elt = extract32(offset, 2, 2);
149
--
150
2.17.1
151
152
diff view generated by jsdifflib
Deleted patch
1
These new instructions are conditional on MSR.VEC for TX=1,
2
so we can consider these Altivec instructions.
3
1
4
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++++--
8
1 file changed, 26 insertions(+), 2 deletions(-)
9
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
13
+++ b/tcg/ppc/tcg-target.inc.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
15
16
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
17
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
18
+#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
19
20
#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
21
#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
22
#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
23
#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
24
+#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
25
+#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */
26
27
#define RT(r) ((r)<<21)
28
#define RS(r) ((r)<<21)
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
30
return;
31
}
32
}
33
+ if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
34
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
35
+ return;
36
+ }
37
38
/*
39
* Otherwise we must load the value from the constant pool.
40
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
41
TCGReg dst, TCGReg src)
42
{
43
tcg_debug_assert(dst >= TCG_REG_V0);
44
- tcg_debug_assert(src >= TCG_REG_V0);
45
+
46
+ /* Splat from integer reg allowed via constraints for v3.00. */
47
+ if (src < TCG_REG_V0) {
48
+ tcg_debug_assert(have_isa_3_00);
49
+ switch (vece) {
50
+ case MO_64:
51
+ tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
52
+ return true;
53
+ case MO_32:
54
+ tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
55
+ return true;
56
+ default:
57
+ /* Fail, so that we fall back on either dupm or mov+dup. */
58
+ return false;
59
+ }
60
+ }
61
62
/*
63
* Recall we use (or emulate) VSX integer loads, so the integer is
64
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
65
static const TCGTargetOpDef sub2
66
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
67
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
68
+ static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
69
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
70
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
71
static const TCGTargetOpDef v_v_v_v
72
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
73
return &v_v_v;
74
case INDEX_op_not_vec:
75
case INDEX_op_neg_vec:
76
- case INDEX_op_dup_vec:
77
return &v_v;
78
+ case INDEX_op_dup_vec:
79
+ return have_isa_3_00 ? &v_vr : &v_v;
80
case INDEX_op_ld_vec:
81
case INDEX_op_st_vec:
82
case INDEX_op_dupm_vec:
83
--
84
2.17.1
85
86
diff view generated by jsdifflib
1
From: Alex Bennée <alex.bennee@linaro.org>
1
DisasContextBase.pc_next has type vaddr; use the correct log format.
2
2
3
qemu_cpu_kick is used for a number of reasons including to indicate
3
Fixes: 85c19af63e7 ("include/exec: Use vaddr in DisasContextBase for virtual addresses")
4
there is work to be done. However when thread=single the old
5
qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one
6
which can lead to a hang in the case that:
7
8
a) the kick is from outside the vCPUs (e.g. iothread)
9
b) the timers are paused (i.e. iothread calling run_on_cpu)
10
11
To avoid this, let's split qemu_cpu_kick_rr_cpu into two functions. One for
12
the timer which continues to advance to the next timeslice and another
13
for all other kicks.
14
15
Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org>
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
---
5
---
21
cpus.c | 24 ++++++++++++++++++------
6
target/mips/tcg/octeon_translate.c | 4 ++--
22
1 file changed, 18 insertions(+), 6 deletions(-)
7
1 file changed, 2 insertions(+), 2 deletions(-)
23
8
24
diff --git a/cpus.c b/cpus.c
9
diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c
25
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
26
--- a/cpus.c
11
--- a/target/mips/tcg/octeon_translate.c
27
+++ b/cpus.c
12
+++ b/target/mips/tcg/octeon_translate.c
28
@@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void)
13
@@ -XXX,XX +XXX,XX @@ static bool trans_BBIT(DisasContext *ctx, arg_BBIT *a)
29
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
14
TCGv p;
30
}
15
31
16
if (ctx->hflags & MIPS_HFLAG_BMASK) {
32
-/* Kick the currently round-robin scheduled vCPU */
17
- LOG_DISAS("Branch in delay / forbidden slot at PC 0x"
33
-static void qemu_cpu_kick_rr_cpu(void)
18
- TARGET_FMT_lx "\n", ctx->base.pc_next);
34
+/* Kick the currently round-robin scheduled vCPU to next */
19
+ LOG_DISAS("Branch in delay / forbidden slot at PC 0x%" VADDR_PRIx "\n",
35
+static void qemu_cpu_kick_rr_next_cpu(void)
20
+ ctx->base.pc_next);
36
{
21
generate_exception_end(ctx, EXCP_RI);
37
CPUState *cpu;
22
return true;
38
do {
23
}
39
@@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void)
40
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
41
}
42
43
+/* Kick all RR vCPUs */
44
+static void qemu_cpu_kick_rr_cpus(void)
45
+{
46
+ CPUState *cpu;
47
+
48
+ CPU_FOREACH(cpu) {
49
+ cpu_exit(cpu);
50
+ };
51
+}
52
+
53
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
54
{
55
}
56
@@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
57
static void kick_tcg_thread(void *opaque)
58
{
59
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
60
- qemu_cpu_kick_rr_cpu();
61
+ qemu_cpu_kick_rr_next_cpu();
62
}
63
64
static void start_tcg_kick_timer(void)
65
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu)
66
{
67
qemu_cond_broadcast(cpu->halt_cond);
68
if (tcg_enabled()) {
69
- cpu_exit(cpu);
70
- /* NOP unless doing single-thread RR */
71
- qemu_cpu_kick_rr_cpu();
72
+ if (qemu_tcg_mttcg_enabled()) {
73
+ cpu_exit(cpu);
74
+ } else {
75
+ qemu_cpu_kick_rr_cpus();
76
+ }
77
} else {
78
if (hax_enabled()) {
79
/*
80
--
24
--
81
2.17.1
25
2.43.0
82
83
diff view generated by jsdifflib