The following changes since commit 6eeea6725a70e6fcb5abba0764496bdab07ddfb3:

  Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2020-10-06' into staging (2020-10-06 21:13:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20201008

for you to fetch changes up to 62475e9d007d83db4d0a6ccebcda8914f392e9c9:

  accel/tcg: Fix computing of is_write for MIPS (2020-10-08 05:57:32 -0500)

----------------------------------------------------------------
Extend maximum gvec vector size
Fix i386 avx2 dupi
Fix mips host user-only write detection
Misc cleanups.

----------------------------------------------------------------
Kele Huang (1):
      accel/tcg: Fix computing of is_write for MIPS

Richard Henderson (10):
      tcg: Adjust simd_desc size encoding
      tcg: Drop union from TCGArgConstraint
      tcg: Move sorted_args into TCGArgConstraint.sort_index
      tcg: Remove TCG_CT_REG
      tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
      tcg: Remove TCGOpDef.used
      tcg/i386: Fix dupi for avx2 32-bit hosts
      tcg: Fix generation of dupi_vec for 32-bit host
      tcg/optimize: Fold dup2_vec
      tcg: Remove TCG_TARGET_HAS_cmp_vec

 include/tcg/tcg-gvec-desc.h  | 38 ++++++++++++------
 include/tcg/tcg.h            | 22 ++++------
 tcg/aarch64/tcg-target.h     |  1 -
 tcg/i386/tcg-target.h        |  1 -
 tcg/ppc/tcg-target.h         |  1 -
 accel/tcg/user-exec.c        | 43 ++++++++++++++++++--
 tcg/optimize.c               | 15 +++++++
 tcg/tcg-op-gvec.c            | 35 ++++++++++++----
 tcg/tcg-op-vec.c             | 12 ++++--
 tcg/tcg.c                    | 96 +++++++++++++++++++-------------------------
 tcg/aarch64/tcg-target.c.inc | 17 ++++----
 tcg/arm/tcg-target.c.inc     | 29 ++++++-------
 tcg/i386/tcg-target.c.inc    | 39 +++++++-----------
 tcg/mips/tcg-target.c.inc    | 21 +++++-----
 tcg/ppc/tcg-target.c.inc     | 29 ++++++-------
 tcg/riscv/tcg-target.c.inc   | 16 ++++----
 tcg/s390/tcg-target.c.inc    | 22 +++++-----
 tcg/sparc/tcg-target.c.inc   | 21 ++++------
 tcg/tci/tcg-target.c.inc     |  3 +-
 19 files changed, 244 insertions(+), 217 deletions(-)
New patch
With larger vector sizes, it turns out oprsz == maxsz, and we only
need to represent mismatch for oprsz <= 32.  We do, however, need
to represent larger oprsz and do so without reducing SIMD_DATA_BITS.

Reduce the size of the oprsz field and increase the maxsz field.
Steal the oprsz value of 24 to indicate equality with maxsz.

Tested-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
 tcg/tcg-op-gvec.c           | 35 ++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/tcg/tcg-gvec-desc.h
20
+++ b/include/tcg/tcg-gvec-desc.h
21
@@ -XXX,XX +XXX,XX @@
22
#ifndef TCG_TCG_GVEC_DESC_H
23
#define TCG_TCG_GVEC_DESC_H
24
25
-/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
26
-#define SIMD_OPRSZ_SHIFT 0
27
-#define SIMD_OPRSZ_BITS 5
28
+/*
29
+ * This configuration allows MAXSZ to represent 2048 bytes, and
30
+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
31
+ *
32
+ * Encode this with:
33
+ * 0, 1, 3 -> 8, 16, 32
34
+ * 2 -> maxsz
35
+ *
36
+ * This steals the input that would otherwise map to 24 to match maxsz.
37
+ */
38
+#define SIMD_MAXSZ_SHIFT 0
39
+#define SIMD_MAXSZ_BITS 8
40
41
-#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
42
-#define SIMD_MAXSZ_BITS 5
43
+#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
44
+#define SIMD_OPRSZ_BITS 2
45
46
-#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
47
+#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
48
#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
49
50
/* Create a descriptor from components. */
51
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
52
53
-/* Extract the operation size from a descriptor. */
54
-static inline intptr_t simd_oprsz(uint32_t desc)
55
-{
56
- return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
57
-}
58
-
59
/* Extract the max vector size from a descriptor. */
60
static inline intptr_t simd_maxsz(uint32_t desc)
61
{
62
- return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
63
+ return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
64
+}
65
+
66
+/* Extract the operation size from a descriptor. */
67
+static inline intptr_t simd_oprsz(uint32_t desc)
68
+{
69
+ uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
70
+ intptr_t o = f * 8 + 8;
71
+ intptr_t m = simd_maxsz(desc);
72
+ return f == 2 ? m : o;
73
}
74
75
/* Extract the operation-specific data from a descriptor. */
76
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/tcg/tcg-op-gvec.c
79
+++ b/tcg/tcg-op-gvec.c
80
@@ -XXX,XX +XXX,XX @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
81
of the operand offsets so that we can check them all at once. */
82
static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
83
{
84
- uint32_t opr_align = oprsz >= 16 ? 15 : 7;
85
- uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
86
- tcg_debug_assert(oprsz > 0);
87
- tcg_debug_assert(oprsz <= maxsz);
88
- tcg_debug_assert((oprsz & opr_align) == 0);
89
+ uint32_t max_align;
90
+
91
+ switch (oprsz) {
92
+ case 8:
93
+ case 16:
94
+ case 32:
95
+ tcg_debug_assert(oprsz <= maxsz);
96
+ break;
97
+ default:
98
+ tcg_debug_assert(oprsz == maxsz);
99
+ break;
100
+ }
101
+ tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
102
+
103
+ max_align = maxsz >= 16 ? 15 : 7;
104
tcg_debug_assert((maxsz & max_align) == 0);
105
tcg_debug_assert((ofs & max_align) == 0);
106
}
107
@@ -XXX,XX +XXX,XX @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
108
{
109
uint32_t desc = 0;
110
111
- assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
112
- assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
113
- assert(data == sextract32(data, 0, SIMD_DATA_BITS));
114
+ check_size_align(oprsz, maxsz, 0);
115
+ tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
116
117
oprsz = (oprsz / 8) - 1;
118
maxsz = (maxsz / 8) - 1;
119
+
120
+ /*
121
+ * We have just asserted in check_size_align that either
122
+ * oprsz is {8,16,32} or matches maxsz. Encode the final
123
+ * case with '2', as that would otherwise map to 24.
124
+ */
125
+ if (oprsz == maxsz) {
126
+ oprsz = 2;
127
+ }
128
+
129
desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
130
desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
131
desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
132
--
133
2.25.1
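For illustration, a minimal standalone sketch of the encode/decode round trip described in the commit message above. It is not the QEMU implementation: field(), encode(), decode_oprsz() and decode_maxsz() are local stand-ins for deposit32/extract32 and for simd_desc()/simd_oprsz()/simd_maxsz(), and the operation-specific data field is omitted.

/*
 * Sketch of the new oprsz/maxsz descriptor encoding (illustration only).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAXSZ_SHIFT 0
#define MAXSZ_BITS  8
#define OPRSZ_SHIFT (MAXSZ_SHIFT + MAXSZ_BITS)
#define OPRSZ_BITS  2

static uint32_t field(uint32_t desc, int shift, int bits)
{
    return (desc >> shift) & ((1u << bits) - 1);
}

static uint32_t encode(uint32_t oprsz, uint32_t maxsz)
{
    uint32_t o = oprsz / 8 - 1;
    uint32_t m = maxsz / 8 - 1;

    /* oprsz is 8, 16 or 32 (fields 0, 1, 3), or equal to maxsz (field 2). */
    if (o == m) {
        o = 2;
    }
    return (m << MAXSZ_SHIFT) | (o << OPRSZ_SHIFT);
}

static uint32_t decode_maxsz(uint32_t desc)
{
    return field(desc, MAXSZ_SHIFT, MAXSZ_BITS) * 8 + 8;
}

static uint32_t decode_oprsz(uint32_t desc)
{
    uint32_t f = field(desc, OPRSZ_SHIFT, OPRSZ_BITS);
    return f == 2 ? decode_maxsz(desc) : f * 8 + 8;
}

int main(void)
{
    /* A mismatch is only representable for oprsz of 8, 16 or 32 bytes... */
    assert(decode_oprsz(encode(16, 256)) == 16);
    assert(decode_maxsz(encode(16, 256)) == 256);
    /* ...while oprsz == maxsz now reaches the full 2048-byte range. */
    assert(decode_oprsz(encode(2048, 2048)) == 2048);
    printf("descriptor round-trips ok\n");
    return 0;
}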
New patch
The union is unused; let "regs" appear in the main structure
without the "u.regs" wrapping.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h            |  4 +---
 tcg/tcg.c                    | 22 +++++++++++-----------
 tcg/aarch64/tcg-target.c.inc | 14 +++++++-------
 tcg/arm/tcg-target.c.inc     | 26 +++++++++++++-------------
 tcg/i386/tcg-target.c.inc    | 26 +++++++++++++-------------
 tcg/mips/tcg-target.c.inc    | 18 +++++++++---------
 tcg/ppc/tcg-target.c.inc     | 24 ++++++++++++------------
 tcg/riscv/tcg-target.c.inc   | 14 +++++++-------
 tcg/s390/tcg-target.c.inc    | 18 +++++++++---------
 tcg/sparc/tcg-target.c.inc   | 16 ++++++++--------
 tcg/tci/tcg-target.c.inc     |  2 +-
 11 files changed, 91 insertions(+), 93 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/tcg/tcg.h
23
+++ b/include/tcg/tcg.h
24
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
25
typedef struct TCGArgConstraint {
26
uint16_t ct;
27
uint8_t alias_index;
28
- union {
29
- TCGRegSet regs;
30
- } u;
31
+ TCGRegSet regs;
32
} TCGArgConstraint;
33
34
#define TCG_MAX_OP_ARGS 16
35
diff --git a/tcg/tcg.c b/tcg/tcg.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/tcg.c
38
+++ b/tcg/tcg.c
39
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
40
return 0;
41
n = 0;
42
for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
43
- if (tcg_regset_test_reg(arg_ct->u.regs, i))
44
+ if (tcg_regset_test_reg(arg_ct->regs, i))
45
n++;
46
}
47
}
48
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
49
/* Incomplete TCGTargetOpDef entry. */
50
tcg_debug_assert(ct_str != NULL);
51
52
- def->args_ct[i].u.regs = 0;
53
+ def->args_ct[i].regs = 0;
54
def->args_ct[i].ct = 0;
55
while (*ct_str != '\0') {
56
switch(*ct_str) {
57
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
58
pset = la_temp_pref(ts);
59
set = *pset;
60
61
- set &= ct->u.regs;
62
+ set &= ct->regs;
63
if (ct->ct & TCG_CT_IALIAS) {
64
set &= op->output_pref[ct->alias_index];
65
}
66
/* If the combination is not possible, restart. */
67
if (set == 0) {
68
- set = ct->u.regs;
69
+ set = ct->regs;
70
}
71
*pset = set;
72
}
73
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
74
return;
75
}
76
77
- dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
78
- dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
79
+ dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
80
+ dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
81
82
/* Allocate the output register now. */
83
if (ots->val_type != TEMP_VAL_REG) {
84
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
85
}
86
}
87
88
- temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
89
+ temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
90
reg = ts->reg;
91
92
- if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
93
+ if (tcg_regset_test_reg(arg_ct->regs, reg)) {
94
/* nothing to do : the constraint is satisfied */
95
} else {
96
allocate_in_reg:
97
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
98
and move the temporary register into it */
99
temp_load(s, ts, tcg_target_available_regs[ts->type],
100
i_allocated_regs, 0);
101
- reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
102
+ reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
103
o_preferred_regs, ts->indirect_base);
104
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
105
/*
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
107
&& !const_args[arg_ct->alias_index]) {
108
reg = new_args[arg_ct->alias_index];
109
} else if (arg_ct->ct & TCG_CT_NEWREG) {
110
- reg = tcg_reg_alloc(s, arg_ct->u.regs,
111
+ reg = tcg_reg_alloc(s, arg_ct->regs,
112
i_allocated_regs | o_allocated_regs,
113
op->output_pref[k], ts->indirect_base);
114
} else {
115
- reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
116
+ reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
117
op->output_pref[k], ts->indirect_base);
118
}
119
tcg_regset_set_reg(o_allocated_regs, reg);
120
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
121
index XXXXXXX..XXXXXXX 100644
122
--- a/tcg/aarch64/tcg-target.c.inc
123
+++ b/tcg/aarch64/tcg-target.c.inc
124
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
125
switch (*ct_str++) {
126
case 'r': /* general registers */
127
ct->ct |= TCG_CT_REG;
128
- ct->u.regs |= 0xffffffffu;
129
+ ct->regs |= 0xffffffffu;
130
break;
131
case 'w': /* advsimd registers */
132
ct->ct |= TCG_CT_REG;
133
- ct->u.regs |= 0xffffffff00000000ull;
134
+ ct->regs |= 0xffffffff00000000ull;
135
break;
136
case 'l': /* qemu_ld / qemu_st address, data_reg */
137
ct->ct |= TCG_CT_REG;
138
- ct->u.regs = 0xffffffffu;
139
+ ct->regs = 0xffffffffu;
140
#ifdef CONFIG_SOFTMMU
141
/* x0 and x1 will be overwritten when reading the tlb entry,
142
and x2, and x3 for helper args, better to avoid using them. */
143
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
144
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
145
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
146
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
147
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
148
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
149
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
150
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
151
#endif
152
break;
153
case 'A': /* Valid for arithmetic immediate (positive or negative). */
154
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/arm/tcg-target.c.inc
157
+++ b/tcg/arm/tcg-target.c.inc
158
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
159
160
case 'r':
161
ct->ct |= TCG_CT_REG;
162
- ct->u.regs = 0xffff;
163
+ ct->regs = 0xffff;
164
break;
165
166
/* qemu_ld address */
167
case 'l':
168
ct->ct |= TCG_CT_REG;
169
- ct->u.regs = 0xffff;
170
+ ct->regs = 0xffff;
171
#ifdef CONFIG_SOFTMMU
172
/* r0-r2,lr will be overwritten when reading the tlb entry,
173
so don't use these. */
174
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
175
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
176
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
177
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
178
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
179
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
180
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
181
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
182
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
183
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
184
#endif
185
break;
186
187
/* qemu_st address & data */
188
case 's':
189
ct->ct |= TCG_CT_REG;
190
- ct->u.regs = 0xffff;
191
+ ct->regs = 0xffff;
192
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
193
and r0-r1 doing the byte swapping, so don't use these. */
194
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
195
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
196
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
197
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
198
#if defined(CONFIG_SOFTMMU)
199
/* Avoid clashes with registers being used for helper args */
200
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
201
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
202
#if TARGET_LONG_BITS == 64
203
/* Avoid clashes with registers being used for helper args */
204
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
205
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
206
#endif
207
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
208
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
209
#endif
210
break;
211
212
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
213
index XXXXXXX..XXXXXXX 100644
214
--- a/tcg/i386/tcg-target.c.inc
215
+++ b/tcg/i386/tcg-target.c.inc
216
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
217
switch(*ct_str++) {
218
case 'a':
219
ct->ct |= TCG_CT_REG;
220
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
221
+ tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
222
break;
223
case 'b':
224
ct->ct |= TCG_CT_REG;
225
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
226
+ tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
227
break;
228
case 'c':
229
ct->ct |= TCG_CT_REG;
230
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
231
+ tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
232
break;
233
case 'd':
234
ct->ct |= TCG_CT_REG;
235
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
236
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
237
break;
238
case 'S':
239
ct->ct |= TCG_CT_REG;
240
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
241
+ tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
242
break;
243
case 'D':
244
ct->ct |= TCG_CT_REG;
245
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
246
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
247
break;
248
case 'q':
249
/* A register that can be used as a byte operand. */
250
ct->ct |= TCG_CT_REG;
251
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
252
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
253
break;
254
case 'Q':
255
/* A register with an addressable second byte (e.g. %ah). */
256
ct->ct |= TCG_CT_REG;
257
- ct->u.regs = 0xf;
258
+ ct->regs = 0xf;
259
break;
260
case 'r':
261
/* A general register. */
262
ct->ct |= TCG_CT_REG;
263
- ct->u.regs |= ALL_GENERAL_REGS;
264
+ ct->regs |= ALL_GENERAL_REGS;
265
break;
266
case 'W':
267
/* With TZCNT/LZCNT, we can have operand-size as an input. */
268
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
269
case 'x':
270
/* A vector register. */
271
ct->ct |= TCG_CT_REG;
272
- ct->u.regs |= ALL_VECTOR_REGS;
273
+ ct->regs |= ALL_VECTOR_REGS;
274
break;
275
276
/* qemu_ld/st address constraint */
277
case 'L':
278
ct->ct |= TCG_CT_REG;
279
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
280
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
281
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
282
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
283
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
284
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
285
break;
286
287
case 'e':
288
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
289
index XXXXXXX..XXXXXXX 100644
290
--- a/tcg/mips/tcg-target.c.inc
291
+++ b/tcg/mips/tcg-target.c.inc
292
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
293
switch(*ct_str++) {
294
case 'r':
295
ct->ct |= TCG_CT_REG;
296
- ct->u.regs = 0xffffffff;
297
+ ct->regs = 0xffffffff;
298
break;
299
case 'L': /* qemu_ld input arg constraint */
300
ct->ct |= TCG_CT_REG;
301
- ct->u.regs = 0xffffffff;
302
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
303
+ ct->regs = 0xffffffff;
304
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
305
#if defined(CONFIG_SOFTMMU)
306
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
307
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
308
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
309
}
310
#endif
311
break;
312
case 'S': /* qemu_st constraint */
313
ct->ct |= TCG_CT_REG;
314
- ct->u.regs = 0xffffffff;
315
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
316
+ ct->regs = 0xffffffff;
317
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
318
#if defined(CONFIG_SOFTMMU)
319
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
320
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
321
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
322
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
323
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
324
} else {
325
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
326
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
327
}
328
#endif
329
break;
330
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
331
index XXXXXXX..XXXXXXX 100644
332
--- a/tcg/ppc/tcg-target.c.inc
333
+++ b/tcg/ppc/tcg-target.c.inc
334
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
335
switch (*ct_str++) {
336
case 'A': case 'B': case 'C': case 'D':
337
ct->ct |= TCG_CT_REG;
338
- tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
339
+ tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
340
break;
341
case 'r':
342
ct->ct |= TCG_CT_REG;
343
- ct->u.regs = 0xffffffff;
344
+ ct->regs = 0xffffffff;
345
break;
346
case 'v':
347
ct->ct |= TCG_CT_REG;
348
- ct->u.regs = 0xffffffff00000000ull;
349
+ ct->regs = 0xffffffff00000000ull;
350
break;
351
case 'L': /* qemu_ld constraint */
352
ct->ct |= TCG_CT_REG;
353
- ct->u.regs = 0xffffffff;
354
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
355
+ ct->regs = 0xffffffff;
356
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
357
#ifdef CONFIG_SOFTMMU
358
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
359
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
360
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
361
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
362
#endif
363
break;
364
case 'S': /* qemu_st constraint */
365
ct->ct |= TCG_CT_REG;
366
- ct->u.regs = 0xffffffff;
367
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
368
+ ct->regs = 0xffffffff;
369
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
370
#ifdef CONFIG_SOFTMMU
371
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
372
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
373
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
374
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
375
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
376
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
377
#endif
378
break;
379
case 'I':
380
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
381
index XXXXXXX..XXXXXXX 100644
382
--- a/tcg/riscv/tcg-target.c.inc
383
+++ b/tcg/riscv/tcg-target.c.inc
384
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
385
switch (*ct_str++) {
386
case 'r':
387
ct->ct |= TCG_CT_REG;
388
- ct->u.regs = 0xffffffff;
389
+ ct->regs = 0xffffffff;
390
break;
391
case 'L':
392
/* qemu_ld/qemu_st constraint */
393
ct->ct |= TCG_CT_REG;
394
- ct->u.regs = 0xffffffff;
395
+ ct->regs = 0xffffffff;
396
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
397
#if defined(CONFIG_SOFTMMU)
398
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
399
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
400
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
401
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
402
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
403
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
404
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
405
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
406
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
407
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
408
#endif
409
break;
410
case 'I':
411
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
412
index XXXXXXX..XXXXXXX 100644
413
--- a/tcg/s390/tcg-target.c.inc
414
+++ b/tcg/s390/tcg-target.c.inc
415
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
416
switch (*ct_str++) {
417
case 'r': /* all registers */
418
ct->ct |= TCG_CT_REG;
419
- ct->u.regs = 0xffff;
420
+ ct->regs = 0xffff;
421
break;
422
case 'L': /* qemu_ld/st constraint */
423
ct->ct |= TCG_CT_REG;
424
- ct->u.regs = 0xffff;
425
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
426
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
427
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
428
+ ct->regs = 0xffff;
429
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
430
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
431
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
432
break;
433
case 'a': /* force R2 for division */
434
ct->ct |= TCG_CT_REG;
435
- ct->u.regs = 0;
436
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
437
+ ct->regs = 0;
438
+ tcg_regset_set_reg(ct->regs, TCG_REG_R2);
439
break;
440
case 'b': /* force R3 for division */
441
ct->ct |= TCG_CT_REG;
442
- ct->u.regs = 0;
443
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
444
+ ct->regs = 0;
445
+ tcg_regset_set_reg(ct->regs, TCG_REG_R3);
446
break;
447
case 'A':
448
ct->ct |= TCG_CT_CONST_S33;
449
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
450
index XXXXXXX..XXXXXXX 100644
451
--- a/tcg/sparc/tcg-target.c.inc
452
+++ b/tcg/sparc/tcg-target.c.inc
453
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
454
switch (*ct_str++) {
455
case 'r':
456
ct->ct |= TCG_CT_REG;
457
- ct->u.regs = 0xffffffff;
458
+ ct->regs = 0xffffffff;
459
break;
460
case 'R':
461
ct->ct |= TCG_CT_REG;
462
- ct->u.regs = ALL_64;
463
+ ct->regs = ALL_64;
464
break;
465
case 'A': /* qemu_ld/st address constraint */
466
ct->ct |= TCG_CT_REG;
467
- ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
468
+ ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
469
reserve_helpers:
470
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
471
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
472
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
473
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
474
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
475
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
476
break;
477
case 's': /* qemu_st data 32-bit constraint */
478
ct->ct |= TCG_CT_REG;
479
- ct->u.regs = 0xffffffff;
480
+ ct->regs = 0xffffffff;
481
goto reserve_helpers;
482
case 'S': /* qemu_st data 64-bit constraint */
483
ct->ct |= TCG_CT_REG;
484
- ct->u.regs = ALL_64;
485
+ ct->regs = ALL_64;
486
goto reserve_helpers;
487
case 'I':
488
ct->ct |= TCG_CT_CONST_S11;
489
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
490
index XXXXXXX..XXXXXXX 100644
491
--- a/tcg/tci/tcg-target.c.inc
492
+++ b/tcg/tci/tcg-target.c.inc
493
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
494
case 'L': /* qemu_ld constraint */
495
case 'S': /* qemu_st constraint */
496
ct->ct |= TCG_CT_REG;
497
- ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1;
498
+ ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
499
break;
500
default:
501
return NULL;
502
--
503
2.25.1
New patch
This uses an existing hole in the TCGArgConstraint structure
and will be convenient for keeping the data in one place.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  2 +-
 tcg/tcg.c         | 35 +++++++++++++++++------------------
 2 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
13
+++ b/include/tcg/tcg.h
14
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
15
typedef struct TCGArgConstraint {
16
uint16_t ct;
17
uint8_t alias_index;
18
+ uint8_t sort_index;
19
TCGRegSet regs;
20
} TCGArgConstraint;
21
22
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
23
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
24
uint8_t flags;
25
TCGArgConstraint *args_ct;
26
- int *sorted_args;
27
#if defined(CONFIG_DEBUG_TCG)
28
int used;
29
#endif
30
diff --git a/tcg/tcg.c b/tcg/tcg.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/tcg.c
33
+++ b/tcg/tcg.c
34
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
35
int op, total_args, n, i;
36
TCGOpDef *def;
37
TCGArgConstraint *args_ct;
38
- int *sorted_args;
39
TCGTemp *ts;
40
41
memset(s, 0, sizeof(*s));
42
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
43
}
44
45
args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
46
- sorted_args = g_malloc(sizeof(int) * total_args);
47
48
for(op = 0; op < NB_OPS; op++) {
49
def = &tcg_op_defs[op];
50
def->args_ct = args_ct;
51
- def->sorted_args = sorted_args;
52
n = def->nb_iargs + def->nb_oargs;
53
- sorted_args += n;
54
args_ct += n;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
58
/* sort from highest priority to lowest */
59
static void sort_constraints(TCGOpDef *def, int start, int n)
60
{
61
- int i, j, p1, p2, tmp;
62
+ int i, j;
63
+ TCGArgConstraint *a = def->args_ct;
64
65
- for(i = 0; i < n; i++)
66
- def->sorted_args[start + i] = start + i;
67
- if (n <= 1)
68
+ for (i = 0; i < n; i++) {
69
+ a[start + i].sort_index = start + i;
70
+ }
71
+ if (n <= 1) {
72
return;
73
- for(i = 0; i < n - 1; i++) {
74
- for(j = i + 1; j < n; j++) {
75
- p1 = get_constraint_priority(def, def->sorted_args[start + i]);
76
- p2 = get_constraint_priority(def, def->sorted_args[start + j]);
77
+ }
78
+ for (i = 0; i < n - 1; i++) {
79
+ for (j = i + 1; j < n; j++) {
80
+ int p1 = get_constraint_priority(def, a[start + i].sort_index);
81
+ int p2 = get_constraint_priority(def, a[start + j].sort_index);
82
if (p1 < p2) {
83
- tmp = def->sorted_args[start + i];
84
- def->sorted_args[start + i] = def->sorted_args[start + j];
85
- def->sorted_args[start + j] = tmp;
86
+ int tmp = a[start + i].sort_index;
87
+ a[start + i].sort_index = a[start + j].sort_index;
88
+ a[start + j].sort_index = tmp;
89
}
90
}
91
}
92
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
93
for (k = 0; k < nb_iargs; k++) {
94
TCGRegSet i_preferred_regs, o_preferred_regs;
95
96
- i = def->sorted_args[nb_oargs + k];
97
+ i = def->args_ct[nb_oargs + k].sort_index;
98
arg = op->args[i];
99
arg_ct = &def->args_ct[i];
100
ts = arg_temp(arg);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
102
int k2, i2;
103
reg = ts->reg;
104
for (k2 = 0 ; k2 < k ; k2++) {
105
- i2 = def->sorted_args[nb_oargs + k2];
106
+ i2 = def->args_ct[nb_oargs + k2].sort_index;
107
if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
108
reg == new_args[i2]) {
109
goto allocate_in_reg;
110
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
111
112
/* satisfy the output constraints */
113
for(k = 0; k < nb_oargs; k++) {
114
- i = def->sorted_args[k];
115
+ i = def->args_ct[k].sort_index;
116
arg = op->args[i];
117
arg_ct = &def->args_ct[i];
118
ts = arg_temp(arg);
119
--
120
2.25.1
New patch
This wasn't actually used for anything, really.  All variable
operands must accept registers, which are indicated by the
set in TCGArgConstraint.regs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h            |  1 -
 tcg/tcg.c                    | 15 ++++-----------
 tcg/aarch64/tcg-target.c.inc |  3 ---
 tcg/arm/tcg-target.c.inc     |  3 ---
 tcg/i386/tcg-target.c.inc    | 11 -----------
 tcg/mips/tcg-target.c.inc    |  3 ---
 tcg/ppc/tcg-target.c.inc     |  5 -----
 tcg/riscv/tcg-target.c.inc   |  2 --
 tcg/s390/tcg-target.c.inc    |  4 ----
 tcg/sparc/tcg-target.c.inc   |  5 -----
 tcg/tci/tcg-target.c.inc     |  1 -
 11 files changed, 4 insertions(+), 49 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/tcg/tcg.h
23
+++ b/include/tcg/tcg.h
24
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
25
#define TCG_CT_ALIAS 0x80
26
#define TCG_CT_IALIAS 0x40
27
#define TCG_CT_NEWREG 0x20 /* output requires a new register */
28
-#define TCG_CT_REG 0x01
29
#define TCG_CT_CONST 0x02 /* any constant of register size */
30
31
typedef struct TCGArgConstraint {
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
37
/* we give more priority to constraints with less registers */
38
static int get_constraint_priority(const TCGOpDef *def, int k)
39
{
40
- const TCGArgConstraint *arg_ct;
41
+ const TCGArgConstraint *arg_ct = &def->args_ct[k];
42
+ int n;
43
44
- int i, n;
45
- arg_ct = &def->args_ct[k];
46
if (arg_ct->ct & TCG_CT_ALIAS) {
47
/* an alias is equivalent to a single register */
48
n = 1;
49
} else {
50
- if (!(arg_ct->ct & TCG_CT_REG))
51
- return 0;
52
- n = 0;
53
- for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
54
- if (tcg_regset_test_reg(arg_ct->regs, i))
55
- n++;
56
- }
57
+ n = ctpop64(arg_ct->regs);
58
}
59
return TCG_TARGET_NB_REGS - n + 1;
60
}
61
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
62
int oarg = *ct_str - '0';
63
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
64
tcg_debug_assert(oarg < def->nb_oargs);
65
- tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
66
+ tcg_debug_assert(def->args_ct[oarg].regs != 0);
67
/* TCG_CT_ALIAS is for the output arguments.
68
The input is tagged with TCG_CT_IALIAS. */
69
def->args_ct[i] = def->args_ct[oarg];
70
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
71
index XXXXXXX..XXXXXXX 100644
72
--- a/tcg/aarch64/tcg-target.c.inc
73
+++ b/tcg/aarch64/tcg-target.c.inc
74
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
75
{
76
switch (*ct_str++) {
77
case 'r': /* general registers */
78
- ct->ct |= TCG_CT_REG;
79
ct->regs |= 0xffffffffu;
80
break;
81
case 'w': /* advsimd registers */
82
- ct->ct |= TCG_CT_REG;
83
ct->regs |= 0xffffffff00000000ull;
84
break;
85
case 'l': /* qemu_ld / qemu_st address, data_reg */
86
- ct->ct |= TCG_CT_REG;
87
ct->regs = 0xffffffffu;
88
#ifdef CONFIG_SOFTMMU
89
/* x0 and x1 will be overwritten when reading the tlb entry,
90
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
91
index XXXXXXX..XXXXXXX 100644
92
--- a/tcg/arm/tcg-target.c.inc
93
+++ b/tcg/arm/tcg-target.c.inc
94
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
95
break;
96
97
case 'r':
98
- ct->ct |= TCG_CT_REG;
99
ct->regs = 0xffff;
100
break;
101
102
/* qemu_ld address */
103
case 'l':
104
- ct->ct |= TCG_CT_REG;
105
ct->regs = 0xffff;
106
#ifdef CONFIG_SOFTMMU
107
/* r0-r2,lr will be overwritten when reading the tlb entry,
108
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
109
110
/* qemu_st address & data */
111
case 's':
112
- ct->ct |= TCG_CT_REG;
113
ct->regs = 0xffff;
114
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
115
and r0-r1 doing the byte swapping, so don't use these. */
116
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/i386/tcg-target.c.inc
119
+++ b/tcg/i386/tcg-target.c.inc
120
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
121
{
122
switch(*ct_str++) {
123
case 'a':
124
- ct->ct |= TCG_CT_REG;
125
tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
126
break;
127
case 'b':
128
- ct->ct |= TCG_CT_REG;
129
tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
130
break;
131
case 'c':
132
- ct->ct |= TCG_CT_REG;
133
tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
134
break;
135
case 'd':
136
- ct->ct |= TCG_CT_REG;
137
tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
138
break;
139
case 'S':
140
- ct->ct |= TCG_CT_REG;
141
tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
142
break;
143
case 'D':
144
- ct->ct |= TCG_CT_REG;
145
tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
146
break;
147
case 'q':
148
/* A register that can be used as a byte operand. */
149
- ct->ct |= TCG_CT_REG;
150
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
151
break;
152
case 'Q':
153
/* A register with an addressable second byte (e.g. %ah). */
154
- ct->ct |= TCG_CT_REG;
155
ct->regs = 0xf;
156
break;
157
case 'r':
158
/* A general register. */
159
- ct->ct |= TCG_CT_REG;
160
ct->regs |= ALL_GENERAL_REGS;
161
break;
162
case 'W':
163
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
164
break;
165
case 'x':
166
/* A vector register. */
167
- ct->ct |= TCG_CT_REG;
168
ct->regs |= ALL_VECTOR_REGS;
169
break;
170
171
/* qemu_ld/st address constraint */
172
case 'L':
173
- ct->ct |= TCG_CT_REG;
174
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
175
tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
176
tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
177
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
178
index XXXXXXX..XXXXXXX 100644
179
--- a/tcg/mips/tcg-target.c.inc
180
+++ b/tcg/mips/tcg-target.c.inc
181
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
182
{
183
switch(*ct_str++) {
184
case 'r':
185
- ct->ct |= TCG_CT_REG;
186
ct->regs = 0xffffffff;
187
break;
188
case 'L': /* qemu_ld input arg constraint */
189
- ct->ct |= TCG_CT_REG;
190
ct->regs = 0xffffffff;
191
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
192
#if defined(CONFIG_SOFTMMU)
193
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
194
#endif
195
break;
196
case 'S': /* qemu_st constraint */
197
- ct->ct |= TCG_CT_REG;
198
ct->regs = 0xffffffff;
199
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
200
#if defined(CONFIG_SOFTMMU)
201
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
202
index XXXXXXX..XXXXXXX 100644
203
--- a/tcg/ppc/tcg-target.c.inc
204
+++ b/tcg/ppc/tcg-target.c.inc
205
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
206
{
207
switch (*ct_str++) {
208
case 'A': case 'B': case 'C': case 'D':
209
- ct->ct |= TCG_CT_REG;
210
tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
211
break;
212
case 'r':
213
- ct->ct |= TCG_CT_REG;
214
ct->regs = 0xffffffff;
215
break;
216
case 'v':
217
- ct->ct |= TCG_CT_REG;
218
ct->regs = 0xffffffff00000000ull;
219
break;
220
case 'L': /* qemu_ld constraint */
221
- ct->ct |= TCG_CT_REG;
222
ct->regs = 0xffffffff;
223
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
224
#ifdef CONFIG_SOFTMMU
225
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
226
#endif
227
break;
228
case 'S': /* qemu_st constraint */
229
- ct->ct |= TCG_CT_REG;
230
ct->regs = 0xffffffff;
231
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
232
#ifdef CONFIG_SOFTMMU
233
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
234
index XXXXXXX..XXXXXXX 100644
235
--- a/tcg/riscv/tcg-target.c.inc
236
+++ b/tcg/riscv/tcg-target.c.inc
237
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
238
{
239
switch (*ct_str++) {
240
case 'r':
241
- ct->ct |= TCG_CT_REG;
242
ct->regs = 0xffffffff;
243
break;
244
case 'L':
245
/* qemu_ld/qemu_st constraint */
246
- ct->ct |= TCG_CT_REG;
247
ct->regs = 0xffffffff;
248
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
249
#if defined(CONFIG_SOFTMMU)
250
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
251
index XXXXXXX..XXXXXXX 100644
252
--- a/tcg/s390/tcg-target.c.inc
253
+++ b/tcg/s390/tcg-target.c.inc
254
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
255
{
256
switch (*ct_str++) {
257
case 'r': /* all registers */
258
- ct->ct |= TCG_CT_REG;
259
ct->regs = 0xffff;
260
break;
261
case 'L': /* qemu_ld/st constraint */
262
- ct->ct |= TCG_CT_REG;
263
ct->regs = 0xffff;
264
tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
265
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
266
tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
267
break;
268
case 'a': /* force R2 for division */
269
- ct->ct |= TCG_CT_REG;
270
ct->regs = 0;
271
tcg_regset_set_reg(ct->regs, TCG_REG_R2);
272
break;
273
case 'b': /* force R3 for division */
274
- ct->ct |= TCG_CT_REG;
275
ct->regs = 0;
276
tcg_regset_set_reg(ct->regs, TCG_REG_R3);
277
break;
278
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
279
index XXXXXXX..XXXXXXX 100644
280
--- a/tcg/sparc/tcg-target.c.inc
281
+++ b/tcg/sparc/tcg-target.c.inc
282
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
283
{
284
switch (*ct_str++) {
285
case 'r':
286
- ct->ct |= TCG_CT_REG;
287
ct->regs = 0xffffffff;
288
break;
289
case 'R':
290
- ct->ct |= TCG_CT_REG;
291
ct->regs = ALL_64;
292
break;
293
case 'A': /* qemu_ld/st address constraint */
294
- ct->ct |= TCG_CT_REG;
295
ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
296
reserve_helpers:
297
tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
298
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
299
tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
300
break;
301
case 's': /* qemu_st data 32-bit constraint */
302
- ct->ct |= TCG_CT_REG;
303
ct->regs = 0xffffffff;
304
goto reserve_helpers;
305
case 'S': /* qemu_st data 64-bit constraint */
306
- ct->ct |= TCG_CT_REG;
307
ct->regs = ALL_64;
308
goto reserve_helpers;
309
case 'I':
310
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
311
index XXXXXXX..XXXXXXX 100644
312
--- a/tcg/tci/tcg-target.c.inc
313
+++ b/tcg/tci/tcg-target.c.inc
314
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
315
case 'r':
316
case 'L': /* qemu_ld constraint */
317
case 'S': /* qemu_st constraint */
318
- ct->ct |= TCG_CT_REG;
319
ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
320
break;
321
default:
322
--
323
2.25.1
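A small self-contained check of the simplification above: the per-register loop that get_constraint_priority() used to run is just a population count. ctpop64() here is a local helper standing in for QEMU's, and the 0xffff register set is only an example value.

/*
 * Sketch only: the removed register-counting loop is equivalent to a
 * population count of the constraint's register set.
 */
#include <assert.h>
#include <stdint.h>

static int ctpop64(uint64_t x)
{
    int n = 0;
    while (x) {
        x &= x - 1;     /* clear the lowest set bit */
        n++;
    }
    return n;
}

int main(void)
{
    uint64_t regs = 0xffff;          /* e.g. "any of 16 registers" */
    int n = 0;

    for (int i = 0; i < 64; i++) {   /* the old per-register loop */
        if (regs & (UINT64_C(1) << i)) {
            n++;
        }
    }
    assert(n == ctpop64(regs) && n == 16);
    return 0;
}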
New patch
These are easier to set and test when they have their own fields.
Reduce the size of alias_index and sort_index to 4 bits, which is
sufficient for TCG_MAX_OP_ARGS.  This leaves only the bits indicating
constants within the ct field.

Move all initialization to allocation time, rather than initializing
individual fields in process_op_defs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 14 +++++++-------
 tcg/tcg.c         | 28 ++++++++++++----------------
 2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg.h
18
+++ b/include/tcg/tcg.h
19
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void);
20
void tcg_dump_info(void);
21
void tcg_dump_op_count(void);
22
23
-#define TCG_CT_ALIAS 0x80
24
-#define TCG_CT_IALIAS 0x40
25
-#define TCG_CT_NEWREG 0x20 /* output requires a new register */
26
-#define TCG_CT_CONST 0x02 /* any constant of register size */
27
+#define TCG_CT_CONST 1 /* any constant of register size */
28
29
typedef struct TCGArgConstraint {
30
- uint16_t ct;
31
- uint8_t alias_index;
32
- uint8_t sort_index;
33
+ unsigned ct : 16;
34
+ unsigned alias_index : 4;
35
+ unsigned sort_index : 4;
36
+ bool oalias : 1;
37
+ bool ialias : 1;
38
+ bool newreg : 1;
39
TCGRegSet regs;
40
} TCGArgConstraint;
41
42
diff --git a/tcg/tcg.c b/tcg/tcg.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg.c
45
+++ b/tcg/tcg.c
46
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
47
total_args += n;
48
}
49
50
- args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
51
+ args_ct = g_new0(TCGArgConstraint, total_args);
52
53
for(op = 0; op < NB_OPS; op++) {
54
def = &tcg_op_defs[op];
55
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
56
const TCGArgConstraint *arg_ct = &def->args_ct[k];
57
int n;
58
59
- if (arg_ct->ct & TCG_CT_ALIAS) {
60
+ if (arg_ct->oalias) {
61
/* an alias is equivalent to a single register */
62
n = 1;
63
} else {
64
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
65
/* Incomplete TCGTargetOpDef entry. */
66
tcg_debug_assert(ct_str != NULL);
67
68
- def->args_ct[i].regs = 0;
69
- def->args_ct[i].ct = 0;
70
while (*ct_str != '\0') {
71
switch(*ct_str) {
72
case '0' ... '9':
73
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
74
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
75
tcg_debug_assert(oarg < def->nb_oargs);
76
tcg_debug_assert(def->args_ct[oarg].regs != 0);
77
- /* TCG_CT_ALIAS is for the output arguments.
78
- The input is tagged with TCG_CT_IALIAS. */
79
def->args_ct[i] = def->args_ct[oarg];
80
- def->args_ct[oarg].ct |= TCG_CT_ALIAS;
81
+ /* The output sets oalias. */
82
+ def->args_ct[oarg].oalias = true;
83
def->args_ct[oarg].alias_index = i;
84
- def->args_ct[i].ct |= TCG_CT_IALIAS;
85
+ /* The input sets ialias. */
86
+ def->args_ct[i].ialias = true;
87
def->args_ct[i].alias_index = oarg;
88
}
89
ct_str++;
90
break;
91
case '&':
92
- def->args_ct[i].ct |= TCG_CT_NEWREG;
93
+ def->args_ct[i].newreg = true;
94
ct_str++;
95
break;
96
case 'i':
97
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
98
set = *pset;
99
100
set &= ct->regs;
101
- if (ct->ct & TCG_CT_IALIAS) {
102
+ if (ct->ialias) {
103
set &= op->output_pref[ct->alias_index];
104
}
105
/* If the combination is not possible, restart. */
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
107
}
108
109
i_preferred_regs = o_preferred_regs = 0;
110
- if (arg_ct->ct & TCG_CT_IALIAS) {
111
+ if (arg_ct->ialias) {
112
o_preferred_regs = op->output_pref[arg_ct->alias_index];
113
if (ts->fixed_reg) {
114
/* if fixed register, we must allocate a new register
115
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
116
reg = ts->reg;
117
for (k2 = 0 ; k2 < k ; k2++) {
118
i2 = def->args_ct[nb_oargs + k2].sort_index;
119
- if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
120
- reg == new_args[i2]) {
121
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
122
goto allocate_in_reg;
123
}
124
}
125
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
126
/* ENV should not be modified. */
127
tcg_debug_assert(!ts->fixed_reg);
128
129
- if ((arg_ct->ct & TCG_CT_ALIAS)
130
- && !const_args[arg_ct->alias_index]) {
131
+ if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
132
reg = new_args[arg_ct->alias_index];
133
- } else if (arg_ct->ct & TCG_CT_NEWREG) {
134
+ } else if (arg_ct->newreg) {
135
reg = tcg_reg_alloc(s, arg_ct->regs,
136
i_allocated_regs | o_allocated_regs,
137
op->output_pref[k], ts->indirect_base);
138
--
139
2.25.1
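To make the 4-bit choice concrete, here is a hedged sketch using a local mock of the reworked structure; MockArgConstraint is not the real TCGArgConstraint and TCGRegSet is stubbed as uint64_t, but it shows why 4 bits cover every argument index when TCG_MAX_OP_ARGS is 16.

/*
 * Illustration only: a local mock of the reworked constraint structure.
 * The real definitions live in include/tcg/tcg.h.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TCG_MAX_OP_ARGS 16

typedef uint64_t TCGRegSet;             /* stand-in for the real typedef */

typedef struct {
    unsigned ct : 16;
    unsigned alias_index : 4;           /* 0..15: enough for 16 args */
    unsigned sort_index : 4;
    bool oalias : 1;
    bool ialias : 1;
    bool newreg : 1;
    TCGRegSet regs;
} MockArgConstraint;

int main(void)
{
    MockArgConstraint c = { 0 };

    c.alias_index = TCG_MAX_OP_ARGS - 1;    /* largest possible index */
    c.ialias = true;
    assert(c.alias_index == 15 && c.ialias && !c.oalias);
    return 0;
}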
New patch
The last user of this field disappeared in f69d277ece4.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 3 ---
 1 file changed, 3 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/tcg/tcg.h
12
+++ b/include/tcg/tcg.h
13
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
14
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
15
uint8_t flags;
16
TCGArgConstraint *args_ct;
17
-#if defined(CONFIG_DEBUG_TCG)
18
- int used;
19
-#endif
20
} TCGOpDef;
21
22
extern TCGOpDef tcg_op_defs[];
23
--
24
2.25.1

The previous change wrongly stated that 32-bit avx2 should have
used VPBROADCASTW.  But that's a 16-bit broadcast and we want a
32-bit broadcast.

Fixes: 7b60ef3264e
Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
         new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
     } else {
         if (have_avx2) {
-            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
+            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
         } else {
             tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
         }
--
2.25.1

The definition of INDEX_op_dupi_vec is that it operates on
units of tcg_target_ulong -- in this case 32 bits.  It does
not work to use this for a uint64_t value that happens to be
small enough to fit in tcg_target_ulong.

Fixes: d2fd745fe8b
Fixes: db432672dc5
Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-vec.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 
 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
 {
-    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
-        do_dupi_vec(r, MO_32, a);
-    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
+    if (TCG_TARGET_REG_BITS == 64) {
         do_dupi_vec(r, MO_64, a);
+    } else if (a == dup_const(MO_32, a)) {
+        do_dupi_vec(r, MO_32, a);
     } else {
         TCGv_i64 c = tcg_const_i64(a);
         tcg_gen_dup_i64_vec(MO_64, r, c);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
 
 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 {
-    do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    if (vece == MO_64) {
+        tcg_gen_dup64i_vec(r, a);
+    } else {
+        do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    }
 }
 
 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
--
2.25.1
43
43
intptr_t oprsz = simd_oprsz(desc);
44
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
44
intptr_t i;
45
46
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
47
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
48
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
49
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
50
}
51
clear_high(d, oprsz, desc);
52
}
53
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
54
intptr_t oprsz = simd_oprsz(desc);
55
intptr_t i;
56
57
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
58
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
59
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
60
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
61
}
62
clear_high(d, oprsz, desc);
63
}
64
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
65
intptr_t oprsz = simd_oprsz(desc);
66
intptr_t i;
67
68
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
69
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
70
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
71
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
72
}
73
clear_high(d, oprsz, desc);
74
}
75
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
76
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
77
{
78
intptr_t oprsz = simd_oprsz(desc);
79
- vec8 vecb = (vec8)DUP16(b);
80
+ uint8_t vecb = (uint8_t)DUP16(b);
81
intptr_t i;
82
83
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
84
- *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
85
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
86
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
87
}
88
clear_high(d, oprsz, desc);
89
}
90
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
91
void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
92
{
93
intptr_t oprsz = simd_oprsz(desc);
94
- vec16 vecb = (vec16)DUP8(b);
95
+ uint16_t vecb = (uint16_t)DUP8(b);
96
intptr_t i;
97
98
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
99
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
100
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
101
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
102
}
103
clear_high(d, oprsz, desc);
104
}
105
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
106
void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
107
{
108
intptr_t oprsz = simd_oprsz(desc);
109
- vec32 vecb = (vec32)DUP4(b);
110
+ uint32_t vecb = (uint32_t)DUP4(b);
111
intptr_t i;
112
113
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
114
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
115
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
116
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
117
}
118
clear_high(d, oprsz, desc);
119
}
120
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
121
void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
122
{
123
intptr_t oprsz = simd_oprsz(desc);
124
- vec64 vecb = (vec64)DUP2(b);
125
+ uint64_t vecb = (uint64_t)DUP2(b);
126
intptr_t i;
127
128
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
129
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
130
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
131
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
132
}
133
clear_high(d, oprsz, desc);
134
}
135
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
136
intptr_t oprsz = simd_oprsz(desc);
137
intptr_t i;
138
139
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
140
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
141
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
142
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
143
}
144
clear_high(d, oprsz, desc);
145
}
146
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
147
intptr_t oprsz = simd_oprsz(desc);
148
intptr_t i;
149
150
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
151
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
152
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
153
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
154
}
155
clear_high(d, oprsz, desc);
156
}
157
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
158
intptr_t oprsz = simd_oprsz(desc);
159
intptr_t i;
160
161
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
162
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
163
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
165
}
166
clear_high(d, oprsz, desc);
167
}
168
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
169
intptr_t oprsz = simd_oprsz(desc);
170
intptr_t i;
171
172
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
173
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
174
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
175
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
176
}
177
clear_high(d, oprsz, desc);
178
}
179
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
180
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
181
{
182
intptr_t oprsz = simd_oprsz(desc);
183
- vec8 vecb = (vec8)DUP16(b);
184
+ uint8_t vecb = (uint8_t)DUP16(b);
185
intptr_t i;
186
187
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
188
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
189
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
190
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
191
}
192
clear_high(d, oprsz, desc);
193
}
194
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
195
void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
196
{
197
intptr_t oprsz = simd_oprsz(desc);
198
- vec16 vecb = (vec16)DUP8(b);
199
+ uint16_t vecb = (uint16_t)DUP8(b);
200
intptr_t i;
201
202
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
203
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
204
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
205
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
206
}
207
clear_high(d, oprsz, desc);
208
}
209
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
210
void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
211
{
212
intptr_t oprsz = simd_oprsz(desc);
213
- vec32 vecb = (vec32)DUP4(b);
214
+ uint32_t vecb = (uint32_t)DUP4(b);
215
intptr_t i;
216
217
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
218
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
219
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
220
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
221
}
222
clear_high(d, oprsz, desc);
223
}
224
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
225
void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
226
{
227
intptr_t oprsz = simd_oprsz(desc);
228
- vec64 vecb = (vec64)DUP2(b);
229
+ uint64_t vecb = (uint64_t)DUP2(b);
230
intptr_t i;
231
232
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
233
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
234
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
235
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
236
}
237
clear_high(d, oprsz, desc);
238
}
239
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
240
intptr_t oprsz = simd_oprsz(desc);
241
intptr_t i;
242
243
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
244
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
245
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
246
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
247
}
248
clear_high(d, oprsz, desc);
249
}
250
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
251
intptr_t oprsz = simd_oprsz(desc);
252
intptr_t i;
253
254
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
255
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
256
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
257
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
258
}
259
clear_high(d, oprsz, desc);
260
}
261
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
262
intptr_t oprsz = simd_oprsz(desc);
263
intptr_t i;
264
265
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
266
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
267
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
268
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
269
}
270
clear_high(d, oprsz, desc);
271
}
272
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
273
intptr_t oprsz = simd_oprsz(desc);
274
intptr_t i;
275
276
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
277
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
278
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
279
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
280
}
281
clear_high(d, oprsz, desc);
282
}
283
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
284
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
285
{
286
intptr_t oprsz = simd_oprsz(desc);
287
- vec8 vecb = (vec8)DUP16(b);
288
+ uint8_t vecb = (uint8_t)DUP16(b);
289
intptr_t i;
290
291
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
292
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
293
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
294
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
295
}
296
clear_high(d, oprsz, desc);
297
}
298
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
299
void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
300
{
301
intptr_t oprsz = simd_oprsz(desc);
302
- vec16 vecb = (vec16)DUP8(b);
303
+ uint16_t vecb = (uint16_t)DUP8(b);
304
intptr_t i;
305
306
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
307
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
308
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
309
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
310
}
311
clear_high(d, oprsz, desc);
312
}
313
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
314
void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
315
{
316
intptr_t oprsz = simd_oprsz(desc);
317
- vec32 vecb = (vec32)DUP4(b);
318
+ uint32_t vecb = (uint32_t)DUP4(b);
319
intptr_t i;
320
321
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
322
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
323
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
324
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
325
}
326
clear_high(d, oprsz, desc);
327
}
328
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
329
void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
330
{
331
intptr_t oprsz = simd_oprsz(desc);
332
- vec64 vecb = (vec64)DUP2(b);
333
+ uint64_t vecb = (uint64_t)DUP2(b);
334
intptr_t i;
335
336
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
337
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
338
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
339
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
340
}
341
clear_high(d, oprsz, desc);
342
}
343
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
344
intptr_t oprsz = simd_oprsz(desc);
345
intptr_t i;
346
347
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
348
- *(vec8 *)(d + i) = -*(vec8 *)(a + i);
349
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
350
+ *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
351
}
352
clear_high(d, oprsz, desc);
353
}
354
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
355
intptr_t oprsz = simd_oprsz(desc);
356
intptr_t i;
357
358
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
359
- *(vec16 *)(d + i) = -*(vec16 *)(a + i);
360
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
361
+ *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
362
}
363
clear_high(d, oprsz, desc);
364
}
365
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
366
intptr_t oprsz = simd_oprsz(desc);
367
intptr_t i;
368
369
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
370
- *(vec32 *)(d + i) = -*(vec32 *)(a + i);
371
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
372
+ *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
373
}
374
clear_high(d, oprsz, desc);
375
}
376
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
377
intptr_t oprsz = simd_oprsz(desc);
378
intptr_t i;
379
380
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
381
- *(vec64 *)(d + i) = -*(vec64 *)(a + i);
382
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
383
+ *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
384
}
385
clear_high(d, oprsz, desc);
386
}
387
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
388
intptr_t oprsz = simd_oprsz(desc);
389
intptr_t i;
390
391
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
392
- *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
393
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
394
+ *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
395
}
396
clear_high(d, oprsz, desc);
397
}
398
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
399
intptr_t oprsz = simd_oprsz(desc);
400
intptr_t i;
401
402
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
403
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
404
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
405
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
406
}
407
clear_high(d, oprsz, desc);
408
}
409
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
410
intptr_t oprsz = simd_oprsz(desc);
411
intptr_t i;
412
413
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
414
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
415
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
416
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
417
}
418
clear_high(d, oprsz, desc);
419
}
420
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
421
intptr_t oprsz = simd_oprsz(desc);
422
intptr_t i;
423
424
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
425
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
426
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
427
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
428
}
429
clear_high(d, oprsz, desc);
430
}
431
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
432
intptr_t oprsz = simd_oprsz(desc);
433
intptr_t i;
434
435
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
436
- *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
437
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
438
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
439
}
440
clear_high(d, oprsz, desc);
441
}
442
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
443
intptr_t oprsz = simd_oprsz(desc);
444
intptr_t i;
445
446
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
447
- *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
448
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
450
}
451
clear_high(d, oprsz, desc);
452
}
453
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
454
intptr_t oprsz = simd_oprsz(desc);
455
intptr_t i;
456
457
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
458
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
459
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
461
}
462
clear_high(d, oprsz, desc);
463
}
464
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
465
intptr_t oprsz = simd_oprsz(desc);
466
intptr_t i;
467
468
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
469
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
470
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
472
}
473
clear_high(d, oprsz, desc);
474
}
475
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
476
intptr_t oprsz = simd_oprsz(desc);
477
intptr_t i;
478
479
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
480
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
481
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
483
}
484
clear_high(d, oprsz, desc);
485
}
486
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
487
void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
488
{
489
intptr_t oprsz = simd_oprsz(desc);
490
- vec64 vecb = (vec64)DUP2(b);
491
+ uint64_t vecb = (uint64_t)DUP2(b);
492
intptr_t i;
493
494
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
495
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
496
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
497
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
498
}
499
clear_high(d, oprsz, desc);
500
}
501
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
502
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
503
{
504
intptr_t oprsz = simd_oprsz(desc);
505
- vec64 vecb = (vec64)DUP2(b);
506
+ uint64_t vecb = (uint64_t)DUP2(b);
507
intptr_t i;
508
509
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
510
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
511
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
512
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
513
}
514
clear_high(d, oprsz, desc);
515
}
516
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
517
void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
518
{
519
intptr_t oprsz = simd_oprsz(desc);
520
- vec64 vecb = (vec64)DUP2(b);
521
+ uint64_t vecb = (uint64_t)DUP2(b);
522
intptr_t i;
523
524
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
525
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
526
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
527
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
528
}
529
clear_high(d, oprsz, desc);
530
}
531
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
532
int shift = simd_data(desc);
533
intptr_t i;
534
535
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
536
- *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
537
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
538
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
539
}
540
clear_high(d, oprsz, desc);
541
}
542
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
543
int shift = simd_data(desc);
544
intptr_t i;
545
546
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
547
- *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
548
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
549
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
550
}
551
clear_high(d, oprsz, desc);
552
}
553
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
554
int shift = simd_data(desc);
555
intptr_t i;
556
557
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
558
- *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
559
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
560
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
561
}
562
clear_high(d, oprsz, desc);
563
}
564
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
565
int shift = simd_data(desc);
566
intptr_t i;
567
568
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
569
- *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
570
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
571
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
572
}
573
clear_high(d, oprsz, desc);
574
}
575
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
576
int shift = simd_data(desc);
577
intptr_t i;
578
579
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
580
- *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
581
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
582
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
583
}
584
clear_high(d, oprsz, desc);
585
}
586
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
587
int shift = simd_data(desc);
588
intptr_t i;
589
590
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
591
- *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
592
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
593
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
594
}
595
clear_high(d, oprsz, desc);
596
}
597
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
598
int shift = simd_data(desc);
599
intptr_t i;
600
601
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
602
- *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
603
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
604
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
605
}
606
clear_high(d, oprsz, desc);
607
}
608
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
609
int shift = simd_data(desc);
610
intptr_t i;
611
612
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
613
- *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
614
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
615
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
616
}
617
clear_high(d, oprsz, desc);
618
}
619
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
620
int shift = simd_data(desc);
621
intptr_t i;
622
623
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
624
- *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
625
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
626
+ *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
627
}
628
clear_high(d, oprsz, desc);
629
}
630
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
631
int shift = simd_data(desc);
632
intptr_t i;
633
634
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
635
- *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
636
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
637
+ *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
638
}
639
clear_high(d, oprsz, desc);
640
}
641
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
642
int shift = simd_data(desc);
643
intptr_t i;
644
645
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
646
- *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
647
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
648
+ *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
649
}
650
clear_high(d, oprsz, desc);
651
}
652
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
653
int shift = simd_data(desc);
654
intptr_t i;
655
656
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
657
- *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
658
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
659
+ *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
660
}
661
clear_high(d, oprsz, desc);
662
}
663
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
664
}
665
666
#define DO_CMP2(SZ) \
667
- DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
668
- DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
669
- DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
670
- DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
671
- DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
672
- DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
673
+ DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
674
+ DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
675
+ DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
676
+ DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
677
+ DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
678
+ DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
679
680
DO_CMP2(8)
681
DO_CMP2(16)
682
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
683
intptr_t oprsz = simd_oprsz(desc);
684
intptr_t i;
685
686
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
687
- vec64 aa = *(vec64 *)(a + i);
688
- vec64 bb = *(vec64 *)(b + i);
689
- vec64 cc = *(vec64 *)(c + i);
690
- *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa);
691
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
692
+ uint64_t aa = *(uint64_t *)(a + i);
693
+ uint64_t bb = *(uint64_t *)(b + i);
694
+ uint64_t cc = *(uint64_t *)(c + i);
695
+ *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
696
}
697
clear_high(d, oprsz, desc);
698
}
699
--
2.20.1

--
2.25.1

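A rough standalone restatement of the "fits in a 32-bit dup" test used in
the tcg-op-vec.c hunk above; dup_const32() and fits_mo32_dup() are invented
names for illustration, not the QEMU macros:

#include <stdbool.h>
#include <stdint.h>

/* Replicate a 32-bit value into both halves of a 64-bit element. */
static uint64_t dup_const32(uint32_t x)
{
    return ((uint64_t)x << 32) | x;
}

/* On a 32-bit host, a 64-bit constant can be emitted as a MO_32 dupi
 * exactly when replicating its low half reproduces the full value. */
static bool fits_mo32_dup(uint64_t a)
{
    return a == dup_const32((uint32_t)a);
}
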
Partial cleanup from the CONFIG_VECTOR16 removal.
Replace DO_CMP0 with its scalar expansion, a simple negation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime-gvec.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

When the two arguments are identical, this can be reduced to
dup_vec or to mov_vec from a tcg_constant_vec.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
clear_high(d, oprsz, desc);
14
}
15
}
15
goto do_default;
16
16
17
-#define DO_CMP0(X) -(X)
17
+ case INDEX_op_dup2_vec:
18
-
18
+ assert(TCG_TARGET_REG_BITS == 32);
19
#define DO_CMP1(NAME, TYPE, OP) \
19
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
20
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
20
+ tmp = arg_info(op->args[1])->val;
21
{ \
21
+ if (tmp == arg_info(op->args[2])->val) {
22
intptr_t oprsz = simd_oprsz(desc); \
22
+ tcg_opt_gen_movi(s, op, op->args[0], tmp);
23
intptr_t i; \
23
+ break;
24
for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
24
+ }
25
- *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
25
+ } else if (args_are_copies(op->args[1], op->args[2])) {
26
+ *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
26
+ op->opc = INDEX_op_dup_vec;
27
} \
27
+ TCGOP_VECE(op) = MO_32;
28
clear_high(d, oprsz, desc); \
28
+ nb_iargs = 1;
29
}
29
+ }
30
@@ -XXX,XX +XXX,XX @@ DO_CMP2(16)
30
+ goto do_default;
31
DO_CMP2(32)
31
+
32
DO_CMP2(64)
32
CASE_OP_32_64(not):
33
33
CASE_OP_32_64(neg):
34
-#undef DO_CMP0
34
CASE_OP_32_64(ext8s):
35
#undef DO_CMP1
36
#undef DO_CMP2
37
38
--
2.20.1

--
2.25.1

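A minimal sketch of the dup2_vec folding rule added to tcg/optimize.c above,
written as standalone C with invented names; it mirrors only the decision,
not the TCG data structures:

#include <stdbool.h>
#include <stdint.h>

enum fold_kind { FOLD_NONE, FOLD_DUPI, FOLD_DUP32 };

/* dup2_vec (32-bit hosts only) builds 64-bit elements from two 32-bit
 * arguments.  Equal constants fold to an immediate dup; two copies of
 * the same value fold to a MO_32 dup_vec of that value. */
static enum fold_kind fold_dup2(bool lo_const, uint32_t lo_val,
                                bool hi_const, uint32_t hi_val,
                                bool args_are_copies, uint32_t *imm)
{
    if (lo_const && hi_const) {
        if (lo_val == hi_val) {
            *imm = lo_val;
            return FOLD_DUPI;
        }
        return FOLD_NONE;   /* distinct constants: leave the op alone */
    }
    if (args_are_copies) {
        return FOLD_DUP32;
    }
    return FOLD_NONE;
}
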
A given RISU testcase for SVE can produce

  tcg-op-vec.c:511: do_shifti: Assertion `i >= 0 && i < (8 << vece)' failed.

because expand_vec_sari gave a shift count of 32 to a MO_32
vector shift.

In 44f1441dbe1, we changed from direct expansion of vector opcodes
to re-use of the tcg expanders. So while the comment correctly notes
that the hw will handle such a shift count, we now have to take our
own sanity checks into account. Which is easy in this particular case.

Fixes: 44f1441dbe1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

The cmp_vec opcode is mandatory; this symbol is unused.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h | 1 -
 tcg/i386/tcg-target.h | 1 -
 tcg/ppc/tcg-target.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void expand_vec_sari(TCGType type, unsigned vece,
15
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
16
#define TCG_TARGET_HAS_shi_vec 1
25
case MO_64:
17
#define TCG_TARGET_HAS_shs_vec 0
26
if (imm <= 32) {
18
#define TCG_TARGET_HAS_shv_vec 1
27
- /* We can emulate a small sign extend by performing an arithmetic
19
-#define TCG_TARGET_HAS_cmp_vec 1
28
+ /*
20
#define TCG_TARGET_HAS_mul_vec 1
29
+ * We can emulate a small sign extend by performing an arithmetic
21
#define TCG_TARGET_HAS_sat_vec 1
30
* 32-bit shift and overwriting the high half of a 64-bit logical
22
#define TCG_TARGET_HAS_minmax_vec 1
31
- * shift (note that the ISA says shift of 32 is valid).
23
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
32
+ * shift. Note that the ISA says shift of 32 is valid, but TCG
24
index XXXXXXX..XXXXXXX 100644
33
+ * does not, so we have to bound the smaller shift -- we get the
25
--- a/tcg/i386/tcg-target.h
34
+ * same result in the high half either way.
26
+++ b/tcg/i386/tcg-target.h
35
*/
27
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
36
t1 = tcg_temp_new_vec(type);
28
#define TCG_TARGET_HAS_shi_vec 1
37
- tcg_gen_sari_vec(MO_32, t1, v1, imm);
29
#define TCG_TARGET_HAS_shs_vec 1
38
+ tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
30
#define TCG_TARGET_HAS_shv_vec have_avx2
39
tcg_gen_shri_vec(MO_64, v0, v1, imm);
31
-#define TCG_TARGET_HAS_cmp_vec 1
40
vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
32
#define TCG_TARGET_HAS_mul_vec 1
41
tcgv_vec_arg(v0), tcgv_vec_arg(v0),
33
#define TCG_TARGET_HAS_sat_vec 1
34
#define TCG_TARGET_HAS_minmax_vec 1
35
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/ppc/tcg-target.h
38
+++ b/tcg/ppc/tcg-target.h
39
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
40
#define TCG_TARGET_HAS_shi_vec 0
41
#define TCG_TARGET_HAS_shs_vec 0
42
#define TCG_TARGET_HAS_shv_vec 1
43
-#define TCG_TARGET_HAS_cmp_vec 1
44
#define TCG_TARGET_HAS_mul_vec 1
45
#define TCG_TARGET_HAS_sat_vec 1
46
#define TCG_TARGET_HAS_minmax_vec 1
42
--
2.20.1

--
2.25.1

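A scalar restatement of the trick the i386 expansion above relies on;
sar64_via_hi32() is an invented helper, not TCG code, and it assumes the
usual arithmetic behaviour of >> on signed values:

#include <stdint.h>

/* For 0 < imm <= 32, a 64-bit arithmetic right shift decomposes into a
 * 64-bit logical shift (low half) plus a 32-bit arithmetic shift of the
 * high word (high half).  Clamping the 32-bit shift count to 31 gives
 * the same high half when imm == 32, which is why the bound is safe. */
static uint64_t sar64_via_hi32(uint64_t v, unsigned imm)
{
    uint32_t lo = (uint32_t)(v >> imm);     /* logical 64-bit shift */
    int32_t hi = (int32_t)(v >> 32);        /* sign-carrying high word */

    hi >>= (imm < 31 ? imm : 31);           /* clamp, as in the patch */
    return ((uint64_t)(uint32_t)hi << 32) | lo;
}
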
The comment in tcg-runtime-gvec.c about CONFIG_VECTOR16 says that
tcg-op-gvec.c has eliminated size 8 vectors, and only passes on
multiples of 16. This may have been true of the first few operations,
but is not true of all operations.

In particular, multiply, shift by scalar, and compare of 8- and 16-bit
elements are not expanded inline if host vector operations are not
supported.

For an x86_64 host that does not support AVX, this means that we will
fall back to the helper, which will attempt to use SSE instructions,
which will SEGV on an invalid 8-byte aligned memory operation.

This patch simply removes the CONFIG_VECTOR16 code and configuration
without further simplification.

Buglink: https://bugs.launchpad.net/bugs/1863508
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 configure | 56 ------------------------------------
 accel/tcg/tcg-runtime-gvec.c | 35 +---------------------
 2 files changed, 1 insertion(+), 90 deletions(-)

From: Kele Huang <kele.hwang@gmail.com>

Detect all MIPS store instructions in cpu_signal_handler for all available
MIPS versions, and set is_write if encountering such store instructions.

This fixes the error when dealing with self-modifying code for MIPS.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Kele Huang <kele.hwang@gmail.com>
Signed-off-by: Xu Zou <iwatchnima@gmail.com>
Message-Id: <20201002081420.10814-1-kele.hwang@gmail.com>
[rth: Use uintptr_t for pc to fix n32 build error.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/user-exec.c | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
diff --git a/configure b/configure
index XXXXXXX..XXXXXXX 100755
--- a/configure
+++ b/configure
@@ -XXX,XX +XXX,XX @@ if test "$plugins" = "yes" &&
29
"for this purpose. You can't build with --static."
30
fi
31
32
-########################################
33
-# See if 16-byte vector operations are supported.
34
-# Even without a vector unit the compiler may expand these.
35
-# There is a bug in old GCC for PPC that crashes here.
36
-# Unfortunately it's the system compiler for Centos 7.
37
-
38
-cat > $TMPC << EOF
39
-typedef unsigned char U1 __attribute__((vector_size(16)));
40
-typedef unsigned short U2 __attribute__((vector_size(16)));
41
-typedef unsigned int U4 __attribute__((vector_size(16)));
42
-typedef unsigned long long U8 __attribute__((vector_size(16)));
43
-typedef signed char S1 __attribute__((vector_size(16)));
44
-typedef signed short S2 __attribute__((vector_size(16)));
45
-typedef signed int S4 __attribute__((vector_size(16)));
46
-typedef signed long long S8 __attribute__((vector_size(16)));
47
-static U1 a1, b1;
48
-static U2 a2, b2;
49
-static U4 a4, b4;
50
-static U8 a8, b8;
51
-static S1 c1;
52
-static S2 c2;
53
-static S4 c4;
54
-static S8 c8;
55
-static int i;
56
-void helper(void *d, void *a, int shift, int i);
57
-void helper(void *d, void *a, int shift, int i)
58
-{
59
- *(U1 *)(d + i) = *(U1 *)(a + i) << shift;
60
- *(U2 *)(d + i) = *(U2 *)(a + i) << shift;
61
- *(U4 *)(d + i) = *(U4 *)(a + i) << shift;
62
- *(U8 *)(d + i) = *(U8 *)(a + i) << shift;
63
-}
64
-int main(void)
65
-{
66
- a1 += b1; a2 += b2; a4 += b4; a8 += b8;
67
- a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
68
- a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
69
- a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
70
- a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
71
- a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
72
- a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
73
- a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
74
- c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
75
- return 0;
76
-}
77
-EOF
78
-
79
-vector16=no
80
-if compile_prog "" "" ; then
81
- vector16=yes
82
-fi
83
-
84
########################################
85
# See if __attribute__((alias)) is supported.
86
# This false for Xcode 9, but has been remedied for Xcode 10.
87
@@ -XXX,XX +XXX,XX @@ if test "$atomic64" = "yes" ; then
88
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
89
fi
90
91
-if test "$vector16" = "yes" ; then
92
- echo "CONFIG_VECTOR16=y" >> $config_host_mak
93
-fi
94
-
95
if test "$attralias" = "yes" ; then
96
echo "CONFIG_ATTRIBUTE_ALIAS=y" >> $config_host_mak
97
fi
98
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
99
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
100
--- a/accel/tcg/tcg-runtime-gvec.c
20
--- a/accel/tcg/user-exec.c
101
+++ b/accel/tcg/tcg-runtime-gvec.c
21
+++ b/accel/tcg/user-exec.c
102
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
103
#include "tcg/tcg-gvec-desc.h"
23
104
24
#elif defined(__mips__)
105
25
106
-/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
26
+#if defined(__mips16) || defined(__mips_micromips)
107
- * them via GCC's generic vector extension. This turns out to be simpler and
27
+#error "Unsupported encoding"
108
- * more reliable than getting the compiler to autovectorize.
28
+#endif
109
- *
29
+
110
- * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
30
int cpu_signal_handler(int host_signum, void *pinfo,
111
- * are multiples of 16.
31
void *puc)
112
- *
113
- * When the compiler does not support all of the operations we require, the
114
- * loops are written so that we can always fall back on the base types.
115
- */
116
-#ifdef CONFIG_VECTOR16
117
-typedef uint8_t vec8 __attribute__((vector_size(16)));
118
-typedef uint16_t vec16 __attribute__((vector_size(16)));
119
-typedef uint32_t vec32 __attribute__((vector_size(16)));
120
-typedef uint64_t vec64 __attribute__((vector_size(16)));
121
-
122
-typedef int8_t svec8 __attribute__((vector_size(16)));
123
-typedef int16_t svec16 __attribute__((vector_size(16)));
124
-typedef int32_t svec32 __attribute__((vector_size(16)));
125
-typedef int64_t svec64 __attribute__((vector_size(16)));
126
-
127
-#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
128
-#define DUP8(X) { X, X, X, X, X, X, X, X }
129
-#define DUP4(X) { X, X, X, X }
130
-#define DUP2(X) { X, X }
131
-#else
132
typedef uint8_t vec8;
133
typedef uint16_t vec16;
134
typedef uint32_t vec32;
135
@@ -XXX,XX +XXX,XX @@ typedef int64_t svec64;
136
#define DUP8(X) X
137
#define DUP4(X) X
138
#define DUP2(X) X
139
-#endif /* CONFIG_VECTOR16 */
140
141
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
142
{
32
{
143
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
33
siginfo_t *info = pinfo;
144
clear_high(d, oprsz, desc);
34
ucontext_t *uc = puc;
35
- greg_t pc = uc->uc_mcontext.pc;
36
- int is_write;
37
+ uintptr_t pc = uc->uc_mcontext.pc;
38
+ uint32_t insn = *(uint32_t *)pc;
39
+ int is_write = 0;
40
+
41
+ /* Detect all store instructions at program counter. */
42
+ switch((insn >> 26) & 077) {
43
+ case 050: /* SB */
44
+ case 051: /* SH */
45
+ case 052: /* SWL */
46
+ case 053: /* SW */
47
+ case 054: /* SDL */
48
+ case 055: /* SDR */
49
+ case 056: /* SWR */
50
+ case 070: /* SC */
51
+ case 071: /* SWC1 */
52
+ case 074: /* SCD */
53
+ case 075: /* SDC1 */
54
+ case 077: /* SD */
55
+#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
56
+ case 072: /* SWC2 */
57
+ case 076: /* SDC2 */
58
+#endif
59
+ is_write = 1;
60
+ break;
61
+ case 023: /* COP1X */
62
+ /* Required in all versions of MIPS64 since
63
+ MIPS64r1 and subsequent versions of MIPS32r2. */
64
+ switch (insn & 077) {
65
+ case 010: /* SWXC1 */
66
+ case 011: /* SDXC1 */
67
+ case 015: /* SUXC1 */
68
+ is_write = 1;
69
+ }
70
+ break;
71
+ }
72
73
- /* XXX: compute is_write */
74
- is_write = 0;
75
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
145
}
76
}
146
77
147
-/* If vectors are enabled, the compiler fills in -1 for true.
148
- Otherwise, we must take care of this by hand. */
149
-#ifdef CONFIG_VECTOR16
150
-# define DO_CMP0(X) X
151
-#else
152
-# define DO_CMP0(X) -(X)
153
-#endif
154
+#define DO_CMP0(X) -(X)
155
156
#define DO_CMP1(NAME, TYPE, OP) \
157
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
158
--
2.20.1

--
2.25.1

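A cut-down illustration of the decode step in the user-exec.c hunk above; it
keeps only a few of the store opcodes the patch handles and uses hex rather
than the octal constants in the source:

#include <stdbool.h>
#include <stdint.h>

/* The MIPS major opcode sits in the top six bits of the instruction
 * word, so classifying a faulting instruction as a store only needs
 * to inspect insn >> 26. */
static bool mips_major_opcode_is_store(uint32_t insn)
{
    switch ((insn >> 26) & 0x3f) {
    case 0x28: /* SB */
    case 0x29: /* SH */
    case 0x2b: /* SW */
    case 0x3f: /* SD */
        return true;
    default:
        return false;
    }
}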