1
The following changes since commit 40c67636f67c2a89745f2e698522fe917326a952:
1
The following changes since commit 67e41fe0cfb62e6cdfa659f0155417d17e5274ea:
2
2
3
Merge remote-tracking branch 'remotes/kraxel/tags/usb-20200317-pull-request' into staging (2020-03-17 14:00:56 +0000)
3
Merge tag 'pull-ppc-20220104' of https://github.com/legoater/qemu into staging (2022-01-04 07:23:27 -0800)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/rth7680/qemu.git tags/pull-tcg-20200317
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220104
8
8
9
for you to fetch changes up to 0270bd503e3699b7202200a2d693ad1feb57473f:
9
for you to fetch changes up to d7478d4229f0a2b2817a55487e6b17081099fae4:
10
10
11
tcg: Remove tcg-runtime-gvec.c DO_CMP0 (2020-03-17 08:41:07 -0700)
11
common-user: Fix tail calls to safe_syscall_set_errno_tail (2022-01-04 15:41:03 -0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Fix tcg/i386 bug vs sari_vec.
14
Fix for safe_syscall_base.
15
Fix tcg-runtime-gvec.c vs i386 without avx.
15
Fix for folding of vector add/sub.
16
Fix build on loongarch64 with gcc 8.
17
Remove decl for qemu_run_machine_init_done_notifiers.
16
18
17
----------------------------------------------------------------
19
----------------------------------------------------------------
18
Richard Henderson (5):
20
Philippe Mathieu-Daudé (1):
19
tcg/i386: Bound shift count expanding sari_vec
21
linux-user: Fix trivial build error on loongarch64 hosts
20
tcg: Remove CONFIG_VECTOR16
21
tcg: Tidy tcg-runtime-gvec.c types
22
tcg: Tidy tcg-runtime-gvec.c DUP*
23
tcg: Remove tcg-runtime-gvec.c DO_CMP0
24
22
25
configure | 56 --------
23
Richard Henderson (2):
26
accel/tcg/tcg-runtime-gvec.c | 298 +++++++++++++++++--------------------------
24
tcg/optimize: Fix folding of vector ops
27
tcg/i386/tcg-target.inc.c | 9 +-
25
common-user: Fix tail calls to safe_syscall_set_errno_tail
28
3 files changed, 122 insertions(+), 241 deletions(-)
29
26
27
Xiaoyao Li (1):
28
sysemu: Cleanup qemu_run_machine_init_done_notifiers()
29
30
include/sysemu/sysemu.h | 1 -
31
linux-user/host/loongarch64/host-signal.h | 4 +--
32
tcg/optimize.c | 49 +++++++++++++++++++++++-------
33
common-user/host/i386/safe-syscall.inc.S | 1 +
34
common-user/host/mips/safe-syscall.inc.S | 1 +
35
common-user/host/x86_64/safe-syscall.inc.S | 1 +
36
6 files changed, 42 insertions(+), 15 deletions(-)
37
diff view generated by jsdifflib
Deleted patch
1
A given RISU testcase for SVE can produce
2
1
3
tcg-op-vec.c:511: do_shifti: Assertion `i >= 0 && i < (8 << vece)' failed.
4
5
because expand_vec_sari gave a shift count of 32 to a MO_32
6
vector shift.
7
8
In 44f1441dbe1, we changed from direct expansion of vector opcodes
9
to re-use of the tcg expanders. So while the comment correctly notes
10
that the hw will handle such a shift count, we now have to take our
11
own sanity checks into account. Which is easy in this particular case.
12
13
Fixes: 44f1441dbe1
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
16
tcg/i386/tcg-target.inc.c | 9 ++++++---
17
1 file changed, 6 insertions(+), 3 deletions(-)
18
19
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/i386/tcg-target.inc.c
22
+++ b/tcg/i386/tcg-target.inc.c
23
@@ -XXX,XX +XXX,XX @@ static void expand_vec_sari(TCGType type, unsigned vece,
24
25
case MO_64:
26
if (imm <= 32) {
27
- /* We can emulate a small sign extend by performing an arithmetic
28
+ /*
29
+ * We can emulate a small sign extend by performing an arithmetic
30
* 32-bit shift and overwriting the high half of a 64-bit logical
31
- * shift (note that the ISA says shift of 32 is valid).
32
+ * shift. Note that the ISA says shift of 32 is valid, but TCG
33
+ * does not, so we have to bound the smaller shift -- we get the
34
+ * same result in the high half either way.
35
*/
36
t1 = tcg_temp_new_vec(type);
37
- tcg_gen_sari_vec(MO_32, t1, v1, imm);
38
+ tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
39
tcg_gen_shri_vec(MO_64, v0, v1, imm);
40
vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
41
tcgv_vec_arg(v0), tcgv_vec_arg(v0),
42
--
43
2.20.1
44
45
diff view generated by jsdifflib
1
The comment in tcg-runtime-gvec.c about CONFIG_VECTOR16 says that
1
Bitwise operations are easy to fold, because the operation is
2
tcg-op-gvec.c has eliminated size 8 vectors, and only passes on
2
identical regardless of element size. But add and sub need
3
multiples of 16. This may have been true of the first few operations,
3
extra element size info that is not currently propagated.
4
but is not true of all operations.
5
4
6
In particular, multiply, shift by scalar, and compare of 8- and 16-bit
5
Fixes: 2f9f08ba43d
7
elements are not expanded inline if host vector operations are not
6
Cc: qemu-stable@nongnu.org
8
supported.
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799
9
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
For an x86_64 host that does not support AVX, this means that we will
11
fall back to the helper, which will attempt to use SSE instructions,
12
which will SEGV on an invalid 8-byte aligned memory operation.
13
14
This patch simply removes the CONFIG_VECTOR16 code and configuration
15
without further simplification.
16
17
Buglink: https://bugs.launchpad.net/bugs/1863508
18
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
19
---
10
---
20
configure | 56 ------------------------------------
11
tcg/optimize.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
21
accel/tcg/tcg-runtime-gvec.c | 35 +---------------------
12
1 file changed, 38 insertions(+), 11 deletions(-)
22
2 files changed, 1 insertion(+), 90 deletions(-)
23
13
24
diff --git a/configure b/configure
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
25
index XXXXXXX..XXXXXXX 100755
26
--- a/configure
27
+++ b/configure
28
@@ -XXX,XX +XXX,XX @@ if test "$plugins" = "yes" &&
29
"for this purpose. You can't build with --static."
30
fi
31
32
-########################################
33
-# See if 16-byte vector operations are supported.
34
-# Even without a vector unit the compiler may expand these.
35
-# There is a bug in old GCC for PPC that crashes here.
36
-# Unfortunately it's the system compiler for Centos 7.
37
-
38
-cat > $TMPC << EOF
39
-typedef unsigned char U1 __attribute__((vector_size(16)));
40
-typedef unsigned short U2 __attribute__((vector_size(16)));
41
-typedef unsigned int U4 __attribute__((vector_size(16)));
42
-typedef unsigned long long U8 __attribute__((vector_size(16)));
43
-typedef signed char S1 __attribute__((vector_size(16)));
44
-typedef signed short S2 __attribute__((vector_size(16)));
45
-typedef signed int S4 __attribute__((vector_size(16)));
46
-typedef signed long long S8 __attribute__((vector_size(16)));
47
-static U1 a1, b1;
48
-static U2 a2, b2;
49
-static U4 a4, b4;
50
-static U8 a8, b8;
51
-static S1 c1;
52
-static S2 c2;
53
-static S4 c4;
54
-static S8 c8;
55
-static int i;
56
-void helper(void *d, void *a, int shift, int i);
57
-void helper(void *d, void *a, int shift, int i)
58
-{
59
- *(U1 *)(d + i) = *(U1 *)(a + i) << shift;
60
- *(U2 *)(d + i) = *(U2 *)(a + i) << shift;
61
- *(U4 *)(d + i) = *(U4 *)(a + i) << shift;
62
- *(U8 *)(d + i) = *(U8 *)(a + i) << shift;
63
-}
64
-int main(void)
65
-{
66
- a1 += b1; a2 += b2; a4 += b4; a8 += b8;
67
- a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
68
- a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
69
- a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
70
- a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
71
- a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
72
- a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
73
- a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
74
- c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
75
- return 0;
76
-}
77
-EOF
78
-
79
-vector16=no
80
-if compile_prog "" "" ; then
81
- vector16=yes
82
-fi
83
-
84
########################################
85
# See if __attribute__((alias)) is supported.
86
# This false for Xcode 9, but has been remedied for Xcode 10.
87
@@ -XXX,XX +XXX,XX @@ if test "$atomic64" = "yes" ; then
88
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
89
fi
90
91
-if test "$vector16" = "yes" ; then
92
- echo "CONFIG_VECTOR16=y" >> $config_host_mak
93
-fi
94
-
95
if test "$attralias" = "yes" ; then
96
echo "CONFIG_ATTRIBUTE_ALIAS=y" >> $config_host_mak
97
fi
98
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
99
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
100
--- a/accel/tcg/tcg-runtime-gvec.c
16
--- a/tcg/optimize.c
101
+++ b/accel/tcg/tcg-runtime-gvec.c
17
+++ b/tcg/optimize.c
102
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
103
#include "tcg/tcg-gvec-desc.h"
19
CASE_OP_32_64(mul):
104
20
return x * y;
105
21
106
-/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
22
- CASE_OP_32_64(and):
107
- * them via GCC's generic vector extension. This turns out to be simpler and
23
+ CASE_OP_32_64_VEC(and):
108
- * more reliable than getting the compiler to autovectorize.
24
return x & y;
109
- *
25
110
- * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
26
- CASE_OP_32_64(or):
111
- * are multiples of 16.
27
+ CASE_OP_32_64_VEC(or):
112
- *
28
return x | y;
113
- * When the compiler does not support all of the operations we require, the
29
114
- * loops are written so that we can always fall back on the base types.
30
- CASE_OP_32_64(xor):
115
- */
31
+ CASE_OP_32_64_VEC(xor):
116
-#ifdef CONFIG_VECTOR16
32
return x ^ y;
117
-typedef uint8_t vec8 __attribute__((vector_size(16)));
33
118
-typedef uint16_t vec16 __attribute__((vector_size(16)));
34
case INDEX_op_shl_i32:
119
-typedef uint32_t vec32 __attribute__((vector_size(16)));
35
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
120
-typedef uint64_t vec64 __attribute__((vector_size(16)));
36
case INDEX_op_rotl_i64:
121
-
37
return rol64(x, y & 63);
122
-typedef int8_t svec8 __attribute__((vector_size(16)));
38
123
-typedef int16_t svec16 __attribute__((vector_size(16)));
39
- CASE_OP_32_64(not):
124
-typedef int32_t svec32 __attribute__((vector_size(16)));
40
+ CASE_OP_32_64_VEC(not):
125
-typedef int64_t svec64 __attribute__((vector_size(16)));
41
return ~x;
126
-
42
127
-#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
43
CASE_OP_32_64(neg):
128
-#define DUP8(X) { X, X, X, X, X, X, X, X }
44
return -x;
129
-#define DUP4(X) { X, X, X, X }
45
130
-#define DUP2(X) { X, X }
46
- CASE_OP_32_64(andc):
131
-#else
47
+ CASE_OP_32_64_VEC(andc):
132
typedef uint8_t vec8;
48
return x & ~y;
133
typedef uint16_t vec16;
49
134
typedef uint32_t vec32;
50
- CASE_OP_32_64(orc):
135
@@ -XXX,XX +XXX,XX @@ typedef int64_t svec64;
51
+ CASE_OP_32_64_VEC(orc):
136
#define DUP8(X) X
52
return x | ~y;
137
#define DUP4(X) X
53
138
#define DUP2(X) X
54
CASE_OP_32_64(eqv):
139
-#endif /* CONFIG_VECTOR16 */
55
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
140
56
return false;
141
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
57
}
58
59
+static bool fold_commutative(OptContext *ctx, TCGOp *op)
60
+{
61
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
62
+ return false;
63
+}
64
+
65
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
142
{
66
{
143
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
67
swap_commutative(op->args[0], &op->args[1], &op->args[2]);
144
clear_high(d, oprsz, desc);
68
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
69
return false;
145
}
70
}
146
71
147
-/* If vectors are enabled, the compiler fills in -1 for true.
72
+/* We cannot as yet do_constant_folding with vectors. */
148
- Otherwise, we must take care of this by hand. */
73
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
149
-#ifdef CONFIG_VECTOR16
74
+{
150
-# define DO_CMP0(X) X
75
+ if (fold_commutative(ctx, op) ||
151
-#else
76
+ fold_xi_to_x(ctx, op, 0)) {
152
-# define DO_CMP0(X) -(X)
77
+ return true;
153
-#endif
78
+ }
154
+#define DO_CMP0(X) -(X)
79
+ return false;
155
80
+}
156
#define DO_CMP1(NAME, TYPE, OP) \
81
+
157
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
82
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
83
{
84
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
85
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
86
return false;
87
}
88
89
-static bool fold_sub(OptContext *ctx, TCGOp *op)
90
+/* We cannot as yet do_constant_folding with vectors. */
91
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
92
{
93
- if (fold_const2(ctx, op) ||
94
- fold_xx_to_i(ctx, op, 0) ||
95
+ if (fold_xx_to_i(ctx, op, 0) ||
96
fold_xi_to_x(ctx, op, 0) ||
97
fold_sub_to_neg(ctx, op)) {
98
return true;
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
+static bool fold_sub(OptContext *ctx, TCGOp *op)
104
+{
105
+ return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
106
+}
107
+
108
static bool fold_sub2(OptContext *ctx, TCGOp *op)
109
{
110
return fold_addsub2(ctx, op, false);
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
* Sorted alphabetically by opcode as much as possible.
113
*/
114
switch (opc) {
115
- CASE_OP_32_64_VEC(add):
116
+ CASE_OP_32_64(add):
117
done = fold_add(&ctx, op);
118
break;
119
+ case INDEX_op_add_vec:
120
+ done = fold_add_vec(&ctx, op);
121
+ break;
122
CASE_OP_32_64(add2):
123
done = fold_add2(&ctx, op);
124
break;
125
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
126
CASE_OP_32_64(sextract):
127
done = fold_sextract(&ctx, op);
128
break;
129
- CASE_OP_32_64_VEC(sub):
130
+ CASE_OP_32_64(sub):
131
done = fold_sub(&ctx, op);
132
break;
133
+ case INDEX_op_sub_vec:
134
+ done = fold_sub_vec(&ctx, op);
135
+ break;
136
CASE_OP_32_64(sub2):
137
done = fold_sub2(&ctx, op);
138
break;
158
--
139
--
159
2.20.1
140
2.25.1
160
141
161
142
diff view generated by jsdifflib
1
Partial cleanup from the CONFIG_VECTOR16 removal.
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Replace the DUP* expansions with the scalar argument.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
3
When building using GCC 8.3.0 on loongarch64 (Loongnix) we get:
4
5
In file included from ../linux-user/signal.c:33:
6
../linux-user/host/loongarch64/host-signal.h: In function ‘host_signal_write’:
7
../linux-user/host/loongarch64/host-signal.h:57:9: error: a label can only be part of a statement and a declaration is not a statement
8
uint32_t sel = (insn >> 15) & 0b11111111111;
9
^~~~~~~~
10
11
We don't use the 'sel' variable more than once, so drop it.
12
13
Meson output for the record:
14
15
Host machine cpu family: loongarch64
16
Host machine cpu: loongarch64
17
C compiler for the host machine: cc (gcc 8.3.0 "cc (Loongnix 8.3.0-6.lnd.vec.27) 8.3.0")
18
C linker for the host machine: cc ld.bfd 2.31.1-system
19
20
Fixes: ad812c3bd65 ("linux-user: Implement CPU-specific signal handler for loongarch64 hosts")
21
Reported-by: Song Gao <gaosong@loongson.cn>
22
Suggested-by: Song Gao <gaosong@loongson.cn>
23
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
24
Reviewed-by: WANG Xuerui <git@xen0n.name>
25
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
26
Message-Id: <20220104215027.2180972-1-f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
27
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
28
---
7
accel/tcg/tcg-runtime-gvec.c | 50 +++++++++++-------------------------
29
linux-user/host/loongarch64/host-signal.h | 4 +---
8
1 file changed, 15 insertions(+), 35 deletions(-)
30
1 file changed, 1 insertion(+), 3 deletions(-)
9
31
10
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
32
diff --git a/linux-user/host/loongarch64/host-signal.h b/linux-user/host/loongarch64/host-signal.h
11
index XXXXXXX..XXXXXXX 100644
33
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/tcg-runtime-gvec.c
34
--- a/linux-user/host/loongarch64/host-signal.h
13
+++ b/accel/tcg/tcg-runtime-gvec.c
35
+++ b/linux-user/host/loongarch64/host-signal.h
14
@@ -XXX,XX +XXX,XX @@
36
@@ -XXX,XX +XXX,XX @@ static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
15
#include "tcg/tcg-gvec-desc.h"
37
}
16
38
break;
17
39
case 0b001110: /* indexed, atomic, bounds-checking memory operations */
18
-#define DUP16(X) X
40
- uint32_t sel = (insn >> 15) & 0b11111111111;
19
-#define DUP8(X) X
20
-#define DUP4(X) X
21
-#define DUP2(X) X
22
-
41
-
23
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
42
- switch (sel) {
24
{
43
+ switch ((insn >> 15) & 0b11111111111) {
25
intptr_t maxsz = simd_maxsz(desc);
44
case 0b00000100000: /* stx.b */
26
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
45
case 0b00000101000: /* stx.h */
27
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
46
case 0b00000110000: /* stx.w */
28
{
29
intptr_t oprsz = simd_oprsz(desc);
30
- uint8_t vecb = (uint8_t)DUP16(b);
31
intptr_t i;
32
33
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
34
- *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
35
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
36
}
37
clear_high(d, oprsz, desc);
38
}
39
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
40
void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
41
{
42
intptr_t oprsz = simd_oprsz(desc);
43
- uint16_t vecb = (uint16_t)DUP8(b);
44
intptr_t i;
45
46
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
47
- *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
48
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
49
}
50
clear_high(d, oprsz, desc);
51
}
52
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
53
void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
54
{
55
intptr_t oprsz = simd_oprsz(desc);
56
- uint32_t vecb = (uint32_t)DUP4(b);
57
intptr_t i;
58
59
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
60
- *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
61
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
62
}
63
clear_high(d, oprsz, desc);
64
}
65
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
66
void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
67
{
68
intptr_t oprsz = simd_oprsz(desc);
69
- uint64_t vecb = (uint64_t)DUP2(b);
70
intptr_t i;
71
72
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
73
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
74
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
75
}
76
clear_high(d, oprsz, desc);
77
}
78
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
79
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
80
{
81
intptr_t oprsz = simd_oprsz(desc);
82
- uint8_t vecb = (uint8_t)DUP16(b);
83
intptr_t i;
84
85
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
86
- *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
87
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
88
}
89
clear_high(d, oprsz, desc);
90
}
91
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
92
void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
93
{
94
intptr_t oprsz = simd_oprsz(desc);
95
- uint16_t vecb = (uint16_t)DUP8(b);
96
intptr_t i;
97
98
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
99
- *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
100
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
101
}
102
clear_high(d, oprsz, desc);
103
}
104
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
105
void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
106
{
107
intptr_t oprsz = simd_oprsz(desc);
108
- uint32_t vecb = (uint32_t)DUP4(b);
109
intptr_t i;
110
111
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
112
- *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
113
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
114
}
115
clear_high(d, oprsz, desc);
116
}
117
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
118
void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
119
{
120
intptr_t oprsz = simd_oprsz(desc);
121
- uint64_t vecb = (uint64_t)DUP2(b);
122
intptr_t i;
123
124
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
125
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
126
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
127
}
128
clear_high(d, oprsz, desc);
129
}
130
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
131
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
132
{
133
intptr_t oprsz = simd_oprsz(desc);
134
- uint8_t vecb = (uint8_t)DUP16(b);
135
intptr_t i;
136
137
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
138
- *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
139
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
140
}
141
clear_high(d, oprsz, desc);
142
}
143
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
144
void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
145
{
146
intptr_t oprsz = simd_oprsz(desc);
147
- uint16_t vecb = (uint16_t)DUP8(b);
148
intptr_t i;
149
150
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
151
- *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
152
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
153
}
154
clear_high(d, oprsz, desc);
155
}
156
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
157
void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
158
{
159
intptr_t oprsz = simd_oprsz(desc);
160
- uint32_t vecb = (uint32_t)DUP4(b);
161
intptr_t i;
162
163
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164
- *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
165
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
166
}
167
clear_high(d, oprsz, desc);
168
}
169
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
170
void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
171
{
172
intptr_t oprsz = simd_oprsz(desc);
173
- uint64_t vecb = (uint64_t)DUP2(b);
174
intptr_t i;
175
176
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
177
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
178
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
179
}
180
clear_high(d, oprsz, desc);
181
}
182
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
183
void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
184
{
185
intptr_t oprsz = simd_oprsz(desc);
186
- uint64_t vecb = (uint64_t)DUP2(b);
187
intptr_t i;
188
189
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
190
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
191
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
192
}
193
clear_high(d, oprsz, desc);
194
}
195
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
196
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
197
{
198
intptr_t oprsz = simd_oprsz(desc);
199
- uint64_t vecb = (uint64_t)DUP2(b);
200
intptr_t i;
201
202
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
203
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
204
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
205
}
206
clear_high(d, oprsz, desc);
207
}
208
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
209
void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
210
{
211
intptr_t oprsz = simd_oprsz(desc);
212
- uint64_t vecb = (uint64_t)DUP2(b);
213
intptr_t i;
214
215
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
216
- *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
217
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
218
}
219
clear_high(d, oprsz, desc);
220
}
221
--
47
--
222
2.20.1
48
2.25.1
223
49
224
50
diff view generated by jsdifflib
1
Partial cleanup from the CONFIG_VECTOR16 removal.
1
From: Xiaoyao Li <xiaoyao.li@intel.com>
2
Replace the vec* types with their scalar expansions.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
3
Remove qemu_run_machine_init_done_notifiers() since no implementation
4
and user.
5
6
Fixes: f66dc8737c9 ("vl: move all generic initialization out of vl.c")
7
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Message-Id: <20220104024136.1433545-1-xiaoyao.li@intel.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
11
---
7
accel/tcg/tcg-runtime-gvec.c | 270 +++++++++++++++++------------------
12
include/sysemu/sysemu.h | 1 -
8
1 file changed, 130 insertions(+), 140 deletions(-)
13
1 file changed, 1 deletion(-)
9
14
10
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
15
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/tcg-runtime-gvec.c
17
--- a/include/sysemu/sysemu.h
13
+++ b/accel/tcg/tcg-runtime-gvec.c
18
+++ b/include/sysemu/sysemu.h
14
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ extern bool qemu_uuid_set;
15
#include "tcg/tcg-gvec-desc.h"
20
void qemu_add_exit_notifier(Notifier *notify);
16
21
void qemu_remove_exit_notifier(Notifier *notify);
17
22
18
-typedef uint8_t vec8;
23
-void qemu_run_machine_init_done_notifiers(void);
19
-typedef uint16_t vec16;
24
void qemu_add_machine_init_done_notifier(Notifier *notify);
20
-typedef uint32_t vec32;
25
void qemu_remove_machine_init_done_notifier(Notifier *notify);
21
-typedef uint64_t vec64;
26
22
-
23
-typedef int8_t svec8;
24
-typedef int16_t svec16;
25
-typedef int32_t svec32;
26
-typedef int64_t svec64;
27
-
28
#define DUP16(X) X
29
#define DUP8(X) X
30
#define DUP4(X) X
31
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
32
intptr_t oprsz = simd_oprsz(desc);
33
intptr_t i;
34
35
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
36
- *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
37
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
38
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
39
}
40
clear_high(d, oprsz, desc);
41
}
42
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
43
intptr_t oprsz = simd_oprsz(desc);
44
intptr_t i;
45
46
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
47
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
48
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
49
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
50
}
51
clear_high(d, oprsz, desc);
52
}
53
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
54
intptr_t oprsz = simd_oprsz(desc);
55
intptr_t i;
56
57
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
58
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
59
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
60
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
61
}
62
clear_high(d, oprsz, desc);
63
}
64
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
65
intptr_t oprsz = simd_oprsz(desc);
66
intptr_t i;
67
68
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
69
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
70
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
71
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
72
}
73
clear_high(d, oprsz, desc);
74
}
75
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
76
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
77
{
78
intptr_t oprsz = simd_oprsz(desc);
79
- vec8 vecb = (vec8)DUP16(b);
80
+ uint8_t vecb = (uint8_t)DUP16(b);
81
intptr_t i;
82
83
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
84
- *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
85
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
86
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
87
}
88
clear_high(d, oprsz, desc);
89
}
90
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
91
void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
92
{
93
intptr_t oprsz = simd_oprsz(desc);
94
- vec16 vecb = (vec16)DUP8(b);
95
+ uint16_t vecb = (uint16_t)DUP8(b);
96
intptr_t i;
97
98
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
99
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
100
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
101
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
102
}
103
clear_high(d, oprsz, desc);
104
}
105
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
106
void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
107
{
108
intptr_t oprsz = simd_oprsz(desc);
109
- vec32 vecb = (vec32)DUP4(b);
110
+ uint32_t vecb = (uint32_t)DUP4(b);
111
intptr_t i;
112
113
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
114
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
115
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
116
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
117
}
118
clear_high(d, oprsz, desc);
119
}
120
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
121
void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
122
{
123
intptr_t oprsz = simd_oprsz(desc);
124
- vec64 vecb = (vec64)DUP2(b);
125
+ uint64_t vecb = (uint64_t)DUP2(b);
126
intptr_t i;
127
128
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
129
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
130
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
131
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
132
}
133
clear_high(d, oprsz, desc);
134
}
135
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
136
intptr_t oprsz = simd_oprsz(desc);
137
intptr_t i;
138
139
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
140
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
141
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
142
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
143
}
144
clear_high(d, oprsz, desc);
145
}
146
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
147
intptr_t oprsz = simd_oprsz(desc);
148
intptr_t i;
149
150
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
151
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
152
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
153
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
154
}
155
clear_high(d, oprsz, desc);
156
}
157
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
158
intptr_t oprsz = simd_oprsz(desc);
159
intptr_t i;
160
161
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
162
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
163
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
165
}
166
clear_high(d, oprsz, desc);
167
}
168
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
169
intptr_t oprsz = simd_oprsz(desc);
170
intptr_t i;
171
172
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
173
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
174
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
175
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
176
}
177
clear_high(d, oprsz, desc);
178
}
179
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
180
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
181
{
182
intptr_t oprsz = simd_oprsz(desc);
183
- vec8 vecb = (vec8)DUP16(b);
184
+ uint8_t vecb = (uint8_t)DUP16(b);
185
intptr_t i;
186
187
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
188
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
189
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
190
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
191
}
192
clear_high(d, oprsz, desc);
193
}
194
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
195
void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
196
{
197
intptr_t oprsz = simd_oprsz(desc);
198
- vec16 vecb = (vec16)DUP8(b);
199
+ uint16_t vecb = (uint16_t)DUP8(b);
200
intptr_t i;
201
202
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
203
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
204
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
205
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
206
}
207
clear_high(d, oprsz, desc);
208
}
209
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
210
void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
211
{
212
intptr_t oprsz = simd_oprsz(desc);
213
- vec32 vecb = (vec32)DUP4(b);
214
+ uint32_t vecb = (uint32_t)DUP4(b);
215
intptr_t i;
216
217
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
218
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
219
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
220
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
221
}
222
clear_high(d, oprsz, desc);
223
}
224
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
225
void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
226
{
227
intptr_t oprsz = simd_oprsz(desc);
228
- vec64 vecb = (vec64)DUP2(b);
229
+ uint64_t vecb = (uint64_t)DUP2(b);
230
intptr_t i;
231
232
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
233
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
234
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
235
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
236
}
237
clear_high(d, oprsz, desc);
238
}
239
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
240
intptr_t oprsz = simd_oprsz(desc);
241
intptr_t i;
242
243
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
244
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
245
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
246
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
247
}
248
clear_high(d, oprsz, desc);
249
}
250
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
251
intptr_t oprsz = simd_oprsz(desc);
252
intptr_t i;
253
254
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
255
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
256
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
257
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
258
}
259
clear_high(d, oprsz, desc);
260
}
261
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
262
intptr_t oprsz = simd_oprsz(desc);
263
intptr_t i;
264
265
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
266
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
267
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
268
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
269
}
270
clear_high(d, oprsz, desc);
271
}
272
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
273
intptr_t oprsz = simd_oprsz(desc);
274
intptr_t i;
275
276
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
277
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
278
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
279
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
280
}
281
clear_high(d, oprsz, desc);
282
}
283
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
284
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
285
{
286
intptr_t oprsz = simd_oprsz(desc);
287
- vec8 vecb = (vec8)DUP16(b);
288
+ uint8_t vecb = (uint8_t)DUP16(b);
289
intptr_t i;
290
291
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
292
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
293
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
294
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
295
}
296
clear_high(d, oprsz, desc);
297
}
298
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
299
void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
300
{
301
intptr_t oprsz = simd_oprsz(desc);
302
- vec16 vecb = (vec16)DUP8(b);
303
+ uint16_t vecb = (uint16_t)DUP8(b);
304
intptr_t i;
305
306
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
307
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
308
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
309
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
310
}
311
clear_high(d, oprsz, desc);
312
}
313
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
314
void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
315
{
316
intptr_t oprsz = simd_oprsz(desc);
317
- vec32 vecb = (vec32)DUP4(b);
318
+ uint32_t vecb = (uint32_t)DUP4(b);
319
intptr_t i;
320
321
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
322
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
323
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
324
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
325
}
326
clear_high(d, oprsz, desc);
327
}
328
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
329
void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
330
{
331
intptr_t oprsz = simd_oprsz(desc);
332
- vec64 vecb = (vec64)DUP2(b);
333
+ uint64_t vecb = (uint64_t)DUP2(b);
334
intptr_t i;
335
336
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
337
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
338
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
339
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
340
}
341
clear_high(d, oprsz, desc);
342
}
343
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
344
intptr_t oprsz = simd_oprsz(desc);
345
intptr_t i;
346
347
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
348
- *(vec8 *)(d + i) = -*(vec8 *)(a + i);
349
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
350
+ *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
351
}
352
clear_high(d, oprsz, desc);
353
}
354
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
355
intptr_t oprsz = simd_oprsz(desc);
356
intptr_t i;
357
358
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
359
- *(vec16 *)(d + i) = -*(vec16 *)(a + i);
360
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
361
+ *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
362
}
363
clear_high(d, oprsz, desc);
364
}
365
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
366
intptr_t oprsz = simd_oprsz(desc);
367
intptr_t i;
368
369
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
370
- *(vec32 *)(d + i) = -*(vec32 *)(a + i);
371
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
372
+ *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
373
}
374
clear_high(d, oprsz, desc);
375
}
376
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
377
intptr_t oprsz = simd_oprsz(desc);
378
intptr_t i;
379
380
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
381
- *(vec64 *)(d + i) = -*(vec64 *)(a + i);
382
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
383
+ *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
384
}
385
clear_high(d, oprsz, desc);
386
}
387
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
388
intptr_t oprsz = simd_oprsz(desc);
389
intptr_t i;
390
391
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
392
- *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
393
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
394
+ *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
395
}
396
clear_high(d, oprsz, desc);
397
}
398
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
399
intptr_t oprsz = simd_oprsz(desc);
400
intptr_t i;
401
402
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
403
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
404
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
405
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
406
}
407
clear_high(d, oprsz, desc);
408
}
409
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
410
intptr_t oprsz = simd_oprsz(desc);
411
intptr_t i;
412
413
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
414
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
415
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
416
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
417
}
418
clear_high(d, oprsz, desc);
419
}
420
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
421
intptr_t oprsz = simd_oprsz(desc);
422
intptr_t i;
423
424
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
425
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
426
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
427
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
428
}
429
clear_high(d, oprsz, desc);
430
}
431
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
432
intptr_t oprsz = simd_oprsz(desc);
433
intptr_t i;
434
435
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
436
- *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
437
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
438
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
439
}
440
clear_high(d, oprsz, desc);
441
}
442
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
443
intptr_t oprsz = simd_oprsz(desc);
444
intptr_t i;
445
446
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
447
- *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
448
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
450
}
451
clear_high(d, oprsz, desc);
452
}
453
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
454
intptr_t oprsz = simd_oprsz(desc);
455
intptr_t i;
456
457
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
458
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
459
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
461
}
462
clear_high(d, oprsz, desc);
463
}
464
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
465
intptr_t oprsz = simd_oprsz(desc);
466
intptr_t i;
467
468
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
469
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
470
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
472
}
473
clear_high(d, oprsz, desc);
474
}
475
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
476
intptr_t oprsz = simd_oprsz(desc);
477
intptr_t i;
478
479
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
480
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
481
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
483
}
484
clear_high(d, oprsz, desc);
485
}
486
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
487
void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
488
{
489
intptr_t oprsz = simd_oprsz(desc);
490
- vec64 vecb = (vec64)DUP2(b);
491
+ uint64_t vecb = (uint64_t)DUP2(b);
492
intptr_t i;
493
494
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
495
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
496
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
497
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
498
}
499
clear_high(d, oprsz, desc);
500
}
501
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
502
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
503
{
504
intptr_t oprsz = simd_oprsz(desc);
505
- vec64 vecb = (vec64)DUP2(b);
506
+ uint64_t vecb = (uint64_t)DUP2(b);
507
intptr_t i;
508
509
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
510
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
511
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
512
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
513
}
514
clear_high(d, oprsz, desc);
515
}
516
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
517
void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
518
{
519
intptr_t oprsz = simd_oprsz(desc);
520
- vec64 vecb = (vec64)DUP2(b);
521
+ uint64_t vecb = (uint64_t)DUP2(b);
522
intptr_t i;
523
524
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
525
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
526
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
527
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
528
}
529
clear_high(d, oprsz, desc);
530
}
531
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
532
int shift = simd_data(desc);
533
intptr_t i;
534
535
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
536
- *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
537
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
538
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
539
}
540
clear_high(d, oprsz, desc);
541
}
542
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
543
int shift = simd_data(desc);
544
intptr_t i;
545
546
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
547
- *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
548
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
549
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
550
}
551
clear_high(d, oprsz, desc);
552
}
553
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
554
int shift = simd_data(desc);
555
intptr_t i;
556
557
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
558
- *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
559
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
560
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
561
}
562
clear_high(d, oprsz, desc);
563
}
564
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
565
int shift = simd_data(desc);
566
intptr_t i;
567
568
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
569
- *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
570
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
571
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
572
}
573
clear_high(d, oprsz, desc);
574
}
575
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
576
int shift = simd_data(desc);
577
intptr_t i;
578
579
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
580
- *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
581
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
582
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
583
}
584
clear_high(d, oprsz, desc);
585
}
586
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
587
int shift = simd_data(desc);
588
intptr_t i;
589
590
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
591
- *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
592
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
593
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
594
}
595
clear_high(d, oprsz, desc);
596
}
597
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
598
int shift = simd_data(desc);
599
intptr_t i;
600
601
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
602
- *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
603
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
604
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
605
}
606
clear_high(d, oprsz, desc);
607
}
608
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
609
int shift = simd_data(desc);
610
intptr_t i;
611
612
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
613
- *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
614
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
615
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
616
}
617
clear_high(d, oprsz, desc);
618
}
619
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
620
int shift = simd_data(desc);
621
intptr_t i;
622
623
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
624
- *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
625
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
626
+ *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
627
}
628
clear_high(d, oprsz, desc);
629
}
630
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
631
int shift = simd_data(desc);
632
intptr_t i;
633
634
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
635
- *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
636
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
637
+ *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
638
}
639
clear_high(d, oprsz, desc);
640
}
641
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
642
int shift = simd_data(desc);
643
intptr_t i;
644
645
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
646
- *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
647
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
648
+ *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
649
}
650
clear_high(d, oprsz, desc);
651
}
652
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
653
int shift = simd_data(desc);
654
intptr_t i;
655
656
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
657
- *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
658
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
659
+ *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
660
}
661
clear_high(d, oprsz, desc);
662
}
663
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
664
}
665
666
#define DO_CMP2(SZ) \
667
- DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
668
- DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
669
- DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
670
- DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
671
- DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
672
- DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
673
+ DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
674
+ DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
675
+ DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
676
+ DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
677
+ DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
678
+ DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
679
680
DO_CMP2(8)
681
DO_CMP2(16)
682
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
683
intptr_t oprsz = simd_oprsz(desc);
684
intptr_t i;
685
686
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
687
- vec64 aa = *(vec64 *)(a + i);
688
- vec64 bb = *(vec64 *)(b + i);
689
- vec64 cc = *(vec64 *)(c + i);
690
- *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa);
691
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
692
+ uint64_t aa = *(uint64_t *)(a + i);
693
+ uint64_t bb = *(uint64_t *)(b + i);
694
+ uint64_t cc = *(uint64_t *)(c + i);
695
+ *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
696
}
697
clear_high(d, oprsz, desc);
698
}
699
--
27
--
700
2.20.1
28
2.25.1
701
29
702
30
diff view generated by jsdifflib
1
Partial cleanup from the CONFIG_VECTOR16 removal.
1
For the ABIs in which the syscall return register is not
2
Replace DO_CMP0 with its scalar expansion, a simple negation.
2
also the first function argument register, move the errno
3
value into the correct place.
3
4
5
Fixes: a3310c0397e2 ("linux-user: Move syscall error detection into safe_syscall_base")
6
Reported-by: Laurent Vivier <laurent@vivier.eu>
7
Tested-by: Laurent Vivier <laurent@vivier.eu>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-Id: <20220104190454.542225-1-richard.henderson@linaro.org>
5
---
11
---
6
accel/tcg/tcg-runtime-gvec.c | 5 +----
12
common-user/host/i386/safe-syscall.inc.S | 1 +
7
1 file changed, 1 insertion(+), 4 deletions(-)
13
common-user/host/mips/safe-syscall.inc.S | 1 +
14
common-user/host/x86_64/safe-syscall.inc.S | 1 +
15
3 files changed, 3 insertions(+)
8
16
9
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
17
diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S
10
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/tcg-runtime-gvec.c
19
--- a/common-user/host/i386/safe-syscall.inc.S
12
+++ b/accel/tcg/tcg-runtime-gvec.c
20
+++ b/common-user/host/i386/safe-syscall.inc.S
13
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
21
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
14
clear_high(d, oprsz, desc);
22
pop %ebp
15
}
23
.cfi_adjust_cfa_offset -4
16
24
.cfi_restore ebp
17
-#define DO_CMP0(X) -(X)
25
+ mov %eax, (%esp)
18
-
26
jmp safe_syscall_set_errno_tail
19
#define DO_CMP1(NAME, TYPE, OP) \
27
20
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
28
.cfi_endproc
21
{ \
29
diff --git a/common-user/host/mips/safe-syscall.inc.S b/common-user/host/mips/safe-syscall.inc.S
22
intptr_t oprsz = simd_oprsz(desc); \
30
index XXXXXXX..XXXXXXX 100644
23
intptr_t i; \
31
--- a/common-user/host/mips/safe-syscall.inc.S
24
for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
32
+++ b/common-user/host/mips/safe-syscall.inc.S
25
- *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
33
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
26
+ *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
34
1: USE_ALT_CP(t0)
27
} \
35
SETUP_GPX(t1)
28
clear_high(d, oprsz, desc); \
36
SETUP_GPX64(t0, t1)
29
}
37
+ move a0, v0
30
@@ -XXX,XX +XXX,XX @@ DO_CMP2(16)
38
PTR_LA t9, safe_syscall_set_errno_tail
31
DO_CMP2(32)
39
jr t9
32
DO_CMP2(64)
40
33
41
diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S
34
-#undef DO_CMP0
42
index XXXXXXX..XXXXXXX 100644
35
#undef DO_CMP1
43
--- a/common-user/host/x86_64/safe-syscall.inc.S
36
#undef DO_CMP2
44
+++ b/common-user/host/x86_64/safe-syscall.inc.S
45
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
46
1: pop %rbp
47
.cfi_def_cfa_offset 8
48
.cfi_restore rbp
49
+ mov %eax, %edi
50
jmp safe_syscall_set_errno_tail
51
.cfi_endproc
37
52
38
--
53
--
39
2.20.1
54
2.25.1
40
55
41
56
diff view generated by jsdifflib