The following changes since commit 4907644841e3200aea6475c0f72d3d987e9f3d93:

  Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into staging (2023-09-19 13:22:19 -0400)

are available in the Git repository at:

  https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20230920

for you to fetch changes up to 2cd81e37512648a03d7dd37c39fa7fd50e2e4478:

  target/loongarch: CPUCFG support LASX (2023-09-20 14:33:43 +0800)

----------------------------------------------------------------
Add LASX instructions support.

----------------------------------------------------------------
Song Gao (57):
      target/loongarch: Renamed lsx*.c to vec*.c
      target/loongarch: Implement gvec_*_vl functions
      target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_4 for 4OP vector instructions
      target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_3 for 3OP vector instructions
      target/loongarch: Use gen_helper_gvec_2_ptr for 2OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_2 for 2OP vector instructions
      target/loongarch: Use gen_helper_gvec_2i for 2OP + imm vector instructions
      target/loongarch: Replace CHECK_SXE with check_vec(ctx, 16)
      target/loongarch: Add LASX data support
      target/loongarch: Make check_vec support LASX instructions
      target/loongarch: Add avail_LASX to check LASX instructions
      target/loongarch: Implement xvadd/xvsub
      target/loongarch: Implement xvreplgr2vr
      target/loongarch: Implement xvaddi/xvsubi
      target/loongarch: Implement xvneg
      target/loongarch: Implement xvsadd/xvssub
      target/loongarch: Implement xvhaddw/xvhsubw
      target/loongarch: Implement xvaddw/xvsubw
      target/loongarch: Implement xvavg/xvavgr
      target/loongarch: Implement xvabsd
      target/loongarch: Implement xvadda
      target/loongarch: Implement xvmax/xvmin
      target/loongarch: Implement xvmul/xvmuh/xvmulw{ev/od}
      target/loongarch: Implement xvmadd/xvmsub/xvmaddw{ev/od}
      target/loongarch: Implement xvdiv/xvmod
      target/loongarch: Implement xvsat
      target/loongarch: Implement xvexth
      target/loongarch: Implement vext2xv
      target/loongarch: Implement xvsigncov
      target/loongarch: Implement xvmskltz/xvmskgez/xvmsknz
      target/loongarch: Implement xvldi
      target/loongarch: Implement LASX logic instructions
      target/loongarch: Implement xvsll xvsrl xvsra xvrotr
      target/loongarch: Implement xvsllwil xvextl
      target/loongarch: Implement xvsrlr xvsrar
      target/loongarch: Implement xvsrln xvsran
      target/loongarch: Implement xvsrlrn xvsrarn
      target/loongarch: Implement xvssrln xvssran
      target/loongarch: Implement xvssrlrn xvssrarn
      target/loongarch: Implement xvclo xvclz
      target/loongarch: Implement xvpcnt
      target/loongarch: Implement xvbitclr xvbitset xvbitrev
      target/loongarch: Implement xvfrstp
      target/loongarch: Implement LASX fpu arith instructions
      target/loongarch: Implement LASX fpu fcvt instructions
      target/loongarch: Implement xvseq xvsle xvslt
      target/loongarch: Implement xvfcmp
      target/loongarch: Implement xvbitsel xvset
      target/loongarch: Implement xvinsgr2vr xvpickve2gr
      target/loongarch: Implement xvreplve xvinsve0 xvpickve
      target/loongarch: Implement xvpack xvpick xvilv{l/h}
      target/loongarch: Implement xvshuf xvperm{i} xvshuf4i
      target/loongarch: Implement xvld xvst
      target/loongarch: Move simple DO_XX macros together
      target/loongarch: CPUCFG support LASX

 target/loongarch/cpu.h                        |   26 +-
 target/loongarch/helper.h                     |  689 ++--
 target/loongarch/internals.h                  |   22 -
 target/loongarch/translate.h                  |    1 +
 target/loongarch/vec.h                        |   75 +
 target/loongarch/insns.decode                 |  782 +++++
 linux-user/loongarch64/signal.c               |    1 +
 target/loongarch/cpu.c                        |    4 +
 target/loongarch/disas.c                      |  924 ++++++
 target/loongarch/gdbstub.c                    |    1 +
 target/loongarch/lsx_helper.c                 | 3004 -----------------
 target/loongarch/machine.c                    |   36 +-
 target/loongarch/translate.c                  |   19 +-
 target/loongarch/vec_helper.c                 | 3494 ++++++++++++++++++++
 .../{trans_lsx.c.inc => trans_vec.c.inc}      | 2393 ++++++++++----
 target/loongarch/meson.build                  |    2 +-
 16 files changed, 7386 insertions(+), 4087 deletions(-)
 create mode 100644 target/loongarch/vec.h
 delete mode 100644 target/loongarch/lsx_helper.c
 create mode 100644 target/loongarch/vec_helper.c
 rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (61%)
Renamed lsx_helper.c to vec_helper.c and trans_lsx.c.inc to trans_vec.c.inc
so that LASX can use them.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-2-gaosong@loongson.cn>
---
 target/loongarch/translate.c                                  | 2 +-
 target/loongarch/{lsx_helper.c => vec_helper.c}               | 2 +-
 .../loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} | 2 +-
 target/loongarch/meson.build                                  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename target/loongarch/{lsx_helper.c => vec_helper.c} (99%)
 rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (99%)

diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
 #include "insn_trans/trans_fmemory.c.inc"
 #include "insn_trans/trans_branch.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
-#include "insn_trans/trans_lsx.c.inc"
+#include "insn_trans/trans_vec.c.inc"

 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/vec_helper.c
similarity index 99%
rename from target/loongarch/lsx_helper.c
rename to target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * QEMU LoongArch LSX helper functions.
+ * QEMU LoongArch vector helper functions.
  *
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
similarity index 99%
rename from target/loongarch/insn_trans/trans_lsx.c.inc
rename to target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * LSX translate functions
+ * LoongArch vector translate functions
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */

diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -XXX,XX +XXX,XX @@ loongarch_tcg_ss.add(files(
   'op_helper.c',
   'translate.c',
   'gdbstub.c',
-  'lsx_helper.c',
+  'vec_helper.c',
 ))
 loongarch_tcg_ss.add(zlib)

-- 
2.39.1
Create gvec_*_vl functions in order to hide oprsz.
This is used by gvec_v* functions for oprsz 16,
and will be used by gvec_x* functions for oprsz 32.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-3-gaosong@loongson.cn>
---
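For illustration, a sketch of the 32-byte wrapper this factoring enables
(gvec_xxx and CHECK_ASXE are placeholder names here; the real LASX wrappers
and enable check are added by later patches in the series):

    /* Hypothetical LASX counterpart: same operand layout, oprsz of 32 bytes.
     * CHECK_ASXE stands in for whatever LASX enable check the series adds. */
    static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
                         void (*func)(unsigned, uint32_t, uint32_t,
                                      uint32_t, uint32_t, uint32_t))
    {
        CHECK_ASXE;
        return gvec_vvv_vl(ctx, a, 32, mop, func);
    }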
 target/loongarch/insn_trans/trans_vec.c.inc | 68 +++++++++++++--------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_cv(DisasContext *ctx, arg_cv *a,
     return true;
 }

+static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
+                        uint32_t oprsz, MemOp mop,
+                        void (*func)(unsigned, uint32_t, uint32_t,
+                                     uint32_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
+    uint32_t vk_ofs = vec_full_offset(a->vk);
+
+    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
+    return true;
+}
+
 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
                      void (*func)(unsigned, uint32_t, uint32_t,
                                   uint32_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs, vk_ofs;
-
     CHECK_SXE;
+    return gvec_vvv_vl(ctx, a, 16, mop, func);
+}

-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-    vk_ofs = vec_full_offset(a->vk);

-    func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
+                       uint32_t oprsz, MemOp mop,
+                       void (*func)(unsigned, uint32_t, uint32_t,
+                                    uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
+
+    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
     return true;
 }

+
 static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
                     void (*func)(unsigned, uint32_t, uint32_t,
                                  uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
+    return gvec_vv_vl(ctx, a, 16, mop, func);
+}

-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop,
+                         void (*func)(unsigned, uint32_t, uint32_t,
+                                      int64_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);

-    func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
+    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
     return true;
 }

@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
                       void (*func)(unsigned, uint32_t, uint32_t,
                                    int64_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
+    return gvec_vv_i_vl(ctx, a, 16, mop, func);
+}

-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop)
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);

-    func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
+    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
     return true;
 }

 static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
-
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-
-    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
-    return true;
+    return gvec_subi_vl(ctx, a, 16, mop);
 }

 TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
-- 
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-4-gaosong@loongson.cn>
---
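A sketch of the convention change at the call boundary (simplified from
this patch; the _old/_new suffixes are only for contrast). simd_oprsz() and
simd_maxsz() are the generic gvec accessors for the sizes packed into desc:

    /* Old convention: register numbers, resolved through env in the helper,
     * e.g. VReg *Vd = &env->fpr[vd].vreg; */
    void helper_vfmadd_s_old(CPULoongArchState *env, uint32_t vd, uint32_t vj,
                             uint32_t vk, uint32_t va);

    /* New convention: tcg_gen_gvec_4_ptr() hands the helper the operand
     * addresses directly, plus env and a descriptor word packing
     * oprsz/maxsz (retrievable with simd_oprsz(desc)/simd_maxsz(desc)). */
    void helper_vfmadd_s_new(void *vd, void *vj, void *vk, void *va,
                             CPULoongArchState *env, uint32_t desc);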
 target/loongarch/helper.h                   | 16 +++++-----
 target/loongarch/vec_helper.c               | 12 +++----
 target/loongarch/insn_trans/trans_vec.c.inc | 35 ++++++++++++++++-----
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
 DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)

-DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)

 DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
 DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
 DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)

 #define DO_4OP_F(NAME, BIT, E, FN, flags) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
+                  CPULoongArchState *env, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
-    VReg *Va = &(env->fpr[va].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    VReg *Va = (VReg *)va; \
 \
     vec_clear_cause(env); \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@
 #define CHECK_SXE
 #endif

+static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                            gen_helper_gvec_4_ptr *fn)
+{
+    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
+                         gen_helper_gvec_4_ptr *fn)
+{
+    CHECK_SXE;
+    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
+}
+
 static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
                                   TCGv_i32, TCGv_i32))
@@ -XXX,XX +XXX,XX @@ TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
 TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
 TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)

-TRANS(vfmadd_s, LSX, gen_vvvv, gen_helper_vfmadd_s)
-TRANS(vfmadd_d, LSX, gen_vvvv, gen_helper_vfmadd_d)
-TRANS(vfmsub_s, LSX, gen_vvvv, gen_helper_vfmsub_s)
-TRANS(vfmsub_d, LSX, gen_vvvv, gen_helper_vfmsub_d)
-TRANS(vfnmadd_s, LSX, gen_vvvv, gen_helper_vfnmadd_s)
-TRANS(vfnmadd_d, LSX, gen_vvvv, gen_helper_vfnmadd_d)
-TRANS(vfnmsub_s, LSX, gen_vvvv, gen_helper_vfnmsub_s)
-TRANS(vfnmsub_d, LSX, gen_vvvv, gen_helper_vfnmsub_d)
+TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
+TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
+TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
+TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
+TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
+TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
+TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
+TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)

 TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
 TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-- 
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-5-gaosong@loongson.cn>
---
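vshuf_b reads no CPU state beyond its operands, so it can drop env and
become a plain out-of-line gvec call, flagged TCG_CALL_NO_RWG (the helper
neither reads nor writes TCG globals). What the translator now emits,
sketched with made-up register numbers:

    /* Translation of a hypothetical vshuf.b v0, v1, v2, v3: one ool call. */
    tcg_gen_gvec_4_ool(vec_full_offset(0),      /* vd */
                       vec_full_offset(1),      /* vj */
                       vec_full_offset(2),      /* vk */
                       vec_full_offset(3),      /* va */
                       16, ctx->vl / 8, 0,      /* oprsz, maxsz, data */
                       gen_helper_vshuf_b);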
 target/loongarch/helper.h                   |  2 +-
 target/loongarch/vec_helper.c               | 11 +++++------
 target/loongarch/insn_trans/trans_vec.c.inc | 22 ++++++++++++---------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)

-DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H)
 VILVH(vilvh_w, 64, W)
 VILVH(vilvh_d, 128, D)

-void HELPER(vshuf_b)(CPULoongArchState *env,
-                     uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
+void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
 {
     int i, m;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-    VReg *Va = &(env->fpr[va].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    VReg *Va = (VReg *)va;

     m = LSX_LEN/8;
     for (i = 0; i < m ; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
     return gen_vvvv_ptr_vl(ctx, a, 16, fn);
 }

-static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
-                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
-                                  TCGv_i32, TCGv_i32))
+static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                        gen_helper_gvec_4 *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 vk = tcg_constant_i32(a->vk);
-    TCGv_i32 va = tcg_constant_i32(a->va);
+    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}

+static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
+                     gen_helper_gvec_4 *fn)
+{
     CHECK_SXE;
-    func(cpu_env, vd, vj, vk, va);
-    return true;
+    return gen_vvvv_vl(ctx, a, 16, fn);
 }

 static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
-- 
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-6-gaosong@loongson.cn>
---
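These FP helpers keep env because the softfloat state lives there (the
rounding mode, and the exception cause bits that vec_clear_cause() resets);
only the operand access changes. A hand-expansion sketch of DO_3OP_F for
vfadd_s, with details elided (the real macro also folds the FCSR update
into the loop):

    void helper_vfadd_s(void *vd, void *vj, void *vk,
                        CPULoongArchState *env, uint32_t desc)
    {
        VReg *Vd = vd, *Vj = vj, *Vk = vk;

        vec_clear_cause(env);
        for (int i = 0; i < LSX_LEN / 32; i++) {
            /* one float32 lane per iteration; exception flags land in env */
            Vd->UW(i) = float32_add(Vj->UW(i), Vk->UW(i), &env->fp_status);
        }
    }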
 target/loongarch/helper.h                   | 48 +++++++--------
 target/loongarch/vec_helper.c               | 50 ++++++++--------
 target/loongarch/insn_trans/trans_vec.c.inc | 66 +++++++++++++--------
 3 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)

-DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)

 DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)

-DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)

-DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)

 DEF_HELPER_3(vflogb_s, void, env, i32, i32)
 DEF_HELPER_3(vflogb_d, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
 DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
 DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
 DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
-DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)

 DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
 DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
 DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
 DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
-DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
 DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
 DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
 DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
-DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)

 DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static inline void vec_clear_cause(CPULoongArchState *env)
 }

 #define DO_3OP_F(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+                  CPULoongArchState *env, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
 \
     vec_clear_cause(env); \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
     *Vd = temp;
 }

-void HELPER(vfcvt_h_s)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     vec_clear_cause(env);
     for(i = 0; i < LSX_LEN/32; i++) {
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_h_s)(CPULoongArchState *env,
     *Vd = temp;
 }

-void HELPER(vfcvt_s_d)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     vec_clear_cause(env);
     for(i = 0; i < LSX_LEN/64; i++) {
@@ -XXX,XX +XXX,XX @@ FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
 FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)

 #define FTINT_W_D(NAME, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+                  CPULoongArchState *env, uint32_t desc) \
 { \
     int i; \
     VReg temp; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
 \
     vec_clear_cause(env); \
     for (i = 0; i < 2; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
     *Vd = temp;
 }

-void HELPER(vffint_s_l)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
+                        CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     vec_clear_cause(env);
     for (i = 0; i < 2; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
     return gen_vvvv_vl(ctx, a, 16, fn);
 }

+static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+                           gen_helper_gvec_3_ptr *fn)
+{
+    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
+                        gen_helper_gvec_3_ptr *fn)
+{
+    CHECK_SXE;
+    return gen_vvv_ptr_vl(ctx, a, 16, fn);
+}
+
 static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
 TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
 TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)

-TRANS(vfadd_s, LSX, gen_vvv, gen_helper_vfadd_s)
-TRANS(vfadd_d, LSX, gen_vvv, gen_helper_vfadd_d)
-TRANS(vfsub_s, LSX, gen_vvv, gen_helper_vfsub_s)
-TRANS(vfsub_d, LSX, gen_vvv, gen_helper_vfsub_d)
-TRANS(vfmul_s, LSX, gen_vvv, gen_helper_vfmul_s)
-TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
-TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
-TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
+TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
+TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
+TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
+TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
+TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
+TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
+TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
+TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)

 TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
 TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
 TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
 TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)

-TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
-TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-TRANS(vfmin_s, LSX, gen_vvv, gen_helper_vfmin_s)
-TRANS(vfmin_d, LSX, gen_vvv, gen_helper_vfmin_d)
+TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
+TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
+TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
+TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)

-TRANS(vfmaxa_s, LSX, gen_vvv, gen_helper_vfmaxa_s)
-TRANS(vfmaxa_d, LSX, gen_vvv, gen_helper_vfmaxa_d)
-TRANS(vfmina_s, LSX, gen_vvv, gen_helper_vfmina_s)
-TRANS(vfmina_d, LSX, gen_vvv, gen_helper_vfmina_d)
+TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
+TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
+TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
+TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)

 TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
 TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
 TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
 TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
 TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
-TRANS(vfcvt_h_s, LSX, gen_vvv, gen_helper_vfcvt_h_s)
-TRANS(vfcvt_s_d, LSX, gen_vvv, gen_helper_vfcvt_s_d)
+TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
+TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)

 TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
 TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
 TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
 TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
 TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
-TRANS(vftintrne_w_d, LSX, gen_vvv, gen_helper_vftintrne_w_d)
-TRANS(vftintrz_w_d, LSX, gen_vvv, gen_helper_vftintrz_w_d)
-TRANS(vftintrp_w_d, LSX, gen_vvv, gen_helper_vftintrp_w_d)
-TRANS(vftintrm_w_d, LSX, gen_vvv, gen_helper_vftintrm_w_d)
-TRANS(vftint_w_d, LSX, gen_vvv, gen_helper_vftint_w_d)
+TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
+TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
+TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
+TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
+TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
 TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
 TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
 TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
@@ -XXX,XX +XXX,XX @@ TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
 TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
 TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
 TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
-TRANS(vffint_s_l, LSX, gen_vvv, gen_helper_vffint_s_l)
+TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)

 static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
 {
-- 
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-7-gaosong@loongson.cn>
---
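The integer helpers converted here never consult CPU state, so env
disappears from the signature altogether; desc is unused for now but keeps
the standard gvec helper shape. A hand-expansion sketch of the new VDIV for
vdiv_b (illustrative; DO_REM's definition appears in the hunk below, and
DO_DIV is guarded the same way):

    void helper_vdiv_b(void *vd, void *vj, void *vk, uint32_t desc)
    {
        VReg *Vd = vd, *Vj = vj, *Vk = vk;

        for (int i = 0; i < LSX_LEN / 8; i++) {
            /* DO_DIV folds the M == 0 and INT_MIN / -1 cases to a fixed
             * result instead of trapping */
            Vd->B(i) = DO_DIV(Vj->B(i), Vk->B(i));
        }
    }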
 target/loongarch/helper.h                   | 214 +++++-----
 target/loongarch/vec_helper.c               | 444 +++++++++-----------
 target/loongarch/insn_trans/trans_vec.c.inc |  19 +-
 3 files changed, 326 insertions(+), 351 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(idle, void, env)
 #endif

 /* LoongArch LSX */
-DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

-DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)

-DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)

-DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)

-DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)

-DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)

-DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)

-DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)

@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)

-DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@
 #define DO_SUB(a, b) (a - b)

 #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
     typedef __typeof(Vd->E1(0)) TD; \
 \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
 DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
 DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)

-void HELPER(vhaddw_q_d)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
 }
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
 DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
 DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)

-void HELPER(vhsubw_q_d)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
 }
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
 DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
 DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)

-void HELPER(vhaddw_qu_du)(CPULoongArchState *env,
-                          uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
                           int128_make64((uint64_t)Vk->D(0)));
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
 DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
 DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)

-void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
-                          uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;

     Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
                           int128_make64((uint64_t)Vk->D(0)));
@@ -XXX,XX +XXX,XX @@ VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
 #define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

-#define VDIV(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
-    int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
-    } \
+#define VDIV(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    for (i = 0; i < LSX_LEN/BIT; i++) { \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+    } \
 }

 VDIV(vdiv_b, 8, B, DO_DIV)
@@ -XXX,XX +XXX,XX @@ do_vsrlr(W, uint32_t)
 do_vsrlr(D, uint64_t)

 #define VSRLR(NAME, BIT, T, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
 \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
         Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
@@ -XXX,XX +XXX,XX @@ do_vsrar(W, int32_t)
 do_vsrar(D, int64_t)

 #define VSRAR(NAME, BIT, T, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
 \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
         Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
@@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_d, 64, D)
 #define R_SHIFT(a, b) (a >> b)

 #define VSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
     int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
 \
     for (i = 0; i < LSX_LEN/BIT; i++) { \
         Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
@@ -XXX,XX +XXX,XX @@ VSRLN(vsrln_b_h, 16, uint16_t, B, H)
 VSRLN(vsrln_h_w, 32, uint32_t, H, W)
 VSRLN(vsrln_w_d, 64, uint64_t, W, D)

-#define VSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
-                  uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
-    int i; \
-    VReg *Vd = &(env->fpr[vd].vreg); \
-    VReg *Vj = &(env->fpr[vj].vreg); \
-    VReg *Vk = &(env->fpr[vk].vreg); \
-    \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
-    } \
-    Vd->D(1) = 0; \
+#define VSRAN(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    \
+    for (i = 0; i < LSX_LEN/BIT; i++) { \
+        Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
+    } \
+    Vd->D(1) = 0; \
 }

 VSRAN(vsran_b_h, 16, uint16_t, B, H)
@@ -XXX,XX +XXX,XX @@ VSRANI(vsrani_h_w, 32, H, W)
501
VSRANI(vsrani_w_d, 64, W, D)
502
503
#define VSRLRN(NAME, BIT, T, E1, E2) \
504
-void HELPER(NAME)(CPULoongArchState *env, \
505
- uint32_t vd, uint32_t vj, uint32_t vk) \
506
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
507
{ \
508
int i; \
509
- VReg *Vd = &(env->fpr[vd].vreg); \
510
- VReg *Vj = &(env->fpr[vj].vreg); \
511
- VReg *Vk = &(env->fpr[vk].vreg); \
512
+ VReg *Vd = (VReg *)vd; \
513
+ VReg *Vj = (VReg *)vj; \
514
+ VReg *Vk = (VReg *)vk; \
515
\
516
for (i = 0; i < LSX_LEN/BIT; i++) { \
517
Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
518
@@ -XXX,XX +XXX,XX @@ VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
519
VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
520
521
#define VSRARN(NAME, BIT, T, E1, E2) \
522
-void HELPER(NAME)(CPULoongArchState *env, \
523
- uint32_t vd, uint32_t vj, uint32_t vk) \
524
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
525
{ \
526
int i; \
527
- VReg *Vd = &(env->fpr[vd].vreg); \
528
- VReg *Vj = &(env->fpr[vj].vreg); \
529
- VReg *Vk = &(env->fpr[vk].vreg); \
530
+ VReg *Vd = (VReg *)vd; \
531
+ VReg *Vj = (VReg *)vj; \
532
+ VReg *Vk = (VReg *)vk; \
533
\
534
for (i = 0; i < LSX_LEN/BIT; i++) { \
535
Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
536
@@ -XXX,XX +XXX,XX @@ SSRLNS(H, uint32_t, int32_t, uint16_t)
537
SSRLNS(W, uint64_t, int64_t, uint32_t)
538
539
#define VSSRLN(NAME, BIT, T, E1, E2) \
540
-void HELPER(NAME)(CPULoongArchState *env, \
541
- uint32_t vd, uint32_t vj, uint32_t vk) \
542
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
543
{ \
544
int i; \
545
- VReg *Vd = &(env->fpr[vd].vreg); \
546
- VReg *Vj = &(env->fpr[vj].vreg); \
547
- VReg *Vk = &(env->fpr[vk].vreg); \
548
+ VReg *Vd = (VReg *)vd; \
549
+ VReg *Vj = (VReg *)vj; \
550
+ VReg *Vk = (VReg *)vk; \
551
\
552
for (i = 0; i < LSX_LEN/BIT; i++) { \
553
Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
554
@@ -XXX,XX +XXX,XX @@ SSRANS(H, int32_t, int16_t)
555
SSRANS(W, int64_t, int32_t)
556
557
#define VSSRAN(NAME, BIT, T, E1, E2) \
558
-void HELPER(NAME)(CPULoongArchState *env, \
559
- uint32_t vd, uint32_t vj, uint32_t vk) \
560
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
561
{ \
562
int i; \
563
- VReg *Vd = &(env->fpr[vd].vreg); \
564
- VReg *Vj = &(env->fpr[vj].vreg); \
565
- VReg *Vk = &(env->fpr[vk].vreg); \
566
+ VReg *Vd = (VReg *)vd; \
567
+ VReg *Vj = (VReg *)vj; \
568
+ VReg *Vk = (VReg *)vk; \
569
\
570
for (i = 0; i < LSX_LEN/BIT; i++) { \
571
Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
572
@@ -XXX,XX +XXX,XX @@ SSRLNU(H, uint32_t, uint16_t, int32_t)
573
SSRLNU(W, uint64_t, uint32_t, int64_t)
574
575
#define VSSRLNU(NAME, BIT, T, E1, E2) \
576
-void HELPER(NAME)(CPULoongArchState *env, \
577
- uint32_t vd, uint32_t vj, uint32_t vk) \
578
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
579
{ \
580
int i; \
581
- VReg *Vd = &(env->fpr[vd].vreg); \
582
- VReg *Vj = &(env->fpr[vj].vreg); \
583
- VReg *Vk = &(env->fpr[vk].vreg); \
584
+ VReg *Vd = (VReg *)vd; \
585
+ VReg *Vj = (VReg *)vj; \
586
+ VReg *Vk = (VReg *)vk; \
587
\
588
for (i = 0; i < LSX_LEN/BIT; i++) { \
589
Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
590
@@ -XXX,XX +XXX,XX @@ SSRANU(H, uint32_t, uint16_t, int32_t)
591
SSRANU(W, uint64_t, uint32_t, int64_t)
592
593
#define VSSRANU(NAME, BIT, T, E1, E2) \
594
-void HELPER(NAME)(CPULoongArchState *env, \
595
- uint32_t vd, uint32_t vj, uint32_t vk) \
596
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
597
{ \
598
int i; \
599
- VReg *Vd = &(env->fpr[vd].vreg); \
600
- VReg *Vj = &(env->fpr[vj].vreg); \
601
- VReg *Vk = &(env->fpr[vk].vreg); \
602
+ VReg *Vd = (VReg *)vd; \
603
+ VReg *Vj = (VReg *)vj; \
604
+ VReg *Vk = (VReg *)vk; \
605
\
606
for (i = 0; i < LSX_LEN/BIT; i++) { \
607
Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
608
@@ -XXX,XX +XXX,XX @@ SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
609
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
610
611
#define VSSRLRN(NAME, BIT, T, E1, E2) \
612
-void HELPER(NAME)(CPULoongArchState *env, \
613
- uint32_t vd, uint32_t vj, uint32_t vk) \
614
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
615
{ \
616
int i; \
617
- VReg *Vd = &(env->fpr[vd].vreg); \
618
- VReg *Vj = &(env->fpr[vj].vreg); \
619
- VReg *Vk = &(env->fpr[vk].vreg); \
620
+ VReg *Vd = (VReg *)vd; \
621
+ VReg *Vj = (VReg *)vj; \
622
+ VReg *Vk = (VReg *)vk; \
623
\
624
for (i = 0; i < LSX_LEN/BIT; i++) { \
625
Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
626
@@ -XXX,XX +XXX,XX @@ SSRARNS(H, W, int32_t, int16_t)
627
SSRARNS(W, D, int64_t, int32_t)
628
629
#define VSSRARN(NAME, BIT, T, E1, E2) \
630
-void HELPER(NAME)(CPULoongArchState *env, \
631
- uint32_t vd, uint32_t vj, uint32_t vk) \
632
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
633
{ \
634
int i; \
635
- VReg *Vd = &(env->fpr[vd].vreg); \
636
- VReg *Vj = &(env->fpr[vj].vreg); \
637
- VReg *Vk = &(env->fpr[vk].vreg); \
638
+ VReg *Vd = (VReg *)vd; \
639
+ VReg *Vj = (VReg *)vj; \
640
+ VReg *Vk = (VReg *)vk; \
641
\
642
for (i = 0; i < LSX_LEN/BIT; i++) { \
643
Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
644
@@ -XXX,XX +XXX,XX @@ SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
645
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
646
647
#define VSSRLRNU(NAME, BIT, T, E1, E2) \
648
-void HELPER(NAME)(CPULoongArchState *env, \
649
- uint32_t vd, uint32_t vj, uint32_t vk) \
650
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
651
{ \
652
int i; \
653
- VReg *Vd = &(env->fpr[vd].vreg); \
654
- VReg *Vj = &(env->fpr[vj].vreg); \
655
- VReg *Vk = &(env->fpr[vk].vreg); \
656
+ VReg *Vd = (VReg *)vd; \
657
+ VReg *Vj = (VReg *)vj; \
658
+ VReg *Vk = (VReg *)vk; \
659
\
660
for (i = 0; i < LSX_LEN/BIT; i++) { \
661
Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
662
@@ -XXX,XX +XXX,XX @@ SSRARNU(H, W, uint32_t, uint16_t, int32_t)
663
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
664
665
#define VSSRARNU(NAME, BIT, T, E1, E2) \
666
-void HELPER(NAME)(CPULoongArchState *env, \
667
- uint32_t vd, uint32_t vj, uint32_t vk) \
668
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
669
{ \
670
int i; \
671
- VReg *Vd = &(env->fpr[vd].vreg); \
672
- VReg *Vj = &(env->fpr[vj].vreg); \
673
- VReg *Vk = &(env->fpr[vk].vreg); \
674
+ VReg *Vd = (VReg *)vd; \
675
+ VReg *Vj = (VReg *)vj; \
676
+ VReg *Vk = (VReg *)vk; \
677
\
678
for (i = 0; i < LSX_LEN/BIT; i++) { \
679
Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
680
@@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
681
DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
682
DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
683
684
-#define VFRSTP(NAME, BIT, MASK, E) \
685
-void HELPER(NAME)(CPULoongArchState *env, \
686
- uint32_t vd, uint32_t vj, uint32_t vk) \
687
-{ \
688
- int i, m; \
689
- VReg *Vd = &(env->fpr[vd].vreg); \
690
- VReg *Vj = &(env->fpr[vj].vreg); \
691
- VReg *Vk = &(env->fpr[vk].vreg); \
692
- \
693
- for (i = 0; i < LSX_LEN/BIT; i++) { \
694
- if (Vj->E(i) < 0) { \
695
- break; \
696
- } \
697
- } \
698
- m = Vk->E(0) & MASK; \
699
- Vd->E(m) = i; \
700
+#define VFRSTP(NAME, BIT, MASK, E) \
701
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
702
+{ \
703
+ int i, m; \
704
+ VReg *Vd = (VReg *)vd; \
705
+ VReg *Vj = (VReg *)vj; \
706
+ VReg *Vk = (VReg *)vk; \
707
+ \
708
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
709
+ if (Vj->E(i) < 0) { \
710
+ break; \
711
+ } \
712
+ } \
713
+ m = Vk->E(0) & MASK; \
714
+ Vd->E(m) = i; \
715
}
716
717
VFRSTP(vfrstp_b, 8, 0xf, B)
718
@@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_h, MO_16)
719
SETALLNEZ(vsetallnez_w, MO_32)
720
SETALLNEZ(vsetallnez_d, MO_64)
721
722
-#define VPACKEV(NAME, BIT, E) \
723
-void HELPER(NAME)(CPULoongArchState *env, \
724
- uint32_t vd, uint32_t vj, uint32_t vk) \
725
-{ \
726
- int i; \
727
- VReg temp; \
728
- VReg *Vd = &(env->fpr[vd].vreg); \
729
- VReg *Vj = &(env->fpr[vj].vreg); \
730
- VReg *Vk = &(env->fpr[vk].vreg); \
731
- \
732
- for (i = 0; i < LSX_LEN/BIT; i++) { \
733
- temp.E(2 * i + 1) = Vj->E(2 * i); \
734
- temp.E(2 *i) = Vk->E(2 * i); \
735
- } \
736
- *Vd = temp; \
737
+#define VPACKEV(NAME, BIT, E) \
738
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
739
+{ \
740
+ int i; \
741
+ VReg temp; \
742
+ VReg *Vd = (VReg *)vd; \
743
+ VReg *Vj = (VReg *)vj; \
744
+ VReg *Vk = (VReg *)vk; \
745
+ \
746
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
747
+ temp.E(2 * i + 1) = Vj->E(2 * i); \
748
+ temp.E(2 *i) = Vk->E(2 * i); \
749
+ } \
750
+ *Vd = temp; \
751
}
752
753
VPACKEV(vpackev_b, 16, B)
754
@@ -XXX,XX +XXX,XX @@ VPACKEV(vpackev_h, 32, H)
755
VPACKEV(vpackev_w, 64, W)
756
VPACKEV(vpackev_d, 128, D)
757
758
-#define VPACKOD(NAME, BIT, E) \
759
-void HELPER(NAME)(CPULoongArchState *env, \
760
- uint32_t vd, uint32_t vj, uint32_t vk) \
761
-{ \
762
- int i; \
763
- VReg temp; \
764
- VReg *Vd = &(env->fpr[vd].vreg); \
765
- VReg *Vj = &(env->fpr[vj].vreg); \
766
- VReg *Vk = &(env->fpr[vk].vreg); \
767
- \
768
- for (i = 0; i < LSX_LEN/BIT; i++) { \
769
- temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
770
- temp.E(2 * i) = Vk->E(2 * i + 1); \
771
- } \
772
- *Vd = temp; \
773
+#define VPACKOD(NAME, BIT, E) \
774
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
775
+{ \
776
+ int i; \
777
+ VReg temp; \
778
+ VReg *Vd = (VReg *)vd; \
779
+ VReg *Vj = (VReg *)vj; \
780
+ VReg *Vk = (VReg *)vk; \
781
+ \
782
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
783
+ temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
784
+ temp.E(2 * i) = Vk->E(2 * i + 1); \
785
+ } \
786
+ *Vd = temp; \
787
}
788
789
VPACKOD(vpackod_b, 16, B)
790
@@ -XXX,XX +XXX,XX @@ VPACKOD(vpackod_h, 32, H)
791
VPACKOD(vpackod_w, 64, W)
792
VPACKOD(vpackod_d, 128, D)
793
794
-#define VPICKEV(NAME, BIT, E) \
795
-void HELPER(NAME)(CPULoongArchState *env, \
796
- uint32_t vd, uint32_t vj, uint32_t vk) \
797
-{ \
798
- int i; \
799
- VReg temp; \
800
- VReg *Vd = &(env->fpr[vd].vreg); \
801
- VReg *Vj = &(env->fpr[vj].vreg); \
802
- VReg *Vk = &(env->fpr[vk].vreg); \
803
- \
804
- for (i = 0; i < LSX_LEN/BIT; i++) { \
805
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
806
- temp.E(i) = Vk->E(2 * i); \
807
- } \
808
- *Vd = temp; \
809
+#define VPICKEV(NAME, BIT, E) \
810
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
811
+{ \
812
+ int i; \
813
+ VReg temp; \
814
+ VReg *Vd = (VReg *)vd; \
815
+ VReg *Vj = (VReg *)vj; \
816
+ VReg *Vk = (VReg *)vk; \
817
+ \
818
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
819
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
820
+ temp.E(i) = Vk->E(2 * i); \
821
+ } \
822
+ *Vd = temp; \
823
}
824
825
VPICKEV(vpickev_b, 16, B)
826
@@ -XXX,XX +XXX,XX @@ VPICKEV(vpickev_h, 32, H)
827
VPICKEV(vpickev_w, 64, W)
828
VPICKEV(vpickev_d, 128, D)
829
830
-#define VPICKOD(NAME, BIT, E) \
831
-void HELPER(NAME)(CPULoongArchState *env, \
832
- uint32_t vd, uint32_t vj, uint32_t vk) \
833
-{ \
834
- int i; \
835
- VReg temp; \
836
- VReg *Vd = &(env->fpr[vd].vreg); \
837
- VReg *Vj = &(env->fpr[vj].vreg); \
838
- VReg *Vk = &(env->fpr[vk].vreg); \
839
- \
840
- for (i = 0; i < LSX_LEN/BIT; i++) { \
841
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
842
- temp.E(i) = Vk->E(2 * i + 1); \
843
- } \
844
- *Vd = temp; \
845
+#define VPICKOD(NAME, BIT, E) \
846
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
847
+{ \
848
+ int i; \
849
+ VReg temp; \
850
+ VReg *Vd = (VReg *)vd; \
851
+ VReg *Vj = (VReg *)vj; \
852
+ VReg *Vk = (VReg *)vk; \
853
+ \
854
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
855
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
856
+ temp.E(i) = Vk->E(2 * i + 1); \
857
+ } \
858
+ *Vd = temp; \
859
}
860
861
VPICKOD(vpickod_b, 16, B)
862
@@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_h, 32, H)
863
VPICKOD(vpickod_w, 64, W)
864
VPICKOD(vpickod_d, 128, D)
865
866
-#define VILVL(NAME, BIT, E) \
867
-void HELPER(NAME)(CPULoongArchState *env, \
868
- uint32_t vd, uint32_t vj, uint32_t vk) \
869
-{ \
870
- int i; \
871
- VReg temp; \
872
- VReg *Vd = &(env->fpr[vd].vreg); \
873
- VReg *Vj = &(env->fpr[vj].vreg); \
874
- VReg *Vk = &(env->fpr[vk].vreg); \
875
- \
876
- for (i = 0; i < LSX_LEN/BIT; i++) { \
877
- temp.E(2 * i + 1) = Vj->E(i); \
878
- temp.E(2 * i) = Vk->E(i); \
879
- } \
880
- *Vd = temp; \
881
+#define VILVL(NAME, BIT, E) \
882
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
883
+{ \
884
+ int i; \
885
+ VReg temp; \
886
+ VReg *Vd = (VReg *)vd; \
887
+ VReg *Vj = (VReg *)vj; \
888
+ VReg *Vk = (VReg *)vk; \
889
+ \
890
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
891
+ temp.E(2 * i + 1) = Vj->E(i); \
892
+ temp.E(2 * i) = Vk->E(i); \
893
+ } \
894
+ *Vd = temp; \
895
}
896
897
VILVL(vilvl_b, 16, B)
898
@@ -XXX,XX +XXX,XX @@ VILVL(vilvl_h, 32, H)
899
VILVL(vilvl_w, 64, W)
900
VILVL(vilvl_d, 128, D)
901
902
-#define VILVH(NAME, BIT, E) \
903
-void HELPER(NAME)(CPULoongArchState *env, \
904
- uint32_t vd, uint32_t vj, uint32_t vk) \
905
-{ \
906
- int i; \
907
- VReg temp; \
908
- VReg *Vd = &(env->fpr[vd].vreg); \
909
- VReg *Vj = &(env->fpr[vj].vreg); \
910
- VReg *Vk = &(env->fpr[vk].vreg); \
911
- \
912
- for (i = 0; i < LSX_LEN/BIT; i++) { \
913
- temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
914
- temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
915
- } \
916
- *Vd = temp; \
917
+#define VILVH(NAME, BIT, E) \
918
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
919
+{ \
920
+ int i; \
921
+ VReg temp; \
922
+ VReg *Vd = (VReg *)vd; \
923
+ VReg *Vj = (VReg *)vj; \
924
+ VReg *Vk = (VReg *)vk; \
925
+ \
926
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
927
+ temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
928
+ temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
929
+ } \
930
+ *Vd = temp; \
931
}
932
933
VILVH(vilvh_b, 16, B)
934
@@ -XXX,XX +XXX,XX @@ void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
935
*Vd = temp;
936
}
937
938
-#define VSHUF(NAME, BIT, E) \
939
-void HELPER(NAME)(CPULoongArchState *env, \
940
- uint32_t vd, uint32_t vj, uint32_t vk) \
941
-{ \
942
- int i, m; \
943
- VReg temp; \
944
- VReg *Vd = &(env->fpr[vd].vreg); \
945
- VReg *Vj = &(env->fpr[vj].vreg); \
946
- VReg *Vk = &(env->fpr[vk].vreg); \
947
- \
948
- m = LSX_LEN/BIT; \
949
- for (i = 0; i < m; i++) { \
950
- uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
951
- temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
952
- } \
953
- *Vd = temp; \
954
+#define VSHUF(NAME, BIT, E) \
955
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
956
+{ \
957
+ int i, m; \
958
+ VReg temp; \
959
+ VReg *Vd = (VReg *)vd; \
960
+ VReg *Vj = (VReg *)vj; \
961
+ VReg *Vk = (VReg *)vk; \
962
+ \
963
+ m = LSX_LEN/BIT; \
964
+ for (i = 0; i < m; i++) { \
965
+ uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
966
+ temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
967
+ } \
968
+ *Vd = temp; \
969
}
970
971
VSHUF(vshuf_h, 16, H)
972
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
973
index XXXXXXX..XXXXXXX 100644
974
--- a/target/loongarch/insn_trans/trans_vec.c.inc
975
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
976
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
977
return gen_vvv_ptr_vl(ctx, a, 16, fn);
978
}
979
980
-static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
981
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
982
+static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
983
+ gen_helper_gvec_3 *fn)
984
{
985
- TCGv_i32 vd = tcg_constant_i32(a->vd);
986
- TCGv_i32 vj = tcg_constant_i32(a->vj);
987
- TCGv_i32 vk = tcg_constant_i32(a->vk);
988
+ tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
989
+ vec_full_offset(a->vj),
990
+ vec_full_offset(a->vk),
991
+ oprsz, ctx->vl / 8, 0, fn);
992
+ return true;
993
+}
994
995
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
996
+{
997
CHECK_SXE;
998
-
999
- func(cpu_env, vd, vj, vk);
1000
- return true;
1001
+ return gen_vvv_vl(ctx, a, 16, fn);
1002
}
1003
1004
static bool gen_vv(DisasContext *ctx, arg_vv *a,
1005
--
1006
2.39.1
diff view generated by jsdifflib
New patch
1
Signed-off-by: Song Gao <gaosong@loongson.cn>
2
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-Id: <20230914022645.1151356-8-gaosong@loongson.cn>
4
---
5
target/loongarch/helper.h | 118 +++++++-------
6
target/loongarch/vec_helper.c | 161 +++++++++++---------
7
target/loongarch/insn_trans/trans_vec.c.inc | 129 +++++++++-------
8
3 files changed, 219 insertions(+), 189 deletions(-)
1
9
10
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/loongarch/helper.h
13
+++ b/target/loongarch/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
15
DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
16
DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
17
18
-DEF_HELPER_3(vflogb_s, void, env, i32, i32)
19
-DEF_HELPER_3(vflogb_d, void, env, i32, i32)
20
-
21
-DEF_HELPER_3(vfclass_s, void, env, i32, i32)
22
-DEF_HELPER_3(vfclass_d, void, env, i32, i32)
23
-
24
-DEF_HELPER_3(vfsqrt_s, void, env, i32, i32)
25
-DEF_HELPER_3(vfsqrt_d, void, env, i32, i32)
26
-DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
27
-DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
28
-DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
29
-DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
30
-
31
-DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
32
-DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
33
-DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
34
-DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
35
+DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
36
+DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
37
+
38
+DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
39
+DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
40
+
41
+DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
42
+DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
43
+DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
44
+DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
45
+DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
46
+DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
47
+
48
+DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
49
+DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
50
+DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
51
+DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
52
DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
53
DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
54
55
-DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
56
-DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
57
-DEF_HELPER_3(vfrintrz_s, void, env, i32, i32)
58
-DEF_HELPER_3(vfrintrz_d, void, env, i32, i32)
59
-DEF_HELPER_3(vfrintrp_s, void, env, i32, i32)
60
-DEF_HELPER_3(vfrintrp_d, void, env, i32, i32)
61
-DEF_HELPER_3(vfrintrm_s, void, env, i32, i32)
62
-DEF_HELPER_3(vfrintrm_d, void, env, i32, i32)
63
-DEF_HELPER_3(vfrint_s, void, env, i32, i32)
64
-DEF_HELPER_3(vfrint_d, void, env, i32, i32)
65
-
66
-DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32)
67
-DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32)
68
-DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32)
69
-DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32)
70
-DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32)
71
-DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32)
72
-DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32)
73
-DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32)
74
-DEF_HELPER_3(vftint_w_s, void, env, i32, i32)
75
-DEF_HELPER_3(vftint_l_d, void, env, i32, i32)
76
-DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
77
-DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
78
-DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
79
-DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
80
+DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
81
+DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
82
+DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
83
+DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
84
+DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
85
+DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
86
+DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
87
+DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
88
+DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
89
+DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
90
+
91
+DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
92
+DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
93
+DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
94
+DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
95
+DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
96
+DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
97
+DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
98
+DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
99
+DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
100
+DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
101
+DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
102
+DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
103
+DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
104
+DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
105
DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
106
DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
107
DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
108
DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
109
DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
110
-DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
111
-DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
112
-DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
113
-DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32)
114
-DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32)
115
-DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32)
116
-DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32)
117
-DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32)
118
-DEF_HELPER_3(vftintl_l_s, void, env, i32, i32)
119
-DEF_HELPER_3(vftinth_l_s, void, env, i32, i32)
120
-
121
-DEF_HELPER_3(vffint_s_w, void, env, i32, i32)
122
-DEF_HELPER_3(vffint_d_l, void, env, i32, i32)
123
-DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
124
-DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
125
-DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
126
-DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
127
+DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
128
+DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
129
+DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
130
+DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
131
+DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
132
+DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
133
+DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
134
+DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
135
+DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
136
+DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
137
+
138
+DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
139
+DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
140
+DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
141
+DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
142
+DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
143
+DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
144
DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
145
146
DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
147
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/loongarch/vec_helper.c
150
+++ b/target/loongarch/vec_helper.c
151
@@ -XXX,XX +XXX,XX @@ DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
152
DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
153
float_muladd_negate_c | float_muladd_negate_result)
154
155
-#define DO_2OP_F(NAME, BIT, E, FN) \
156
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
157
-{ \
158
- int i; \
159
- VReg *Vd = &(env->fpr[vd].vreg); \
160
- VReg *Vj = &(env->fpr[vj].vreg); \
161
- \
162
- vec_clear_cause(env); \
163
- for (i = 0; i < LSX_LEN/BIT; i++) { \
164
- Vd->E(i) = FN(env, Vj->E(i)); \
165
- } \
166
+#define DO_2OP_F(NAME, BIT, E, FN) \
167
+void HELPER(NAME)(void *vd, void *vj, \
168
+ CPULoongArchState *env, uint32_t desc) \
169
+{ \
170
+ int i; \
171
+ VReg *Vd = (VReg *)vd; \
172
+ VReg *Vj = (VReg *)vj; \
173
+ \
174
+ vec_clear_cause(env); \
175
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
176
+ Vd->E(i) = FN(env, Vj->E(i)); \
177
+ } \
178
}
179
180
#define FLOGB(BIT, T) \
181
@@ -XXX,XX +XXX,XX @@ static T do_flogb_## BIT(CPULoongArchState *env, T fj) \
182
FLOGB(32, uint32_t)
183
FLOGB(64, uint64_t)
184
185
-#define FCLASS(NAME, BIT, E, FN) \
186
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
187
-{ \
188
- int i; \
189
- VReg *Vd = &(env->fpr[vd].vreg); \
190
- VReg *Vj = &(env->fpr[vj].vreg); \
191
- \
192
- for (i = 0; i < LSX_LEN/BIT; i++) { \
193
- Vd->E(i) = FN(env, Vj->E(i)); \
194
- } \
195
+#define FCLASS(NAME, BIT, E, FN) \
196
+void HELPER(NAME)(void *vd, void *vj, \
197
+ CPULoongArchState *env, uint32_t desc) \
198
+{ \
199
+ int i; \
200
+ VReg *Vd = (VReg *)vd; \
201
+ VReg *Vj = (VReg *)vj; \
202
+ \
203
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
204
+ Vd->E(i) = FN(env, Vj->E(i)); \
205
+ } \
206
}
207
208
FCLASS(vfclass_s, 32, UW, helper_fclass_s)
209
@@ -XXX,XX +XXX,XX @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
210
return float64_to_float32(d, status);
211
}
212
213
-void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
214
+void HELPER(vfcvtl_s_h)(void *vd, void *vj,
215
+ CPULoongArchState *env, uint32_t desc)
216
{
217
int i;
218
VReg temp;
219
- VReg *Vd = &(env->fpr[vd].vreg);
220
- VReg *Vj = &(env->fpr[vj].vreg);
221
+ VReg *Vd = (VReg *)vd;
222
+ VReg *Vj = (VReg *)vj;
223
224
vec_clear_cause(env);
225
for (i = 0; i < LSX_LEN/32; i++) {
226
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
227
*Vd = temp;
228
}
229
230
-void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
231
+void HELPER(vfcvtl_d_s)(void *vd, void *vj,
232
+ CPULoongArchState *env, uint32_t desc)
233
{
234
int i;
235
VReg temp;
236
- VReg *Vd = &(env->fpr[vd].vreg);
237
- VReg *Vj = &(env->fpr[vj].vreg);
238
+ VReg *Vd = (VReg *)vd;
239
+ VReg *Vj = (VReg *)vj;
240
241
vec_clear_cause(env);
242
for (i = 0; i < LSX_LEN/64; i++) {
243
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
244
*Vd = temp;
245
}
246
247
-void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
248
+void HELPER(vfcvth_s_h)(void *vd, void *vj,
249
+ CPULoongArchState *env, uint32_t desc)
250
{
251
int i;
252
VReg temp;
253
- VReg *Vd = &(env->fpr[vd].vreg);
254
- VReg *Vj = &(env->fpr[vj].vreg);
255
+ VReg *Vd = (VReg *)vd;
256
+ VReg *Vj = (VReg *)vj;
257
258
vec_clear_cause(env);
259
for (i = 0; i < LSX_LEN/32; i++) {
260
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
261
*Vd = temp;
262
}
263
264
-void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
265
+void HELPER(vfcvth_d_s)(void *vd, void *vj,
266
+ CPULoongArchState *env, uint32_t desc)
267
{
268
int i;
269
VReg temp;
270
- VReg *Vd = &(env->fpr[vd].vreg);
271
- VReg *Vj = &(env->fpr[vj].vreg);
272
+ VReg *Vd = (VReg *)vd;
273
+ VReg *Vj = (VReg *)vj;
274
275
vec_clear_cause(env);
276
for (i = 0; i < LSX_LEN/64; i++) {
277
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
278
*Vd = temp;
279
}
280
281
-void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
282
+void HELPER(vfrint_s)(void *vd, void *vj,
283
+ CPULoongArchState *env, uint32_t desc)
284
{
285
int i;
286
- VReg *Vd = &(env->fpr[vd].vreg);
287
- VReg *Vj = &(env->fpr[vj].vreg);
288
+ VReg *Vd = (VReg *)vd;
289
+ VReg *Vj = (VReg *)vj;
290
291
vec_clear_cause(env);
292
for (i = 0; i < 4; i++) {
293
@@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
294
}
295
}
296
297
-void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
298
+void HELPER(vfrint_d)(void *vd, void *vj,
299
+ CPULoongArchState *env, uint32_t desc)
300
{
301
int i;
302
- VReg *Vd = &(env->fpr[vd].vreg);
303
- VReg *Vj = &(env->fpr[vj].vreg);
304
+ VReg *Vd = (VReg *)vd;
305
+ VReg *Vj = (VReg *)vj;
306
307
vec_clear_cause(env);
308
for (i = 0; i < 2; i++) {
309
@@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
310
}
311
312
#define FCVT_2OP(NAME, BIT, E, MODE) \
313
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
314
+void HELPER(NAME)(void *vd, void *vj, \
315
+ CPULoongArchState *env, uint32_t desc) \
316
{ \
317
int i; \
318
- VReg *Vd = &(env->fpr[vd].vreg); \
319
- VReg *Vj = &(env->fpr[vj].vreg); \
320
+ VReg *Vd = (VReg *)vd; \
321
+ VReg *Vj = (VReg *)vj; \
322
\
323
vec_clear_cause(env); \
324
for (i = 0; i < LSX_LEN/BIT; i++) { \
325
@@ -XXX,XX +XXX,XX @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
326
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
327
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
328
329
-#define FTINTL_L_S(NAME, FN) \
330
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
331
-{ \
332
- int i; \
333
- VReg temp; \
334
- VReg *Vd = &(env->fpr[vd].vreg); \
335
- VReg *Vj = &(env->fpr[vj].vreg); \
336
- \
337
- vec_clear_cause(env); \
338
- for (i = 0; i < 2; i++) { \
339
- temp.D(i) = FN(env, Vj->UW(i)); \
340
- } \
341
- *Vd = temp; \
342
+#define FTINTL_L_S(NAME, FN) \
343
+void HELPER(NAME)(void *vd, void *vj, \
344
+ CPULoongArchState *env, uint32_t desc) \
345
+{ \
346
+ int i; \
347
+ VReg temp; \
348
+ VReg *Vd = (VReg *)vd; \
349
+ VReg *Vj = (VReg *)vj; \
350
+ \
351
+ vec_clear_cause(env); \
352
+ for (i = 0; i < 2; i++) { \
353
+ temp.D(i) = FN(env, Vj->UW(i)); \
354
+ } \
355
+ *Vd = temp; \
356
}
357
358
FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
359
@@ -XXX,XX +XXX,XX @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
360
FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
361
FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
362
363
-#define FTINTH_L_S(NAME, FN) \
364
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
365
-{ \
366
- int i; \
367
- VReg temp; \
368
- VReg *Vd = &(env->fpr[vd].vreg); \
369
- VReg *Vj = &(env->fpr[vj].vreg); \
370
- \
371
- vec_clear_cause(env); \
372
- for (i = 0; i < 2; i++) { \
373
- temp.D(i) = FN(env, Vj->UW(i + 2)); \
374
- } \
375
- *Vd = temp; \
376
+#define FTINTH_L_S(NAME, FN) \
377
+void HELPER(NAME)(void *vd, void *vj, \
378
+ CPULoongArchState *env, uint32_t desc) \
379
+{ \
380
+ int i; \
381
+ VReg temp; \
382
+ VReg *Vd = (VReg *)vd; \
383
+ VReg *Vj = (VReg *)vj; \
384
+ \
385
+ vec_clear_cause(env); \
386
+ for (i = 0; i < 2; i++) { \
387
+ temp.D(i) = FN(env, Vj->UW(i + 2)); \
388
+ } \
389
+ *Vd = temp; \
390
}
391
392
FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
393
@@ -XXX,XX +XXX,XX @@ DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
394
DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
395
DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
396
397
-void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
398
+void HELPER(vffintl_d_w)(void *vd, void *vj,
399
+ CPULoongArchState *env, uint32_t desc)
400
{
401
int i;
402
VReg temp;
403
- VReg *Vd = &(env->fpr[vd].vreg);
404
- VReg *Vj = &(env->fpr[vj].vreg);
405
+ VReg *Vd = (VReg *)vd;
406
+ VReg *Vj = (VReg *)vj;
407
408
vec_clear_cause(env);
409
for (i = 0; i < 2; i++) {
410
@@ -XXX,XX +XXX,XX @@ void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
411
*Vd = temp;
412
}
413
414
-void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
415
+void HELPER(vffinth_d_w)(void *vd, void *vj,
416
+ CPULoongArchState *env, uint32_t desc)
417
{
418
int i;
419
VReg temp;
420
- VReg *Vd = &(env->fpr[vd].vreg);
421
- VReg *Vj = &(env->fpr[vj].vreg);
422
+ VReg *Vd = (VReg *)vd;
423
+ VReg *Vj = (VReg *)vj;
424
425
vec_clear_cause(env);
426
for (i = 0; i < 2; i++) {
427
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
428
index XXXXXXX..XXXXXXX 100644
429
--- a/target/loongarch/insn_trans/trans_vec.c.inc
430
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
431
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
432
return gen_vvv_vl(ctx, a, 16, fn);
433
}
434
435
+static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
436
+ gen_helper_gvec_2_ptr *fn)
437
+{
438
+ tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
439
+ vec_full_offset(a->vj),
440
+ cpu_env,
441
+ oprsz, ctx->vl / 8, 0, fn);
442
+ return true;
443
+}
444
+
445
+static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
446
+ gen_helper_gvec_2_ptr *fn)
447
+{
448
+ CHECK_SXE;
449
+ return gen_vv_ptr_vl(ctx, a, 16, fn);
450
+}
451
+
452
static bool gen_vv(DisasContext *ctx, arg_vv *a,
453
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
454
{
455
@@ -XXX,XX +XXX,XX @@ TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
456
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
457
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
458
459
-TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
460
-TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
461
+TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
462
+TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
463
464
-TRANS(vfclass_s, LSX, gen_vv, gen_helper_vfclass_s)
465
-TRANS(vfclass_d, LSX, gen_vv, gen_helper_vfclass_d)
466
+TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
467
+TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
468
469
-TRANS(vfsqrt_s, LSX, gen_vv, gen_helper_vfsqrt_s)
470
-TRANS(vfsqrt_d, LSX, gen_vv, gen_helper_vfsqrt_d)
471
-TRANS(vfrecip_s, LSX, gen_vv, gen_helper_vfrecip_s)
472
-TRANS(vfrecip_d, LSX, gen_vv, gen_helper_vfrecip_d)
473
-TRANS(vfrsqrt_s, LSX, gen_vv, gen_helper_vfrsqrt_s)
474
-TRANS(vfrsqrt_d, LSX, gen_vv, gen_helper_vfrsqrt_d)
475
+TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
476
+TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
477
+TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
478
+TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
479
+TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
480
+TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
481
482
-TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
483
-TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
484
-TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
485
-TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
486
+TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
487
+TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
488
+TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
489
+TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
490
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
491
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
492
493
-TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
494
-TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
495
-TRANS(vfrintrz_s, LSX, gen_vv, gen_helper_vfrintrz_s)
496
-TRANS(vfrintrz_d, LSX, gen_vv, gen_helper_vfrintrz_d)
497
-TRANS(vfrintrp_s, LSX, gen_vv, gen_helper_vfrintrp_s)
498
-TRANS(vfrintrp_d, LSX, gen_vv, gen_helper_vfrintrp_d)
499
-TRANS(vfrintrm_s, LSX, gen_vv, gen_helper_vfrintrm_s)
500
-TRANS(vfrintrm_d, LSX, gen_vv, gen_helper_vfrintrm_d)
501
-TRANS(vfrint_s, LSX, gen_vv, gen_helper_vfrint_s)
502
-TRANS(vfrint_d, LSX, gen_vv, gen_helper_vfrint_d)
503
-
504
-TRANS(vftintrne_w_s, LSX, gen_vv, gen_helper_vftintrne_w_s)
505
-TRANS(vftintrne_l_d, LSX, gen_vv, gen_helper_vftintrne_l_d)
506
-TRANS(vftintrz_w_s, LSX, gen_vv, gen_helper_vftintrz_w_s)
507
-TRANS(vftintrz_l_d, LSX, gen_vv, gen_helper_vftintrz_l_d)
508
-TRANS(vftintrp_w_s, LSX, gen_vv, gen_helper_vftintrp_w_s)
509
-TRANS(vftintrp_l_d, LSX, gen_vv, gen_helper_vftintrp_l_d)
510
-TRANS(vftintrm_w_s, LSX, gen_vv, gen_helper_vftintrm_w_s)
511
-TRANS(vftintrm_l_d, LSX, gen_vv, gen_helper_vftintrm_l_d)
512
-TRANS(vftint_w_s, LSX, gen_vv, gen_helper_vftint_w_s)
513
-TRANS(vftint_l_d, LSX, gen_vv, gen_helper_vftint_l_d)
514
-TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
515
-TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
516
-TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
517
-TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
518
+TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
519
+TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
520
+TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
521
+TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
522
+TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
523
+TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
524
+TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
525
+TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
526
+TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
527
+TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
528
+
529
+TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
530
+TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
531
+TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
532
+TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
533
+TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
534
+TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
535
+TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
536
+TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
537
+TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
538
+TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
539
+TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
540
+TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
541
+TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
542
+TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
543
TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
544
TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
545
TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
546
TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
547
TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
548
-TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
549
-TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
550
-TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
551
-TRANS(vftintrzh_l_s, LSX, gen_vv, gen_helper_vftintrzh_l_s)
552
-TRANS(vftintrpl_l_s, LSX, gen_vv, gen_helper_vftintrpl_l_s)
553
-TRANS(vftintrph_l_s, LSX, gen_vv, gen_helper_vftintrph_l_s)
554
-TRANS(vftintrml_l_s, LSX, gen_vv, gen_helper_vftintrml_l_s)
555
-TRANS(vftintrmh_l_s, LSX, gen_vv, gen_helper_vftintrmh_l_s)
556
-TRANS(vftintl_l_s, LSX, gen_vv, gen_helper_vftintl_l_s)
557
-TRANS(vftinth_l_s, LSX, gen_vv, gen_helper_vftinth_l_s)
558
-
559
-TRANS(vffint_s_w, LSX, gen_vv, gen_helper_vffint_s_w)
560
-TRANS(vffint_d_l, LSX, gen_vv, gen_helper_vffint_d_l)
561
-TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
562
-TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
563
-TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
564
-TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
565
+TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
566
+TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
567
+TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
568
+TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
569
+TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
570
+TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
571
+TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
572
+TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
573
+TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
574
+TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
575
+
576
+TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
577
+TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
578
+TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
579
+TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
580
+TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
581
+TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
582
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
583
584
static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
585
--
586
2.39.1
diff view generated by jsdifflib
New patch
1
Signed-off-by: Song Gao <gaosong@loongson.cn>
2
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-Id: <20230914022645.1151356-9-gaosong@loongson.cn>
4
---
5
target/loongarch/helper.h | 58 ++++-----
6
target/loongarch/vec_helper.c | 124 ++++++++++----------
7
target/loongarch/insn_trans/trans_vec.c.inc | 16 ++-
8
3 files changed, 101 insertions(+), 97 deletions(-)
1
9
10
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/loongarch/helper.h
13
+++ b/target/loongarch/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
15
DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
16
DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
17
18
-DEF_HELPER_3(vexth_h_b, void, env, i32, i32)
19
-DEF_HELPER_3(vexth_w_h, void, env, i32, i32)
20
-DEF_HELPER_3(vexth_d_w, void, env, i32, i32)
21
-DEF_HELPER_3(vexth_q_d, void, env, i32, i32)
22
-DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
23
-DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
24
-DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
25
-DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
26
+DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
34
35
DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
40
-DEF_HELPER_3(vmskltz_b, void, env, i32, i32)
41
-DEF_HELPER_3(vmskltz_h, void, env, i32, i32)
42
-DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
43
-DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
44
-DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
45
-DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
46
+DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
47
+DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
48
+DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
49
+DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
51
+DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
54
55
DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
56
DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
57
DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
58
-DEF_HELPER_3(vextl_q_d, void, env, i32, i32)
59
+DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
60
DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
61
DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
62
DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
63
-DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
64
+DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
65
66
DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
67
DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
68
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
69
DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
70
DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
71
72
-DEF_HELPER_3(vclo_b, void, env, i32, i32)
73
-DEF_HELPER_3(vclo_h, void, env, i32, i32)
74
-DEF_HELPER_3(vclo_w, void, env, i32, i32)
75
-DEF_HELPER_3(vclo_d, void, env, i32, i32)
76
-DEF_HELPER_3(vclz_b, void, env, i32, i32)
77
-DEF_HELPER_3(vclz_h, void, env, i32, i32)
78
-DEF_HELPER_3(vclz_w, void, env, i32, i32)
79
-DEF_HELPER_3(vclz_d, void, env, i32, i32)
80
-
81
-DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
82
-DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
83
-DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
84
-DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
85
+DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
86
+DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
87
+DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
88
+DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
89
+DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
90
+DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
91
+DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
92
+DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
93
+
94
+DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
95
+DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
96
+DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
97
+DEF_HELPER_FLAGS_3(vpcnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
98
99
DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
100
DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
101
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/target/loongarch/vec_helper.c
104
+++ b/target/loongarch/vec_helper.c
105
@@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_hu, 16, UH)
106
VSAT_U(vsat_wu, 32, UW)
107
VSAT_U(vsat_du, 64, UD)
108
109
-#define VEXTH(NAME, BIT, E1, E2) \
110
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
111
-{ \
112
- int i; \
113
- VReg *Vd = &(env->fpr[vd].vreg); \
114
- VReg *Vj = &(env->fpr[vj].vreg); \
115
- \
116
- for (i = 0; i < LSX_LEN/BIT; i++) { \
117
- Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
118
- } \
119
+#define VEXTH(NAME, BIT, E1, E2) \
120
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
121
+{ \
122
+ int i; \
123
+ VReg *Vd = (VReg *)vd; \
124
+ VReg *Vj = (VReg *)vj; \
125
+ \
126
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
127
+ Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
128
+ } \
129
}
130
131
-void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
132
+void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
133
{
134
- VReg *Vd = &(env->fpr[vd].vreg);
135
- VReg *Vj = &(env->fpr[vj].vreg);
136
+ VReg *Vd = (VReg *)vd;
137
+ VReg *Vj = (VReg *)vj;
138
139
Vd->Q(0) = int128_makes64(Vj->D(1));
140
}
141
142
-void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
143
+void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
144
{
145
- VReg *Vd = &(env->fpr[vd].vreg);
146
- VReg *Vj = &(env->fpr[vj].vreg);
147
+ VReg *Vd = (VReg *)vd;
148
+ VReg *Vj = (VReg *)vj;
149
150
Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
151
}
152
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_b(int64_t val)
153
return c >> 56;
154
}
155
156
-void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
157
+void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
158
{
159
uint16_t temp = 0;
160
- VReg *Vd = &(env->fpr[vd].vreg);
161
- VReg *Vj = &(env->fpr[vj].vreg);
162
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskltz_b(Vj->D(0));
temp |= (do_vmskltz_b(Vj->D(1)) << 8);
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_h(int64_t val)
return c >> 60;
}

-void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskltz_h(Vj->D(0));
temp |= (do_vmskltz_h(Vj->D(1)) << 4);
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_w(int64_t val)
return c >> 62;
}

-void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskltz_w(Vj->D(0));
temp |= (do_vmskltz_w(Vj->D(1)) << 2);
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_d(int64_t val)
{
return (uint64_t)val >> 63;
}
-void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskltz_d(Vj->D(0));
temp |= (do_vmskltz_d(Vj->D(1)) << 1);
@@ -XXX,XX +XXX,XX @@ void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
Vd->D(1) = 0;
}

-void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskltz_b(Vj->D(0));
temp |= (do_vmskltz_b(Vj->D(1)) << 8);
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskez_b(uint64_t a)
return c >> 56;
}

-void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp = do_vmskez_b(Vj->D(0));
temp |= (do_vmskez_b(Vj->D(1)) << 8);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

Vd->Q(0) = int128_makes64(Vj->D(0));
}

-void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

Vd->Q(0) = int128_make64(Vj->D(0));
}
@@ -XXX,XX +XXX,XX @@ VSSRARNUI(vssrarni_bu_h, 16, B, H)
VSSRARNUI(vssrarni_hu_w, 32, H, W)
VSSRARNUI(vssrarni_wu_d, 64, W, D)

-#define DO_2OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) \
- { \
- Vd->E(i) = DO_OP(Vj->E(i)); \
- } \
+#define DO_2OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) \
+ { \
+ Vd->E(i) = DO_OP(Vj->E(i)); \
+ } \
}

#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
@@ -XXX,XX +XXX,XX @@ DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
DO_2OP(vclz_d, 64, UD, DO_CLZ_D)

-#define VPCNT(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) \
- { \
- Vd->E(i) = FN(Vj->E(i)); \
- } \
+#define VPCNT(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) \
+ { \
+ Vd->E(i) = FN(Vj->E(i)); \
+ } \
}

VPCNT(vpcnt_b, 8, UB, ctpop8)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
return gen_vv_ptr_vl(ctx, a, 16, fn);
}

-static bool gen_vv(DisasContext *ctx, arg_vv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+ gen_helper_gvec_2 *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
+ tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}

+static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
CHECK_SXE;
- func(cpu_env, vd, vj);
- return true;
+ return gen_vv_vl(ctx, a, 16, fn);
}

static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
--
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-10-gaosong@loongson.cn>
---
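Note: every helper converted here drops the (env, vd, vj, imm)
register-number arguments in favour of the gvec out-of-line form:
two direct vector pointers, a 64-bit immediate, and a descriptor.
A minimal sketch of the resulting shape -- vfoo_w and its shift
operation are illustrative only, not names from the tree:

    void helper_vfoo_w(void *vd, void *vj, uint64_t imm, uint32_t desc)
    {
        int i;
        VReg *Vd = (VReg *)vd;    /* destination vector, passed directly */
        VReg *Vj = (VReg *)vj;    /* source vector */

        for (i = 0; i < LSX_LEN / 32; i++) {
            Vd->UW(i) = Vj->UW(i) >> (imm % 32);
        }
    }

The translator then invokes such a helper without routing register
numbers through cpu_env:

    tcg_gen_gvec_2i_ool(vec_full_offset(a->vd), vec_full_offset(a->vj),
                        tcg_constant_i64(a->imm), 16, ctx->vl / 8,
                        0, gen_helper_vfoo_w);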
target/loongarch/helper.h | 146 +++----
target/loongarch/vec_helper.c | 445 +++++++++-----------
target/loongarch/insn_trans/trans_vec.c.inc | 18 +-
3 files changed, 291 insertions(+), 318 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

-DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

-DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

-DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

-DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
}
}

-#define VSLLWIL(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- typedef __typeof(temp.E1(0)) TD; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
- } \
- *Vd = temp; \
+#define VSLLWIL(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(temp.E1(0)) TD; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
+ } \
+ *Vd = temp; \
}

void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
@@ -XXX,XX +XXX,XX @@ VSRLR(vsrlr_h, 16, uint16_t, H)
VSRLR(vsrlr_w, 32, uint32_t, W)
VSRLR(vsrlr_d, 64, uint64_t, D)

-#define VSRLRI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
- } \
+#define VSRLRI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
+ } \
}

VSRLRI(vsrlri_b, 8, B)
@@ -XXX,XX +XXX,XX @@ VSRAR(vsrar_h, 16, uint16_t, H)
VSRAR(vsrar_w, 32, uint32_t, W)
VSRAR(vsrar_d, 64, uint64_t, D)

-#define VSRARI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
- } \
+#define VSRARI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
+ } \
}

VSRARI(vsrari_b, 8, B)
@@ -XXX,XX +XXX,XX @@ VSRAN(vsran_b_h, 16, uint16_t, B, H)
VSRAN(vsran_h_w, 32, uint32_t, H, W)
VSRAN(vsran_w_d, 64, uint64_t, W, D)

-#define VSRLNI(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRLNI(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
+ temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp.D(0) = 0;
temp.D(1) = 0;
@@ -XXX,XX +XXX,XX @@ VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)

-#define VSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrani_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
+ temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp.D(0) = 0;
temp.D(1) = 0;
@@ -XXX,XX +XXX,XX @@ VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
VSRARN(vsrarn_w_d, 64, uint32_t, W, D)

-#define VSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlrni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
+ temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Int128 r1, r2;

if (imm == 0) {
@@ -XXX,XX +XXX,XX @@ VSRLRNI(vsrlrni_b_h, 16, B, H)
VSRLRNI(vsrlrni_h_w, 32, H, W)
VSRLRNI(vsrlrni_w_d, 64, W, D)

-#define VSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrarni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
+ temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Int128 r1, r2;

if (imm == 0) {
@@ -XXX,XX +XXX,XX @@ VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
VSSRANU(vssran_wu_d, 64, uint64_t, W, D)

#define VSSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vssrlni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ VSSRLNI(vssrlni_h_w, 32, H, W)
VSSRLNI(vssrlni_w_d, 64, W, D)

#define VSSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vssrani_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask, min;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)

#define VSSRLNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vssrlni_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ VSSRLNUI(vssrlni_hu_w, 32, H, W)
VSSRLNUI(vssrlni_wu_d, 64, W, D)

#define VSSRANUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vssrani_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)

#define VSSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
}

#define VSSRLRNI_Q(NAME, sh) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
Int128 shft_res1, shft_res2, mask, r1, r2; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
if (imm == 0) { \
shft_res1 = Vj->Q(0); \
@@ -XXX,XX +XXX,XX @@ VSSRLRNI(vssrlrni_w_d, 64, W, D)
VSSRLRNI_Q(vssrlrni_d_q, 63)

#define VSSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env,
*Vd = temp; \
}

-void HELPER(vssrarni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ VSSRARNI(vssrarni_h_w, 32, H, W)
VSSRARNI(vssrarni_w_d, 64, W, D)

#define VSSRLRNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -XXX,XX +XXX,XX @@ VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
VSSRLRNI_Q(vssrlrni_du_q, 64)

#define VSSRARNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}

-void HELPER(vssrarni_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VFRSTP(vfrstp_b, 8, 0xf, B)
VFRSTP(vfrstp_h, 16, 0x7, H)

-#define VFRSTPI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, m; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- if (Vj->E(i) < 0) { \
- break; \
- } \
- } \
- m = imm % (LSX_LEN/BIT); \
- Vd->E(m) = i; \
+#define VFRSTPI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, m; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ if (Vj->E(i) < 0) { \
+ break; \
+ } \
+ } \
+ m = imm % (LSX_LEN/BIT); \
+ Vd->E(m) = i; \
}

VFRSTPI(vfrstpi_b, 8, B)
@@ -XXX,XX +XXX,XX @@ VSHUF(vshuf_h, 16, H)
VSHUF(vshuf_w, 32, W)
VSHUF(vshuf_d, 64, D)

-#define VSHUF4I(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
- (2 * ((i) & 0x03))) & 0x03)); \
- } \
- *Vd = temp; \
+#define VSHUF4I(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
+ (2 * ((i) & 0x03))) & 0x03)); \
+ } \
+ *Vd = temp; \
}

VSHUF4I(vshuf4i_b, 8, B)
VSHUF4I(vshuf4i_h, 16, H)
VSHUF4I(vshuf4i_w, 32, W)

-void HELPER(vshuf4i_d)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

VReg temp;
temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
@@ -XXX,XX +XXX,XX @@ void HELPER(vshuf4i_d)(CPULoongArchState *env,
*Vd = temp;
}

-void HELPER(vpermi_w)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;

temp.W(0) = Vj->W(imm & 0x3);
temp.W(1) = Vj->W((imm >> 2) & 0x3);
@@ -XXX,XX +XXX,XX @@ void HELPER(vpermi_w)(CPULoongArchState *env,
*Vd = temp;
}

-#define VEXTRINS(NAME, BIT, E, MASK) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int ins, extr; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- ins = (imm >> 4) & MASK; \
- extr = imm & MASK; \
- Vd->E(ins) = Vj->E(extr); \
+#define VEXTRINS(NAME, BIT, E, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int ins, extr; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ ins = (imm >> 4) & MASK; \
+ extr = imm & MASK; \
+ Vd->E(ins) = Vj->E(extr); \
}

VEXTRINS(vextrins_b, 8, B, 0xf)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
return gen_vv_vl(ctx, a, 16, fn);
}

-static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
+ gen_helper_gvec_2i *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 imm = tcg_constant_i32(a->imm);
+ tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ tcg_constant_i64(a->imm),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}

+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
CHECK_SXE;
- func(cpu_env, vd, vj, imm);
- return true;
+ return gen_vv_i_vl(ctx, a, 16, fn);
}

static bool gen_cv(DisasContext *ctx, arg_cv *a,
--
2.39.1
Introduce a new function check_vec to replace CHECK_SXE

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-11-gaosong@loongson.cn>
---
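Note: unlike the CHECK_SXE macro, which returned from the enclosing
translate function itself, check_vec() reports the result to its
caller: it generates the SXD exception and returns false, and the call
site then returns true because the instruction has been fully handled
by the exception. A sketch of the intended calling pattern -- trans_vfoo
is an illustrative name, not one from the tree:

    static bool trans_vfoo(DisasContext *ctx, arg_vv *a)
    {
        if (!check_vec(ctx, 16)) {
            return true;    /* SXD exception already generated */
        }
        return gen_vv_vl(ctx, a, 16, gen_helper_vfoo);
    }

Passing the operand size explicitly also prepares these checks for the
32-byte LASX case handled later in this series.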
target/loongarch/insn_trans/trans_vec.c.inc | 248 +++++++++++++++-----
1 file changed, 192 insertions(+), 56 deletions(-)

diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@
*/

#ifndef CONFIG_USER_ONLY
-#define CHECK_SXE do { \
- if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
- generate_exception(ctx, EXCCODE_SXD); \
- return true; \
- } \
-} while (0)
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+ if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
+ generate_exception(ctx, EXCCODE_SXD);
+ return false;
+ }
+ return true;
+}
+
#else
-#define CHECK_SXE
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+ return true;
+}
+
#endif

static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
gen_helper_gvec_4_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
gen_helper_gvec_4 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvvv_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
gen_helper_gvec_3_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvv_ptr_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,

static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvv_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
gen_helper_gvec_2_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_ptr_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,

static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,

static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_i_vl(ctx, a, 16, fn);
}

@@ -XXX,XX +XXX,XX @@ static bool gen_cv(DisasContext *ctx, arg_cv *a,
TCGv_i32 vj = tcg_constant_i32(a->vj);
TCGv_i32 cd = tcg_constant_i32(a->cd);

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
func(cpu_env, cd, vj);
return true;
}
@@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vvv_vl(ctx, a, 16, mop, func);
}

@@ -XXX,XX +XXX,XX @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vv_vl(ctx, a, 16, mop, func);
}

@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
int64_t, uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vv_i_vl(ctx, a, 16, mop, func);
}

@@ -XXX,XX +XXX,XX @@ static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,

static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_subi_vl(ctx, a, 16, mop);
}

@@ -XXX,XX +XXX,XX @@ static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
rh = tcg_temp_new_i64(); \
rl = tcg_temp_new_i64(); \
@@ -XXX,XX +XXX,XX @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

sel = (a->imm >> 12) & 0x1;

@@ -XXX,XX +XXX,XX @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

vd_ofs = vec_full_offset(a->vd);
vj_ofs = vec_full_offset(a->vj);
@@ -XXX,XX +XXX,XX @@ static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
{
uint32_t vd_ofs, vj_ofs, vk_ofs;

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

vd_ofs = vec_full_offset(a->vd);
vj_ofs = vec_full_offset(a->vj);
@@ -XXX,XX +XXX,XX @@ static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
uint32_t vd_ofs, vj_ofs; \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
static const TCGOpcode vecop_list[] = { \
INDEX_op_cmp_vec, 0 \
@@ -XXX,XX +XXX,XX @@ static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
uint32_t vd_ofs, vj_ofs; \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
static const TCGOpcode vecop_list[] = { \
INDEX_op_cmp_vec, 0 \
@@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
flags = get_fcmp_flags(a->fcond >> 1);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
flags = get_fcmp_flags(a->fcond >> 1);
@@ -XXX,XX +XXX,XX @@ static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
vec_full_offset(a->vk), vec_full_offset(a->vj),
@@ -XXX,XX +XXX,XX @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
16, ctx->vl/8, a->imm, &op);
@@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
+ \
tcg_gen_or_i64(t1, al, ah); \
tcg_gen_setcondi_i64(COND, t1, t1, 0); \
tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
@@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st8_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st16_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st32_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld8s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld16s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld32s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld8u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld16u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld32u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
return true;
@@ -XXX,XX +XXX,XX @@ static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
16, ctx->vl/8, src);
@@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.B((a->imm))),
@@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.H((a->imm))),
@@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.W((a->imm))),
@@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.D((a->imm))),
@@ -XXX,XX +XXX,XX @@ static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
tcg_gen_shli_i64(t0, t0, vece);
@@ -XXX,XX +XXX,XX @@ static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

desthigh = tcg_temp_new_i64();
destlow = tcg_temp_new_i64();
@@ -XXX,XX +XXX,XX @@ static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

desthigh = tcg_temp_new_i64();
destlow = tcg_temp_new_i64();
@@ -XXX,XX +XXX,XX @@ static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

addr = gpr_src(ctx, a->rj, EXT_NONE);
val = tcg_temp_new_i128();
@@ -XXX,XX +XXX,XX @@ static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

addr = gpr_src(ctx, a->rj, EXT_NONE);
val = tcg_temp_new_i128();
@@ -XXX,XX +XXX,XX @@ static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

src1 = gpr_src(ctx, a->rj, EXT_NONE);
src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -XXX,XX +XXX,XX @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
return false;
}

- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }

src1 = gpr_src(ctx, a->rj, EXT_NONE);
src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
addr = gpr_src(ctx, a->rj, EXT_NONE); \
val = tcg_temp_new_i64(); \
@@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
addr = gpr_src(ctx, a->rj, EXT_NONE); \
val = tcg_temp_new_i64(); \
--
2.39.1
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-12-gaosong@loongson.cn>
---
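Note on the big-endian element macros moving into vec.h: for the
original 128-bit arrays, (x) ^ 15 is identical to the old 15 - (x)
for byte indices 0..15. Once VReg grows to LASX_LEN, the XOR form
also extends naturally to the upper 128-bit half, mirroring indices
within each half (16 ^ 15 = 31, 17 ^ 15 = 30, just as 0 ^ 15 = 15
and 1 ^ 15 = 14), where the old subtraction would have produced
negative indices for x > 15.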
target/loongarch/cpu.h | 24 ++++++++++++----------
target/loongarch/internals.h | 22 --------------------
target/loongarch/vec.h | 33 ++++++++++++++++++++++++++++++
linux-user/loongarch64/signal.c | 1 +
target/loongarch/cpu.c | 1 +
target/loongarch/gdbstub.c | 1 +
target/loongarch/machine.c | 36 ++++++++++++++++++++++++++++++++-
target/loongarch/translate.c | 1 +
target/loongarch/vec_helper.c | 1 +
9 files changed, 86 insertions(+), 34 deletions(-)
create mode 100644 target/loongarch/vec.h

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -XXX,XX +XXX,XX @@ FIELD(TLB_MISC, ASID, 1, 10)
FIELD(TLB_MISC, VPPN, 13, 35)
FIELD(TLB_MISC, PS, 48, 6)

-#define LSX_LEN (128)
+#define LSX_LEN (128)
+#define LASX_LEN (256)
+
typedef union VReg {
- int8_t B[LSX_LEN / 8];
- int16_t H[LSX_LEN / 16];
- int32_t W[LSX_LEN / 32];
- int64_t D[LSX_LEN / 64];
- uint8_t UB[LSX_LEN / 8];
- uint16_t UH[LSX_LEN / 16];
- uint32_t UW[LSX_LEN / 32];
- uint64_t UD[LSX_LEN / 64];
- Int128 Q[LSX_LEN / 128];
-}VReg;
+ int8_t B[LASX_LEN / 8];
+ int16_t H[LASX_LEN / 16];
+ int32_t W[LASX_LEN / 32];
+ int64_t D[LASX_LEN / 64];
+ uint8_t UB[LASX_LEN / 8];
+ uint16_t UH[LASX_LEN / 16];
+ uint32_t UW[LASX_LEN / 32];
+ uint64_t UD[LASX_LEN / 64];
+ Int128 Q[LASX_LEN / 128];
+} VReg;

typedef union fpr_t fpr_t;
union fpr_t {
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -XXX,XX +XXX,XX @@
/* Global bit for huge page */
#define LOONGARCH_HGLOBAL_SHIFT 12

-#if HOST_BIG_ENDIAN
-#define B(x) B[15 - (x)]
-#define H(x) H[7 - (x)]
-#define W(x) W[3 - (x)]
-#define D(x) D[1 - (x)]
-#define UB(x) UB[15 - (x)]
-#define UH(x) UH[7 - (x)]
-#define UW(x) UW[3 - (x)]
-#define UD(x) UD[1 -(x)]
-#define Q(x) Q[x]
-#else
-#define B(x) B[x]
-#define H(x) H[x]
-#define W(x) W[x]
-#define D(x) D[x]
-#define UB(x) UB[x]
-#define UH(x) UH[x]
-#define UW(x) UW[x]
-#define UD(x) UD[x]
-#define Q(x) Q[x]
-#endif
-
void loongarch_translate_init(void);

void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/loongarch/vec.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch vector utilitites
95
+ *
96
+ * Copyright (c) 2023 Loongson Technology Corporation Limited
97
+ */
98
+
99
+#ifndef LOONGARCH_VEC_H
100
+#define LOONGARCH_VEC_H
101
+
102
+#if HOST_BIG_ENDIAN
103
+#define B(x) B[(x) ^ 15]
104
+#define H(x) H[(x) ^ 7]
105
+#define W(x) W[(x) ^ 3]
106
+#define D(x) D[(x) ^ 1]
107
+#define UB(x) UB[(x) ^ 15]
108
+#define UH(x) UH[(x) ^ 7]
109
+#define UW(x) UW[(x) ^ 3]
110
+#define UD(x) UD[(x) ^ 1]
111
+#define Q(x) Q[x]
112
+#else
113
+#define B(x) B[x]
114
+#define H(x) H[x]
115
+#define W(x) W[x]
116
+#define D(x) D[x]
117
+#define UB(x) UB[x]
118
+#define UH(x) UH[x]
119
+#define UW(x) UW[x]
120
+#define UD(x) UD[x]
121
+#define Q(x) Q[x]
122
+#endif /* HOST_BIG_ENDIAN */
123
+
124
+#endif /* LOONGARCH_VEC_H */
125
diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/linux-user/loongarch64/signal.c
128
+++ b/linux-user/loongarch64/signal.c
129
@@ -XXX,XX +XXX,XX @@
130
#include "linux-user/trace.h"
131
132
#include "target/loongarch/internals.h"
133
+#include "target/loongarch/vec.h"
134
135
/* FP context was used */
136
#define SC_USED_FP (1 << 0)
137
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/target/loongarch/cpu.c
140
+++ b/target/loongarch/cpu.c
141
@@ -XXX,XX +XXX,XX @@
142
#include "cpu-csr.h"
143
#include "sysemu/reset.h"
144
#include "tcg/tcg.h"
145
+#include "vec.h"
146
147
const char * const regnames[32] = {
148
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
149
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
150
index XXXXXXX..XXXXXXX 100644
151
--- a/target/loongarch/gdbstub.c
152
+++ b/target/loongarch/gdbstub.c
153
@@ -XXX,XX +XXX,XX @@
154
#include "internals.h"
155
#include "exec/gdbstub.h"
156
#include "gdbstub/helpers.h"
157
+#include "vec.h"
158
159
uint64_t read_fcc(CPULoongArchState *env)
160
{
161
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
162
index XXXXXXX..XXXXXXX 100644
163
--- a/target/loongarch/machine.c
164
+++ b/target/loongarch/machine.c
165
@@ -XXX,XX +XXX,XX @@
166
#include "qemu/osdep.h"
167
#include "cpu.h"
168
#include "migration/cpu.h"
169
-#include "internals.h"
170
+#include "vec.h"
171
172
static const VMStateDescription vmstate_fpu_reg = {
173
.name = "fpu_reg",
174
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_lsx = {
175
},
176
};
177
178
+static const VMStateDescription vmstate_lasxh_reg = {
179
+ .name = "lasxh_reg",
180
+ .version_id = 1,
181
+ .minimum_version_id = 1,
182
+ .fields = (VMStateField[]) {
183
+ VMSTATE_UINT64(UD(2), VReg),
184
+ VMSTATE_UINT64(UD(3), VReg),
185
+ VMSTATE_END_OF_LIST()
186
+ }
187
+};
188
+
189
+#define VMSTATE_LASXH_REGS(_field, _state, _start) \
190
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \
191
+ vmstate_lasxh_reg, fpr_t)
192
+
193
+static bool lasx_needed(void *opaque)
194
+{
195
+ LoongArchCPU *cpu = opaque;
196
+
197
+ return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LASX);
198
+}
199
+
200
+static const VMStateDescription vmstate_lasx = {
201
+ .name = "cpu/lasx",
202
+ .version_id = 1,
203
+ .minimum_version_id = 1,
204
+ .needed = lasx_needed,
205
+ .fields = (VMStateField[]) {
206
+ VMSTATE_LASXH_REGS(env.fpr, LoongArchCPU, 0),
207
+ VMSTATE_END_OF_LIST()
208
+ },
209
+};
210
+
211
/* TLB state */
212
const VMStateDescription vmstate_tlb = {
213
.name = "cpu/tlb",
214
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_loongarch_cpu = {
215
.subsections = (const VMStateDescription*[]) {
216
&vmstate_fpu,
217
&vmstate_lsx,
218
+ &vmstate_lasx,
219
NULL
220
}
221
};
222
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
223
index XXXXXXX..XXXXXXX 100644
224
--- a/target/loongarch/translate.c
225
+++ b/target/loongarch/translate.c
226
@@ -XXX,XX +XXX,XX @@
227
#include "fpu/softfloat.h"
228
#include "translate.h"
229
#include "internals.h"
230
+#include "vec.h"
231
232
/* Global register indices */
233
TCGv cpu_gpr[32], cpu_pc;
234
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
235
index XXXXXXX..XXXXXXX 100644
236
--- a/target/loongarch/vec_helper.c
237
+++ b/target/loongarch/vec_helper.c
238
@@ -XXX,XX +XXX,XX @@
239
#include "fpu/softfloat.h"
240
#include "internals.h"
241
#include "tcg/tcg.h"
242
+#include "vec.h"
243
244
#define DO_ADD(a, b) (a + b)
245
#define DO_SUB(a, b) (a - b)
246
--
247
2.39.1
diff view generated by jsdifflib
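
A side note on the index macros above: for a 128-bit vector, (x) ^ 15 gives the same byte index as the old 15 - (x) for x in 0..15, and unlike the subtraction it also keeps indices 16..31 of a 256-bit vector inside their own 16-byte half, which is what makes the macros reusable for LASX. A minimal standalone sketch of just that arithmetic (the be_byte_index name is hypothetical, not part of the patch):

#include <assert.h>

/* Model of the B(x) mapping on a big-endian host. */
static int be_byte_index(int x)
{
    return x ^ 15;
}

int main(void)
{
    for (int x = 0; x < 16; x++) {
        assert(be_byte_index(x) == 15 - x);   /* matches the old macro */
    }
    for (int x = 16; x < 32; x++) {
        /* stays within the upper 16-byte half of a 256-bit vector */
        assert(be_byte_index(x) >= 16 && be_byte_index(x) < 32);
    }
    return 0;
}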
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230914022645.1151356-13-gaosong@loongson.cn>
---
 target/loongarch/cpu.h | 2 ++
 target/loongarch/cpu.c | 2 ++
 target/loongarch/insn_trans/trans_vec.c.inc | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -XXX,XX +XXX,XX @@ static inline void set_pc(CPULoongArchState *env, uint64_t value)
#define HW_FLAGS_CRMD_PG R_CSR_CRMD_PG_MASK /* 0x10 */
#define HW_FLAGS_EUEN_FPE 0x04
#define HW_FLAGS_EUEN_SXE 0x08
+#define HW_FLAGS_EUEN_ASXE 0x10
#define HW_FLAGS_VA32 0x20

static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
*flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK);
*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE;
*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE;
+ *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE;
*flags |= is_va32(env) * HW_FLAGS_VA32;
}

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static const char * const excp_names[] = {
[EXCCODE_DBP] = "Debug breakpoint",
[EXCCODE_BCE] = "Bound Check Exception",
[EXCCODE_SXD] = "128 bit vector instructions Disable exception",
+ [EXCCODE_ASXD] = "256 bit vector instructions Disable exception",
};

const char *loongarch_exception_name(int32_t exception)
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
case EXCCODE_FPD:
case EXCCODE_FPE:
case EXCCODE_SXD:
+ case EXCCODE_ASXD:
env->CSR_BADV = env->pc;
QEMU_FALLTHROUGH;
case EXCCODE_BCE:
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz)
generate_exception(ctx, EXCCODE_SXD);
return false;
}
+
+ if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
+ generate_exception(ctx, EXCCODE_ASXD);
+ return false;
+ }
+
return true;
}

--
2.39.1
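
Putting the hunks together, check_vec() now gates 16-byte operands on CSR_EUEN.SXE and 32-byte operands on CSR_EUEN.ASXE via the TB flags. A simplified sketch of the resulting shape (assembled from the hunk above plus the SXE branch visible in context; not a verbatim copy of the file):

static bool check_vec(DisasContext *ctx, uint32_t oprsz)
{
    if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
        generate_exception(ctx, EXCCODE_SXD);   /* LSX disabled */
        return false;
    }
    if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
        generate_exception(ctx, EXCCODE_ASXD);  /* LASX disabled */
        return false;
    }
    return true;
}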
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-14-gaosong@loongson.cn>
---
 target/loongarch/translate.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -XXX,XX +XXX,XX @@
#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))

/*
--
2.39.1
This patch includes:
- XVADD.{B/H/W/D/Q};
- XVSUB.{B/H/W/D/Q}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-15-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 14 +++
 target/loongarch/disas.c | 23 +++++
 target/loongarch/translate.c | 4 +
 target/loongarch/insn_trans/trans_vec.c.inc | 107 +++++++++++++-------
 4 files changed, 109 insertions(+), 39 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1
vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2
vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3
vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
+
+#
+# LoongArch LASX instructions
+#
+xvadd_b 0111 01000000 10100 ..... ..... ..... @vvv
+xvadd_h 0111 01000000 10101 ..... ..... ..... @vvv
+xvadd_w 0111 01000000 10110 ..... ..... ..... @vvv
+xvadd_d 0111 01000000 10111 ..... ..... ..... @vvv
+xvadd_q 0111 01010010 11010 ..... ..... ..... @vvv
+xvsub_b 0111 01000000 11000 ..... ..... ..... @vvv
+xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv
+xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
+xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
+xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vstelm_d, vr_ii)
INSN_LSX(vstelm_w, vr_ii)
INSN_LSX(vstelm_h, vr_ii)
INSN_LSX(vstelm_b, vr_ii)
+
+#define INSN_LASX(insn, type) \
+static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
+{ \
+ output_##type ## _x(ctx, a, #insn); \
+ return true; \
+}
+
+static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
+}
+
+INSN_LASX(xvadd_b, vvv)
+INSN_LASX(xvadd_h, vvv)
+INSN_LASX(xvadd_w, vvv)
+INSN_LASX(xvadd_d, vvv)
+INSN_LASX(xvadd_q, vvv)
+INSN_LASX(xvsub_b, vvv)
+INSN_LASX(xvsub_h, vvv)
+INSN_LASX(xvsub_w, vvv)
+INSN_LASX(xvsub_d, vvv)
+INSN_LASX(xvsub_q, vvv)
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
ctx->vl = LSX_LEN;
}

+ if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) {
+ ctx->vl = LASX_LEN;
+ }
+
ctx->la64 = is_la64(env);
ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0;

diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
uint32_t vj_ofs = vec_full_offset(a->vj);
uint32_t vk_ofs = vec_full_offset(a->vk);

+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
return true;
}
@@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t))
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gvec_vvv_vl(ctx, a, 16, mop, func);
}

+static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ return gvec_vvv_vl(ctx, a, 32, mop, func);
+}

static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
uint32_t oprsz, MemOp mop,
@@ -XXX,XX +XXX,XX @@ TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
+TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
+TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
+TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
+TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
+
+static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ int i;
+ TCGv_i64 rh, rl, ah, al, bh, bl;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }

-#define VADDSUB_Q(NAME) \
-static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
-{ \
- TCGv_i64 rh, rl, ah, al, bh, bl; \
- \
- if (!avail_LSX(ctx)) { \
- return false; \
- } \
- \
- if (!check_vec(ctx, 16)) { \
- return true; \
- } \
- \
- rh = tcg_temp_new_i64(); \
- rl = tcg_temp_new_i64(); \
- ah = tcg_temp_new_i64(); \
- al = tcg_temp_new_i64(); \
- bh = tcg_temp_new_i64(); \
- bl = tcg_temp_new_i64(); \
- \
- get_vreg64(ah, a->vj, 1); \
- get_vreg64(al, a->vj, 0); \
- get_vreg64(bh, a->vk, 1); \
- get_vreg64(bl, a->vk, 0); \
- \
- tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \
- \
- set_vreg64(rh, a->vd, 1); \
- set_vreg64(rl, a->vd, 0); \
- \
- return true; \
-}
-
-VADDSUB_Q(add)
-VADDSUB_Q(sub)
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ ah = tcg_temp_new_i64();
+ al = tcg_temp_new_i64();
+ bh = tcg_temp_new_i64();
+ bl = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(ah, a->vj, 1 + i * 2);
+ get_vreg64(al, a->vj, i * 2);
+ get_vreg64(bh, a->vk, 1 + i * 2);
+ get_vreg64(bl, a->vk, i * 2);
+
+ func(rl, rh, al, ah, bl, bh);
+
+ set_vreg64(rh, a->vd, 1 + i * 2);
+ set_vreg64(rl, a->vd, i * 2);
+ }
+ return true;
+}
+
+static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vaddsub_q_vl(ctx, a, 16, func);
+}
+
+static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vaddsub_q_vl(ctx, a, 32, func);
+}

TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
+TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
+TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
+TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
+TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
+
+TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
+TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
+TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
+TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)

TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
--
2.39.1
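
For readers unfamiliar with tcg_gen_add2_i64()/tcg_gen_sub2_i64(): each iteration of the loop in gen_vaddsub_q_vl() performs one full 128-bit add or subtract out of two 64-bit halves. A scalar C model of the add case (illustrative only; the add128 name is made up for this note):

#include <stdint.h>

static void add128(uint64_t *rl, uint64_t *rh,
                   uint64_t al, uint64_t ah,
                   uint64_t bl, uint64_t bh)
{
    uint64_t lo = al + bl;

    *rl = lo;
    *rh = ah + bh + (lo < al);   /* carry out of the low word */
}

With oprsz == 16 the loop runs once (one 128-bit lane, vadd.q/vsub.q); with oprsz == 32 it runs twice, once per 128-bit half of the 256-bit LASX register.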
This patch includes:
- XVREPLGR2VR.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-16-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 5 +++++
 target/loongarch/disas.c | 10 +++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 25 +++++++++++++++------
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv
xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
+
+xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
+xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
+xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
+xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
}

+static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsub_h, vvv)
INSN_LASX(xvsub_w, vvv)
INSN_LASX(xvsub_d, vvv)
INSN_LASX(xvsub_q, vvv)
+
+INSN_LASX(xvreplgr2vr_b, vr)
+INSN_LASX(xvreplgr2vr_h, vr)
+INSN_LASX(xvreplgr2vr_w, vr)
+INSN_LASX(xvreplgr2vr_d, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
return true;
}

-static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
+ uint32_t oprsz, MemOp mop)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
- 16, ctx->vl/8, src);
+ oprsz, ctx->vl/8, src);
return true;
}

+static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+ return gvec_dup_vl(ctx, a, 16, mop);
+}
+
+static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+ return gvec_dup_vl(ctx, a, 32, mop);
+}
+
TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
+TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
+TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
+TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
+TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)

static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
{
--
2.39.1
This patch includes:
- XVADDI.{B/H/W/D}U;
- XVSUBI.{B/H/W/D}U.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-17-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 9 ++++++
 target/loongarch/disas.c | 14 ++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 36 ++++++++++++++++-----
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv

+xvaddi_bu 0111 01101000 10100 ..... ..... ..... @vv_ui5
+xvaddi_hu 0111 01101000 10101 ..... ..... ..... @vv_ui5
+xvaddi_wu 0111 01101000 10110 ..... ..... ..... @vv_ui5
+xvaddi_du 0111 01101000 10111 ..... ..... ..... @vv_ui5
+xvsubi_bu 0111 01101000 11000 ..... ..... ..... @vv_ui5
+xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5
+xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5
+xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
}

+static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsub_w, vvv)
INSN_LASX(xvsub_d, vvv)
INSN_LASX(xvsub_q, vvv)

+INSN_LASX(xvaddi_bu, vv_i)
+INSN_LASX(xvaddi_hu, vv_i)
+INSN_LASX(xvaddi_wu, vv_i)
+INSN_LASX(xvaddi_du, vv_i)
+INSN_LASX(xvsubi_bu, vv_i)
+INSN_LASX(xvsubi_hu, vv_i)
+INSN_LASX(xvsubi_wu, vv_i)
+INSN_LASX(xvsubi_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
uint32_t vd_ofs = vec_full_offset(a->vd);
uint32_t vj_ofs = vec_full_offset(a->vj);

+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
return true;
}
@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
int64_t, uint32_t, uint32_t))
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gvec_vv_i_vl(ctx, a, 16, mop, func);
}

+static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ return gvec_vv_i_vl(ctx, a, 32, mop, func);
+}
+
static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
uint32_t oprsz, MemOp mop)
{
uint32_t vd_ofs = vec_full_offset(a->vd);
uint32_t vj_ofs = vec_full_offset(a->vj);

+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
return true;
}

static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gvec_subi_vl(ctx, a, 16, mop);
}

+static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+ return gvec_subi_vl(ctx, a, 32, mop);
+}
+
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
@@ -XXX,XX +XXX,XX @@ TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
+TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
+TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
+TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
+TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
+TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
+TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
+TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
+TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)

TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
--
2.39.1
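
Note the trick in gvec_subi_vl() above: there is no gvec "subi" primitive, so subtraction of an immediate is implemented as tcg_gen_gvec_addi() with -a->imm, relying on two's-complement wrap-around at each element width. A one-lane model (hypothetical helper, for illustration only):

#include <stdint.h>

static inline uint8_t subi_bu(uint8_t v, uint8_t imm)
{
    return (uint8_t)(v + (uint8_t)-imm);   /* same as v - imm mod 256 */
}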
This patch includes:
- XVNEG.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-18-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 5 +++++
 target/loongarch/disas.c | 10 ++++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 19 +++++++++++++++----
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5
xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5
xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5

+xvneg_b 0111 01101001 11000 01100 ..... ..... @vv
+xvneg_h 0111 01101001 11000 01101 ..... ..... @vv
+xvneg_w 0111 01101001 11000 01110 ..... ..... @vv
+xvneg_d 0111 01101001 11000 01111 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm);
}

+static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsubi_hu, vv_i)
INSN_LASX(xvsubi_wu, vv_i)
INSN_LASX(xvsubi_du, vv_i)

+INSN_LASX(xvneg_b, vv)
+INSN_LASX(xvneg_h, vv)
+INSN_LASX(xvneg_w, vv)
+INSN_LASX(xvneg_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
uint32_t vd_ofs = vec_full_offset(a->vd);
uint32_t vj_ofs = vec_full_offset(a->vj);

+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
return true;
}
@@ -XXX,XX +XXX,XX @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t))
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gvec_vv_vl(ctx, a, 16, mop, func);
}

+static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ return gvec_vv_vl(ctx, a, 32, mop, func);
+}
+
static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
uint32_t oprsz, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
@@ -XXX,XX +XXX,XX @@ TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
+TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
+TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
+TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
+TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)

TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
--
2.39.1
This patch includes:
- XVSADD.{B/H/W/D}[U];
- XVSSUB.{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-19-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 18 ++++++++++++++++++
 target/loongarch/disas.c | 17 +++++++++++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 17 +++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvneg_h 0111 01101001 11000 01101 ..... ..... @vv
xvneg_w 0111 01101001 11000 01110 ..... ..... @vv
xvneg_d 0111 01101001 11000 01111 ..... ..... @vv

+xvsadd_b 0111 01000100 01100 ..... ..... ..... @vvv
+xvsadd_h 0111 01000100 01101 ..... ..... ..... @vvv
+xvsadd_w 0111 01000100 01110 ..... ..... ..... @vvv
+xvsadd_d 0111 01000100 01111 ..... ..... ..... @vvv
+xvsadd_bu 0111 01000100 10100 ..... ..... ..... @vvv
+xvsadd_hu 0111 01000100 10101 ..... ..... ..... @vvv
+xvsadd_wu 0111 01000100 10110 ..... ..... ..... @vvv
+xvsadd_du 0111 01000100 10111 ..... ..... ..... @vvv
+
+xvssub_b 0111 01000100 10000 ..... ..... ..... @vvv
+xvssub_h 0111 01000100 10001 ..... ..... ..... @vvv
+xvssub_w 0111 01000100 10010 ..... ..... ..... @vvv
+xvssub_d 0111 01000100 10011 ..... ..... ..... @vvv
+xvssub_bu 0111 01000100 11000 ..... ..... ..... @vvv
+xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv
+xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv
+xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvneg_h, vv)
INSN_LASX(xvneg_w, vv)
INSN_LASX(xvneg_d, vv)

+INSN_LASX(xvsadd_b, vvv)
+INSN_LASX(xvsadd_h, vvv)
+INSN_LASX(xvsadd_w, vvv)
+INSN_LASX(xvsadd_d, vvv)
+INSN_LASX(xvsadd_bu, vvv)
+INSN_LASX(xvsadd_hu, vvv)
+INSN_LASX(xvsadd_wu, vvv)
+INSN_LASX(xvsadd_du, vvv)
+INSN_LASX(xvssub_b, vvv)
+INSN_LASX(xvssub_h, vvv)
+INSN_LASX(xvssub_w, vvv)
+INSN_LASX(xvssub_d, vvv)
+INSN_LASX(xvssub_bu, vvv)
+INSN_LASX(xvssub_hu, vvv)
+INSN_LASX(xvssub_wu, vvv)
+INSN_LASX(xvssub_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)

+TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
+TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
+TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
+TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
+TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
+TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
+TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
+TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
+TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
+TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
+TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
+TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
+
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
--
2.39.1
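
The gvec expanders do the per-element saturation themselves; as a reference, here is a scalar model of what tcg_gen_gvec_ssadd() computes for one signed 8-bit lane (other widths are analogous; the ssadd8 name is made up for this note):

#include <stdint.h>

static int8_t ssadd8(int8_t a, int8_t b)
{
    int16_t r = (int16_t)a + (int16_t)b;   /* widen so the sum cannot wrap */

    if (r > INT8_MAX) {
        return INT8_MAX;
    }
    if (r < INT8_MIN) {
        return INT8_MIN;
    }
    return (int8_t)r;
}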
This patch includes:
- XVHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU};
- XVHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-20-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 18 +++++++++++
 target/loongarch/disas.c | 17 +++++++++++
 target/loongarch/vec_helper.c | 34 ++++++++++++++++-----
 target/loongarch/insn_trans/trans_vec.c.inc | 30 +++++++++++++++---
 4 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv
xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv
xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv

+xvhaddw_h_b 0111 01000101 01000 ..... ..... ..... @vvv
+xvhaddw_w_h 0111 01000101 01001 ..... ..... ..... @vvv
+xvhaddw_d_w 0111 01000101 01010 ..... ..... ..... @vvv
+xvhaddw_q_d 0111 01000101 01011 ..... ..... ..... @vvv
+xvhaddw_hu_bu 0111 01000101 10000 ..... ..... ..... @vvv
+xvhaddw_wu_hu 0111 01000101 10001 ..... ..... ..... @vvv
+xvhaddw_du_wu 0111 01000101 10010 ..... ..... ..... @vvv
+xvhaddw_qu_du 0111 01000101 10011 ..... ..... ..... @vvv
+
+xvhsubw_h_b 0111 01000101 01100 ..... ..... ..... @vvv
+xvhsubw_w_h 0111 01000101 01101 ..... ..... ..... @vvv
+xvhsubw_d_w 0111 01000101 01110 ..... ..... ..... @vvv
+xvhsubw_q_d 0111 01000101 01111 ..... ..... ..... @vvv
+xvhsubw_hu_bu 0111 01000101 10100 ..... ..... ..... @vvv
+xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv
+xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv
+xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssub_hu, vvv)
INSN_LASX(xvssub_wu, vvv)
INSN_LASX(xvssub_du, vvv)

+INSN_LASX(xvhaddw_h_b, vvv)
+INSN_LASX(xvhaddw_w_h, vvv)
+INSN_LASX(xvhaddw_d_w, vvv)
+INSN_LASX(xvhaddw_q_d, vvv)
+INSN_LASX(xvhaddw_hu_bu, vvv)
+INSN_LASX(xvhaddw_wu_hu, vvv)
+INSN_LASX(xvhaddw_du_wu, vvv)
+INSN_LASX(xvhaddw_qu_du, vvv)
+INSN_LASX(xvhsubw_h_b, vvv)
+INSN_LASX(xvhsubw_w_h, vvv)
+INSN_LASX(xvhsubw_d_w, vvv)
+INSN_LASX(xvhsubw_q_d, vvv)
+INSN_LASX(xvhsubw_hu_bu, vvv)
+INSN_LASX(xvhsubw_wu_hu, vvv)
+INSN_LASX(xvhsubw_du_wu, vvv)
+INSN_LASX(xvhsubw_qu_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@
#include "internals.h"
#include "tcg/tcg.h"
#include "vec.h"
+#include "tcg/tcg-gvec-desc.h"

#define DO_ADD(a, b) (a + b)
#define DO_SUB(a, b) (a - b)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
}
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)

void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}

DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)

void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}

DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)

void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}

DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
@@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)

void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}

#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
gen_helper_gvec_3 *fn)
{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
vec_full_offset(a->vj),
vec_full_offset(a->vk),
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,

static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gen_vvv_vl(ctx, a, 16, fn);
}

+static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
+ return gen_vvv_vl(ctx, a, 32, fn);
+}
+
static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
gen_helper_gvec_2_ptr *fn)
{
@@ -XXX,XX +XXX,XX @@ TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)

+TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
+TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
+TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
+TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
+TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
+TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
+TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
+TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
+TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
+TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
+TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
+TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
+TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
+TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
+TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
+TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
+
static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
TCGv_vec t1, t2;
--
2.39.1
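
The DO_ODD_EVEN loop above generalizes cleanly because oprsz is 16 for LSX and 32 for LASX, so the same helper walks one or two 128-bit halves. A scalar model of vhaddw.h.b/xvhaddw.h.b (hypothetical function, mirroring the macro expansion):

#include <stdint.h>

static void hadd_h_b(int16_t *d, const int8_t *j, const int8_t *k,
                     int oprsz)
{
    /* BIT == 16, so the loop runs oprsz / 2 times: each 16-bit result
     * adds the odd-indexed byte of Vj to the even-indexed byte of Vk. */
    for (int i = 0; i < oprsz / 2; i++) {
        d[i] = (int16_t)j[2 * i + 1] + (int16_t)k[2 * i];
    }
}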
This patch includes:
- XVADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-21-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 45 ++++++++
 target/loongarch/disas.c | 43 +++++++
 target/loongarch/vec_helper.c | 120 ++++++++++++++------
 target/loongarch/insn_trans/trans_vec.c.inc | 41 +++++++
 4 files changed, 215 insertions(+), 34 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv
xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv
xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv

+xvaddwev_h_b 0111 01000001 11100 ..... ..... ..... @vvv
+xvaddwev_w_h 0111 01000001 11101 ..... ..... ..... @vvv
+xvaddwev_d_w 0111 01000001 11110 ..... ..... ..... @vvv
+xvaddwev_q_d 0111 01000001 11111 ..... ..... ..... @vvv
+xvaddwod_h_b 0111 01000010 00100 ..... ..... ..... @vvv
+xvaddwod_w_h 0111 01000010 00101 ..... ..... ..... @vvv
+xvaddwod_d_w 0111 01000010 00110 ..... ..... ..... @vvv
+xvaddwod_q_d 0111 01000010 00111 ..... ..... ..... @vvv
+
+xvsubwev_h_b 0111 01000010 00000 ..... ..... ..... @vvv
+xvsubwev_w_h 0111 01000010 00001 ..... ..... ..... @vvv
+xvsubwev_d_w 0111 01000010 00010 ..... ..... ..... @vvv
+xvsubwev_q_d 0111 01000010 00011 ..... ..... ..... @vvv
+xvsubwod_h_b 0111 01000010 01000 ..... ..... ..... @vvv
+xvsubwod_w_h 0111 01000010 01001 ..... ..... ..... @vvv
+xvsubwod_d_w 0111 01000010 01010 ..... ..... ..... @vvv
+xvsubwod_q_d 0111 01000010 01011 ..... ..... ..... @vvv
+
+xvaddwev_h_bu 0111 01000010 11100 ..... ..... ..... @vvv
+xvaddwev_w_hu 0111 01000010 11101 ..... ..... ..... @vvv
+xvaddwev_d_wu 0111 01000010 11110 ..... ..... ..... @vvv
+xvaddwev_q_du 0111 01000010 11111 ..... ..... ..... @vvv
+xvaddwod_h_bu 0111 01000011 00100 ..... ..... ..... @vvv
+xvaddwod_w_hu 0111 01000011 00101 ..... ..... ..... @vvv
+xvaddwod_d_wu 0111 01000011 00110 ..... ..... ..... @vvv
+xvaddwod_q_du 0111 01000011 00111 ..... ..... ..... @vvv
+
+xvsubwev_h_bu 0111 01000011 00000 ..... ..... ..... @vvv
+xvsubwev_w_hu 0111 01000011 00001 ..... ..... ..... @vvv
+xvsubwev_d_wu 0111 01000011 00010 ..... ..... ..... @vvv
+xvsubwev_q_du 0111 01000011 00011 ..... ..... ..... @vvv
+xvsubwod_h_bu 0111 01000011 01000 ..... ..... ..... @vvv
+xvsubwod_w_hu 0111 01000011 01001 ..... ..... ..... @vvv
+xvsubwod_d_wu 0111 01000011 01010 ..... ..... ..... @vvv
+xvsubwod_q_du 0111 01000011 01011 ..... ..... ..... @vvv
+
+xvaddwev_h_bu_b 0111 01000011 11100 ..... ..... ..... @vvv
+xvaddwev_w_hu_h 0111 01000011 11101 ..... ..... ..... @vvv
+xvaddwev_d_wu_w 0111 01000011 11110 ..... ..... ..... @vvv
+xvaddwev_q_du_d 0111 01000011 11111 ..... ..... ..... @vvv
+xvaddwod_h_bu_b 0111 01000100 00000 ..... ..... ..... @vvv
+xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv
+xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv
+xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvhsubw_wu_hu, vvv)
INSN_LASX(xvhsubw_du_wu, vvv)
INSN_LASX(xvhsubw_qu_du, vvv)

+INSN_LASX(xvaddwev_h_b, vvv)
+INSN_LASX(xvaddwev_w_h, vvv)
+INSN_LASX(xvaddwev_d_w, vvv)
+INSN_LASX(xvaddwev_q_d, vvv)
+INSN_LASX(xvaddwod_h_b, vvv)
+INSN_LASX(xvaddwod_w_h, vvv)
+INSN_LASX(xvaddwod_d_w, vvv)
+INSN_LASX(xvaddwod_q_d, vvv)
+INSN_LASX(xvsubwev_h_b, vvv)
+INSN_LASX(xvsubwev_w_h, vvv)
+INSN_LASX(xvsubwev_d_w, vvv)
+INSN_LASX(xvsubwev_q_d, vvv)
+INSN_LASX(xvsubwod_h_b, vvv)
+INSN_LASX(xvsubwod_w_h, vvv)
+INSN_LASX(xvsubwod_d_w, vvv)
+INSN_LASX(xvsubwod_q_d, vvv)
+
+INSN_LASX(xvaddwev_h_bu, vvv)
+INSN_LASX(xvaddwev_w_hu, vvv)
+INSN_LASX(xvaddwev_d_wu, vvv)
+INSN_LASX(xvaddwev_q_du, vvv)
+INSN_LASX(xvaddwod_h_bu, vvv)
+INSN_LASX(xvaddwod_w_hu, vvv)
+INSN_LASX(xvaddwod_d_wu, vvv)
+INSN_LASX(xvaddwod_q_du, vvv)
+INSN_LASX(xvsubwev_h_bu, vvv)
+INSN_LASX(xvsubwev_w_hu, vvv)
+INSN_LASX(xvsubwev_d_wu, vvv)
+INSN_LASX(xvsubwev_q_du, vvv)
+INSN_LASX(xvsubwod_h_bu, vvv)
+INSN_LASX(xvsubwod_w_hu, vvv)
+INSN_LASX(xvsubwod_d_wu, vvv)
+INSN_LASX(xvsubwod_q_du, vvv)
+
+INSN_LASX(xvaddwev_h_bu_b, vvv)
+INSN_LASX(xvaddwev_w_hu_h, vvv)
+INSN_LASX(xvaddwev_d_wu_w, vvv)
+INSN_LASX(xvaddwev_q_du_d, vvv)
+INSN_LASX(xvaddwod_h_bu_b, vvv)
+INSN_LASX(xvaddwod_w_hu_h, vvv)
+INSN_LASX(xvaddwod_d_wu_w, vvv)
+INSN_LASX(xvaddwod_q_du_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
}

#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
} \
}

#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
} \
}

-void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}

DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)

-void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
}

DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)

-void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}

DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)

-void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
}

DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)

-void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}

DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)

-void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
}

DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)

-void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}

DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)

-void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
}

DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
@@ -XXX,XX +XXX,XX @@ DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)

#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->ES1(0)) TDS; \
typedef __typeof(Vd->EU1(0)) TDU; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
} \
}

#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->ES1(0)) TDS; \
typedef __typeof(Vd->EU1(0)) TDU; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
} \
}

-void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
- int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}

DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)

-void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
}

DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
+TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
+TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
+TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
+TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)

static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
+TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
+TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
+TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
+TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
+

static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -XXX,XX +XXX,XX @@ TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
+TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
+TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
+TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
+TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)

static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -XXX,XX +XXX,XX @@ TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
447
TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
448
+TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
449
+TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
450
+TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
451
+TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
452
453
static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
454
{
455
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
456
TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
457
TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
458
TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
459
+TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
460
+TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
461
+TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
462
+TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
463
464
static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
465
{
466
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
467
TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
468
TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
469
TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
470
+TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
471
+TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
472
+TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
473
+TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
474
475
static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
476
{
477
@@ -XXX,XX +XXX,XX @@ TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
478
TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
479
TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
480
TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
481
+TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
482
+TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
483
+TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
484
+TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
485
486
static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
487
{
488
@@ -XXX,XX +XXX,XX @@ TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
489
TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
490
TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
491
TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
492
+TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
493
+TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
494
+TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
495
+TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
496
497
static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
498
{
499
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
500
TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
501
TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
502
TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
503
+TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
504
+TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
505
+TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
506
+TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
507
508
static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
509
{
510
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
511
TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
512
TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
513
TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
514
+TRANS(xvaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwod_u_s)
+TRANS(xvaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwod_u_s)
+TRANS(xvaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwod_u_s)
+TRANS(xvaddwod_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwod_u_s)
 
 static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
                     void (*gen_shr_vec)(unsigned, TCGv_vec,
--
2.39.1
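
For readers tracking the helper conversion above: the following is a minimal, standalone C model of what the reworked vaddwod_q_du helper computes per 128-bit lane. It is illustrative only; the function name, the plain arrays, and the GCC/Clang unsigned __int128 extension are assumptions of this sketch, while QEMU itself goes through its VReg accessors and Int128 helpers.

#include <stdint.h>
#include <stdio.h>

/* Model: for each 128-bit lane, add the odd-numbered 64-bit elements of
 * the two sources, zero-extended to 128 bits (oprsz is 16 for LSX, 32
 * for LASX). */
static void vaddwod_q_du_model(unsigned __int128 *d, const uint64_t *j,
                               const uint64_t *k, int oprsz)
{
    for (int i = 0; i < oprsz / 16; i++) {
        d[i] = (unsigned __int128)j[2 * i + 1]
             + (unsigned __int128)k[2 * i + 1];
    }
}

int main(void)
{
    uint64_t j[4] = { 1, UINT64_MAX, 2, UINT64_MAX };
    uint64_t k[4] = { 1, 1, 2, 2 };
    unsigned __int128 d[2];

    vaddwod_q_du_model(d, j, k, 32);    /* LASX width: two 128-bit lanes */
    /* UINT64_MAX + 1 == 2^64, so lane 0 is high=1, low=0 */
    printf("lane0: high=%llu low=%llu\n",
           (unsigned long long)(d[0] >> 64), (unsigned long long)d[0]);
    return 0;
}
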
This patch includes:
- XVAVG.{B/H/W/D}[U];
- XVAVGR.{B/H/W/D}[U].
 
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-22-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 17 ++++++++++++++++
 target/loongarch/disas.c | 17 ++++++++++++++++
 target/loongarch/vec_helper.c | 22 +++++++++++----------
 target/loongarch/insn_trans/trans_vec.c.inc | 16 +++++++++++++++
 4 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv
 xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv
 xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv
 
+xvavg_b 0111 01000110 01000 ..... ..... ..... @vvv
+xvavg_h 0111 01000110 01001 ..... ..... ..... @vvv
+xvavg_w 0111 01000110 01010 ..... ..... ..... @vvv
+xvavg_d 0111 01000110 01011 ..... ..... ..... @vvv
+xvavg_bu 0111 01000110 01100 ..... ..... ..... @vvv
+xvavg_hu 0111 01000110 01101 ..... ..... ..... @vvv
+xvavg_wu 0111 01000110 01110 ..... ..... ..... @vvv
+xvavg_du 0111 01000110 01111 ..... ..... ..... @vvv
+xvavgr_b 0111 01000110 10000 ..... ..... ..... @vvv
+xvavgr_h 0111 01000110 10001 ..... ..... ..... @vvv
+xvavgr_w 0111 01000110 10010 ..... ..... ..... @vvv
+xvavgr_d 0111 01000110 10011 ..... ..... ..... @vvv
+xvavgr_bu 0111 01000110 10100 ..... ..... ..... @vvv
+xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv
+xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv
+xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvaddwod_w_hu_h, vvv)
 INSN_LASX(xvaddwod_d_wu_w, vvv)
 INSN_LASX(xvaddwod_q_du_d, vvv)
 
+INSN_LASX(xvavg_b, vvv)
+INSN_LASX(xvavg_h, vvv)
+INSN_LASX(xvavg_w, vvv)
+INSN_LASX(xvavg_d, vvv)
+INSN_LASX(xvavg_bu, vvv)
+INSN_LASX(xvavg_hu, vvv)
+INSN_LASX(xvavg_wu, vvv)
+INSN_LASX(xvavg_du, vvv)
+INSN_LASX(xvavgr_b, vvv)
+INSN_LASX(xvavgr_h, vvv)
+INSN_LASX(xvavgr_w, vvv)
+INSN_LASX(xvavgr_d, vvv)
+INSN_LASX(xvavgr_bu, vvv)
+INSN_LASX(xvavgr_hu, vvv)
+INSN_LASX(xvavgr_wu, vvv)
+INSN_LASX(xvavgr_du, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
 #define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
 #define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
 
-#define DO_3OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
-    } \
+#define DO_3OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+    } \
 }
 
 DO_3OP(vavg_b, 8, B, DO_VAVG)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
 TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
 TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
 TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
+TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
+TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
+TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
+TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
+TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
+TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
+TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
+TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
 
 static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -XXX,XX +XXX,XX @@ TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
 TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
 TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
 TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
+TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
+TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
+TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
+TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
+TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
+TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
+TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
+TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
 
 static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
--
2.39.1
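
The DO_VAVG and DO_VAVGR macros quoted in this patch average without widening: (a >> 1) + (b >> 1) plus a one-bit carry term reproduces (a + b) >> 1 even when a + b would overflow the element type. A small self-contained check of that identity (illustrative only; like QEMU, it assumes the compiler implements signed right shift arithmetically):

#include <assert.h>
#include <stdint.h>

static int8_t vavg_b(int8_t a, int8_t b)
{
    return (a >> 1) + (b >> 1) + (a & b & 1);          /* DO_VAVG  */
}

static int8_t vavgr_b(int8_t a, int8_t b)
{
    return (a >> 1) + (b >> 1) + ((a | b) & 1);        /* DO_VAVGR */
}

int main(void)
{
    assert(vavg_b(127, 127) == 127);   /* (127 + 127) / 2 without int8_t overflow */
    assert(vavg_b(5, 2) == 3);         /* truncating average: (5 + 2) >> 1 */
    assert(vavgr_b(5, 2) == 4);        /* rounding average: (5 + 2 + 1) >> 1 */
    return 0;
}
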
This patch includes:
- XVABSD.{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-23-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 9 +++++++++
 target/loongarch/disas.c | 9 +++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++
 3 files changed, 26 insertions(+)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv
 xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv
 xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv
 
+xvabsd_b 0111 01000110 00000 ..... ..... ..... @vvv
+xvabsd_h 0111 01000110 00001 ..... ..... ..... @vvv
+xvabsd_w 0111 01000110 00010 ..... ..... ..... @vvv
+xvabsd_d 0111 01000110 00011 ..... ..... ..... @vvv
+xvabsd_bu 0111 01000110 00100 ..... ..... ..... @vvv
+xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv
+xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv
+xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvavgr_hu, vvv)
 INSN_LASX(xvavgr_wu, vvv)
 INSN_LASX(xvavgr_du, vvv)
 
+INSN_LASX(xvabsd_b, vvv)
+INSN_LASX(xvabsd_h, vvv)
+INSN_LASX(xvabsd_w, vvv)
+INSN_LASX(xvabsd_d, vvv)
+INSN_LASX(xvabsd_bu, vvv)
+INSN_LASX(xvabsd_hu, vvv)
+INSN_LASX(xvabsd_wu, vvv)
+INSN_LASX(xvabsd_du, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
 TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
 TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
 TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
+TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
+TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
+TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
+TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
+TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
+TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
+TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
+TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
 
 static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
--
2.39.1
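
xvabsd reuses the existing do_vabsd_s/do_vabsd_u expanders, so this patch touches no helper code. For reference, a standalone sketch of the per-element semantics being wired up (the names below are invented for illustration; the signedness of the compare is the entire difference between the two variants):

#include <assert.h>
#include <stdint.h>

static int8_t vabsd_b(int8_t a, int8_t b)        /* signed |a - b|   */
{
    return a > b ? a - b : b - a;
}

static uint8_t vabsd_bu(uint8_t a, uint8_t b)    /* unsigned |a - b| */
{
    return a > b ? a - b : b - a;
}

int main(void)
{
    assert(vabsd_b(-3, 4) == 7);
    assert(vabsd_bu(3, 250) == 247);   /* unsigned compare: 250 - 3 */
    return 0;
}
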
This patch includes:
- XVADDA.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-24-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 5 ++++
 target/loongarch/disas.c | 5 ++++
 target/loongarch/vec_helper.c | 30 +++++++++++----------
 target/loongarch/insn_trans/trans_vec.c.inc | 4 +++
 4 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv
 xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv
 xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv
 
+xvadda_b 0111 01000101 11000 ..... ..... ..... @vvv
+xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv
+xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv
+xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvabsd_hu, vvv)
 INSN_LASX(xvabsd_wu, vvv)
 INSN_LASX(xvabsd_du, vvv)
 
+INSN_LASX(xvadda_b, vvv)
+INSN_LASX(xvadda_h, vvv)
+INSN_LASX(xvadda_w, vvv)
+INSN_LASX(xvadda_d, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_du, 64, UD, DO_VABSD)
 
 #define DO_VABS(a) ((a < 0) ? (-a) : (a))
 
-#define DO_VADDA(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i)); \
-    } \
+#define DO_VADDA(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \
+    } \
 }
 
-DO_VADDA(vadda_b, 8, B, DO_VABS)
-DO_VADDA(vadda_h, 16, H, DO_VABS)
-DO_VADDA(vadda_w, 32, W, DO_VABS)
-DO_VADDA(vadda_d, 64, D, DO_VABS)
+DO_VADDA(vadda_b, 8, B)
+DO_VADDA(vadda_h, 16, H)
+DO_VADDA(vadda_w, 32, W)
+DO_VADDA(vadda_d, 64, D)
 
 #define DO_MIN(a, b) (a < b ? a : b)
 #define DO_MAX(a, b) (a > b ? a : b)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
 TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
 TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
 TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
+TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
+TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
+TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
+TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
 
 TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
 TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
--
2.39.1
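
All of the converted helpers in this series now derive their trip count as oprsz / (BIT / 8) instead of the fixed LSX_LEN/BIT. A quick sanity check of that arithmetic, assuming simd_oprsz(desc) reports 16 bytes for an LSX op and 32 bytes for an LASX op:

#include <assert.h>

int main(void)
{
    /* 128-bit LSX: oprsz = 16 bytes */
    assert(16 / (8 / 8) == 16);    /* vadda.b: 16 byte elements        */
    assert(16 / (64 / 8) == 2);    /* vadda.d: 2 doubleword elements   */
    /* 256-bit LASX: oprsz = 32 bytes */
    assert(32 / (8 / 8) == 32);    /* xvadda.b: 32 byte elements       */
    assert(32 / (64 / 8) == 4);    /* xvadda.d: 4 doubleword elements  */
    return 0;
}
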
This patch includes:
- XVMAX[I].{B/H/W/D}[U];
- XVMIN[I].{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-25-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 36 +++++++++++++++++++++
 target/loongarch/disas.c | 34 +++++++++++++++++++
 target/loongarch/vec_helper.c | 23 ++++++-------
 target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++
 4 files changed, 114 insertions(+), 11 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv
 xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv
 xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv
 
+xvmax_b 0111 01000111 00000 ..... ..... ..... @vvv
+xvmax_h 0111 01000111 00001 ..... ..... ..... @vvv
+xvmax_w 0111 01000111 00010 ..... ..... ..... @vvv
+xvmax_d 0111 01000111 00011 ..... ..... ..... @vvv
+xvmax_bu 0111 01000111 01000 ..... ..... ..... @vvv
+xvmax_hu 0111 01000111 01001 ..... ..... ..... @vvv
+xvmax_wu 0111 01000111 01010 ..... ..... ..... @vvv
+xvmax_du 0111 01000111 01011 ..... ..... ..... @vvv
+
+xvmaxi_b 0111 01101001 00000 ..... ..... ..... @vv_i5
+xvmaxi_h 0111 01101001 00001 ..... ..... ..... @vv_i5
+xvmaxi_w 0111 01101001 00010 ..... ..... ..... @vv_i5
+xvmaxi_d 0111 01101001 00011 ..... ..... ..... @vv_i5
+xvmaxi_bu 0111 01101001 01000 ..... ..... ..... @vv_ui5
+xvmaxi_hu 0111 01101001 01001 ..... ..... ..... @vv_ui5
+xvmaxi_wu 0111 01101001 01010 ..... ..... ..... @vv_ui5
+xvmaxi_du 0111 01101001 01011 ..... ..... ..... @vv_ui5
+
+xvmin_b 0111 01000111 00100 ..... ..... ..... @vvv
+xvmin_h 0111 01000111 00101 ..... ..... ..... @vvv
+xvmin_w 0111 01000111 00110 ..... ..... ..... @vvv
+xvmin_d 0111 01000111 00111 ..... ..... ..... @vvv
+xvmin_bu 0111 01000111 01100 ..... ..... ..... @vvv
+xvmin_hu 0111 01000111 01101 ..... ..... ..... @vvv
+xvmin_wu 0111 01000111 01110 ..... ..... ..... @vvv
+xvmin_du 0111 01000111 01111 ..... ..... ..... @vvv
+
+xvmini_b 0111 01101001 00100 ..... ..... ..... @vv_i5
+xvmini_h 0111 01101001 00101 ..... ..... ..... @vv_i5
+xvmini_w 0111 01101001 00110 ..... ..... ..... @vv_i5
+xvmini_d 0111 01101001 00111 ..... ..... ..... @vv_i5
+xvmini_bu 0111 01101001 01100 ..... ..... ..... @vv_ui5
+xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5
+xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5
+xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvadda_h, vvv)
 INSN_LASX(xvadda_w, vvv)
 INSN_LASX(xvadda_d, vvv)
 
+INSN_LASX(xvmax_b, vvv)
+INSN_LASX(xvmax_h, vvv)
+INSN_LASX(xvmax_w, vvv)
+INSN_LASX(xvmax_d, vvv)
+INSN_LASX(xvmin_b, vvv)
+INSN_LASX(xvmin_h, vvv)
+INSN_LASX(xvmin_w, vvv)
+INSN_LASX(xvmin_d, vvv)
+INSN_LASX(xvmax_bu, vvv)
+INSN_LASX(xvmax_hu, vvv)
+INSN_LASX(xvmax_wu, vvv)
+INSN_LASX(xvmax_du, vvv)
+INSN_LASX(xvmin_bu, vvv)
+INSN_LASX(xvmin_hu, vvv)
+INSN_LASX(xvmin_wu, vvv)
+INSN_LASX(xvmin_du, vvv)
+
+INSN_LASX(xvmaxi_b, vv_i)
+INSN_LASX(xvmaxi_h, vv_i)
+INSN_LASX(xvmaxi_w, vv_i)
+INSN_LASX(xvmaxi_d, vv_i)
+INSN_LASX(xvmini_b, vv_i)
+INSN_LASX(xvmini_h, vv_i)
+INSN_LASX(xvmini_w, vv_i)
+INSN_LASX(xvmini_d, vv_i)
+INSN_LASX(xvmaxi_bu, vv_i)
+INSN_LASX(xvmaxi_hu, vv_i)
+INSN_LASX(xvmaxi_wu, vv_i)
+INSN_LASX(xvmaxi_du, vv_i)
+INSN_LASX(xvmini_bu, vv_i)
+INSN_LASX(xvmini_hu, vv_i)
+INSN_LASX(xvmini_wu, vv_i)
+INSN_LASX(xvmini_du, vv_i)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_d, 64, D)
 #define DO_MIN(a, b) (a < b ? a : b)
 #define DO_MAX(a, b) (a > b ? a : b)
 
-#define VMINMAXI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    typedef __typeof(Vd->E(0)) TD; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
-    } \
+#define VMINMAXI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    typedef __typeof(Vd->E(0)) TD; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+    } \
 }
 
 VMINMAXI(vmini_b, 8, B, DO_MIN)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
 TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
 TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
 TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
+TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
+TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
+TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
+TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
+TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
+TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
+TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
+TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
 
 TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
 TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
@@ -XXX,XX +XXX,XX @@ TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
 TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
 TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
 TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
+TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
+TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
+TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
+TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
+TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
+TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
+TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
+TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
 
 static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
 TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
 TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
 TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
+TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
+TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
+TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
+TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
+TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
+TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
+TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
+TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
 
 static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -XXX,XX +XXX,XX @@ TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
 TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
 TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
 TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
+TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
+TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
+TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
+TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
+TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
+TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
+TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
+TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
 
 TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
 TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
--
2.39.1
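
One subtlety in the VMINMAXI macro converted above: the immediate arrives as a uint64_t (already sign-extended by the decoder for the signed forms) and is cast to the element type before the compare. A standalone model of why the (TD)imm cast matters for, say, vmini.b/xvmini.b with imm = -1 (the decode behaviour is an assumption of this sketch; illustration only):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t imm = (uint64_t)-1;        /* i5 field -1, sign-extended */
    int8_t td = (int8_t)imm;            /* (TD)imm in the macro */
    int8_t e = 3;                       /* one Vj element */

    assert((e < td ? e : td) == -1);    /* DO_MIN(Vj->E(i), (TD)imm) */
    return 0;
}
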
This patch includes:
- XVMUL.{B/H/W/D};
- XVMUH.{B/H/W/D}[U];
- XVMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-26-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 38 +++++++
 target/loongarch/disas.c | 38 +++++++
 target/loongarch/vec_helper.c | 55 +++++-----
 target/loongarch/insn_trans/trans_vec.c.inc | 113 +++++++++++++-----
 4 files changed, 189 insertions(+), 55 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5
 xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5
 xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5
 
+xvmul_b 0111 01001000 01000 ..... ..... ..... @vvv
+xvmul_h 0111 01001000 01001 ..... ..... ..... @vvv
+xvmul_w 0111 01001000 01010 ..... ..... ..... @vvv
+xvmul_d 0111 01001000 01011 ..... ..... ..... @vvv
+xvmuh_b 0111 01001000 01100 ..... ..... ..... @vvv
+xvmuh_h 0111 01001000 01101 ..... ..... ..... @vvv
+xvmuh_w 0111 01001000 01110 ..... ..... ..... @vvv
+xvmuh_d 0111 01001000 01111 ..... ..... ..... @vvv
+xvmuh_bu 0111 01001000 10000 ..... ..... ..... @vvv
+xvmuh_hu 0111 01001000 10001 ..... ..... ..... @vvv
+xvmuh_wu 0111 01001000 10010 ..... ..... ..... @vvv
+xvmuh_du 0111 01001000 10011 ..... ..... ..... @vvv
+
+xvmulwev_h_b 0111 01001001 00000 ..... ..... ..... @vvv
+xvmulwev_w_h 0111 01001001 00001 ..... ..... ..... @vvv
+xvmulwev_d_w 0111 01001001 00010 ..... ..... ..... @vvv
+xvmulwev_q_d 0111 01001001 00011 ..... ..... ..... @vvv
+xvmulwod_h_b 0111 01001001 00100 ..... ..... ..... @vvv
+xvmulwod_w_h 0111 01001001 00101 ..... ..... ..... @vvv
+xvmulwod_d_w 0111 01001001 00110 ..... ..... ..... @vvv
+xvmulwod_q_d 0111 01001001 00111 ..... ..... ..... @vvv
+xvmulwev_h_bu 0111 01001001 10000 ..... ..... ..... @vvv
+xvmulwev_w_hu 0111 01001001 10001 ..... ..... ..... @vvv
+xvmulwev_d_wu 0111 01001001 10010 ..... ..... ..... @vvv
+xvmulwev_q_du 0111 01001001 10011 ..... ..... ..... @vvv
+xvmulwod_h_bu 0111 01001001 10100 ..... ..... ..... @vvv
+xvmulwod_w_hu 0111 01001001 10101 ..... ..... ..... @vvv
+xvmulwod_d_wu 0111 01001001 10110 ..... ..... ..... @vvv
+xvmulwod_q_du 0111 01001001 10111 ..... ..... ..... @vvv
+xvmulwev_h_bu_b 0111 01001010 00000 ..... ..... ..... @vvv
+xvmulwev_w_hu_h 0111 01001010 00001 ..... ..... ..... @vvv
+xvmulwev_d_wu_w 0111 01001010 00010 ..... ..... ..... @vvv
+xvmulwev_q_du_d 0111 01001010 00011 ..... ..... ..... @vvv
+xvmulwod_h_bu_b 0111 01001010 00100 ..... ..... ..... @vvv
+xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv
+xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv
+xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmini_hu, vv_i)
 INSN_LASX(xvmini_wu, vv_i)
 INSN_LASX(xvmini_du, vv_i)
 
+INSN_LASX(xvmul_b, vvv)
+INSN_LASX(xvmul_h, vvv)
+INSN_LASX(xvmul_w, vvv)
+INSN_LASX(xvmul_d, vvv)
+INSN_LASX(xvmuh_b, vvv)
+INSN_LASX(xvmuh_h, vvv)
+INSN_LASX(xvmuh_w, vvv)
+INSN_LASX(xvmuh_d, vvv)
+INSN_LASX(xvmuh_bu, vvv)
+INSN_LASX(xvmuh_hu, vvv)
+INSN_LASX(xvmuh_wu, vvv)
+INSN_LASX(xvmuh_du, vvv)
+
+INSN_LASX(xvmulwev_h_b, vvv)
+INSN_LASX(xvmulwev_w_h, vvv)
+INSN_LASX(xvmulwev_d_w, vvv)
+INSN_LASX(xvmulwev_q_d, vvv)
+INSN_LASX(xvmulwod_h_b, vvv)
+INSN_LASX(xvmulwod_w_h, vvv)
+INSN_LASX(xvmulwod_d_w, vvv)
+INSN_LASX(xvmulwod_q_d, vvv)
+INSN_LASX(xvmulwev_h_bu, vvv)
+INSN_LASX(xvmulwev_w_hu, vvv)
+INSN_LASX(xvmulwev_d_wu, vvv)
+INSN_LASX(xvmulwev_q_du, vvv)
+INSN_LASX(xvmulwod_h_bu, vvv)
+INSN_LASX(xvmulwod_w_hu, vvv)
+INSN_LASX(xvmulwod_d_wu, vvv)
+INSN_LASX(xvmulwod_q_du, vvv)
+INSN_LASX(xvmulwev_h_bu_b, vvv)
+INSN_LASX(xvmulwev_w_hu_h, vvv)
+INSN_LASX(xvmulwev_d_wu_w, vvv)
+INSN_LASX(xvmulwev_q_du_d, vvv)
+INSN_LASX(xvmulwod_h_bu_b, vvv)
+INSN_LASX(xvmulwod_w_hu_h, vvv)
+INSN_LASX(xvmulwod_d_wu_w, vvv)
+INSN_LASX(xvmulwod_q_du_d, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
 VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
 VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
 
-#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    typedef __typeof(Vd->E1(0)) T; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
-    } \
+#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    typedef __typeof(Vd->E1(0)) T; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
+    } \
 }
 
-void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    uint64_t l, h1, h2;
+    int i;
+    uint64_t l, h;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
     VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
 
-    muls64(&l, &h1, Vj->D(0), Vk->D(0));
-    muls64(&l, &h2, Vj->D(1), Vk->D(1));
-
-    Vd->D(0) = h1;
-    Vd->D(1) = h2;
+    for (i = 0; i < oprsz / 8; i++) {
+        muls64(&l, &h, Vj->D(i), Vk->D(i));
+        Vd->D(i) = h;
+    }
 }
 
 DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
 DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
 DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
 
-void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
 {
-    uint64_t l, h1, h2;
+    int i;
+    uint64_t l, h;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
     VReg *Vk = (VReg *)vk;
+    int oprsz = simd_oprsz(desc);
 
-    mulu64(&l, &h1, Vj->D(0), Vk->D(0));
-    mulu64(&l, &h2, Vj->D(1), Vk->D(1));
-
-    Vd->D(0) = h1;
-    Vd->D(1) = h2;
+    for (i = 0; i < oprsz / 8; i++) {
+        mulu64(&l, &h, Vj->D(i), Vk->D(i));
+        Vd->D(i) = h;
+    }
 }
 
 DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
 TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
 TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
 TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
+TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
+TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
+TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
+TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
 
 static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
 TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
 TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
 TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
+TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
+TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
+TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
+TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
 
 static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
 TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
 TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
 TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
+TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u)
+TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
+TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
+TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
 
 static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
 TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
 TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
+TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
+TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
+TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
 
 static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
                                TCGv_i64 arg1, TCGv_i64 arg2)
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
     tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
 }
 
-#define VMUL_Q(NAME, FN, idx1, idx2) \
-static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
-{ \
-    TCGv_i64 rh, rl, arg1, arg2; \
- \
-    if (!avail_LSX(ctx)) { \
-        return false; \
-    } \
- \
-    rh = tcg_temp_new_i64(); \
-    rl = tcg_temp_new_i64(); \
-    arg1 = tcg_temp_new_i64(); \
-    arg2 = tcg_temp_new_i64(); \
- \
-    get_vreg64(arg1, a->vj, idx1); \
-    get_vreg64(arg2, a->vk, idx2); \
- \
-    tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \
- \
-    set_vreg64(rh, a->vd, 1); \
-    set_vreg64(rl, a->vd, 0); \
- \
-    return true; \
+static bool gen_vmul_q_vl(DisasContext *ctx,
+                          arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+                          void (*func)(TCGv_i64, TCGv_i64,
+                                       TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 rh, rl, arg1, arg2;
+    int i;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    rh = tcg_temp_new_i64();
+    rl = tcg_temp_new_i64();
+    arg1 = tcg_temp_new_i64();
+    arg2 = tcg_temp_new_i64();
+
+    for (i = 0; i < oprsz / 16; i++) {
+        get_vreg64(arg1, a->vj, 2 * i + idx1);
+        get_vreg64(arg2, a->vk, 2 * i + idx2);
+
+        func(rl, rh, arg1, arg2);
+
+        set_vreg64(rh, a->vd, 2 * i + 1);
+        set_vreg64(rl, a->vd, 2 * i);
+    }
+
+    return true;
+}
+
+static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                       void (*func)(TCGv_i64, TCGv_i64,
+                                    TCGv_i64, TCGv_i64))
+{
+    return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                        void (*func)(TCGv_i64, TCGv_i64,
+                                     TCGv_i64, TCGv_i64))
+{
+    return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
 }
 
-VMUL_Q(vmulwev_q_d, muls2, 0, 0)
-VMUL_Q(vmulwod_q_d, muls2, 1, 1)
-VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
-VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
-VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
-VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
+TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
 
 static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
 TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
 TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
+TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
+TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
+TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
 
 static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
 TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
 TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
+TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
+TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
+TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
 
 static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
 TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
 TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
+TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
+TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
+TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
 
 static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
 TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
 TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
+TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
+TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
+TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
 
 static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
 TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
 TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
+TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
+TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
+TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
 
 static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
--
2.39.1
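
The new gen_vmul_q_vl above walks the register in 128-bit lanes: per lane it reads one 64-bit element from each source (even or odd, per idx1/idx2), forms the full 128-bit product, and stores the low/high halves into the even/odd elements of the destination. A standalone model of one such lane-wise pass for vmulwev.q.d/xvmulwev.q.d (illustrative only; it uses the GCC/Clang __int128 extension in place of tcg_gen_muls2_i64):

#include <stdint.h>
#include <stdio.h>

static void mulwev_q_d_model(uint64_t *d, const int64_t *j, const int64_t *k,
                             int oprsz)
{
    for (int i = 0; i < oprsz / 16; i++) {
        __int128 p = (__int128)j[2 * i] * k[2 * i];   /* even source elements */
        d[2 * i] = (uint64_t)p;                       /* rl -> even element   */
        d[2 * i + 1] = (uint64_t)(p >> 64);           /* rh -> odd element    */
    }
}

int main(void)
{
    int64_t j[4] = { INT64_MIN, 0, 3, 0 };
    int64_t k[4] = { 2, 0, -5, 0 };
    uint64_t d[4];

    mulwev_q_d_model(d, j, k, 32);   /* LASX width: two 128-bit lanes */
    printf("lane0: hi=%#llx lo=%#llx\n",   /* INT64_MIN * 2 == -2^64 */
           (unsigned long long)d[1], (unsigned long long)d[0]);
    return 0;
}
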
This patch includes:
- XVMADD.{B/H/W/D};
- XVMSUB.{B/H/W/D};
- XVMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-27-gaosong@loongson.cn>
---
 target/loongarch/insns.decode | 34 ++++++
 target/loongarch/disas.c | 34 ++++++
 target/loongarch/vec_helper.c | 112 +++++++++---------
 target/loongarch/insn_trans/trans_vec.c.inc | 121 ++++++++++++++------
 4 files changed, 212 insertions(+), 89 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv
 xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv
 xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv
 
+xvmadd_b 0111 01001010 10000 ..... ..... ..... @vvv
+xvmadd_h 0111 01001010 10001 ..... ..... ..... @vvv
+xvmadd_w 0111 01001010 10010 ..... ..... ..... @vvv
+xvmadd_d 0111 01001010 10011 ..... ..... ..... @vvv
+xvmsub_b 0111 01001010 10100 ..... ..... ..... @vvv
+xvmsub_h 0111 01001010 10101 ..... ..... ..... @vvv
+xvmsub_w 0111 01001010 10110 ..... ..... ..... @vvv
+xvmsub_d 0111 01001010 10111 ..... ..... ..... @vvv
+
+xvmaddwev_h_b 0111 01001010 11000 ..... ..... ..... @vvv
+xvmaddwev_w_h 0111 01001010 11001 ..... ..... ..... @vvv
+xvmaddwev_d_w 0111 01001010 11010 ..... ..... ..... @vvv
+xvmaddwev_q_d 0111 01001010 11011 ..... ..... ..... @vvv
+xvmaddwod_h_b 0111 01001010 11100 ..... ..... ..... @vvv
+xvmaddwod_w_h 0111 01001010 11101 ..... ..... ..... @vvv
+xvmaddwod_d_w 0111 01001010 11110 ..... ..... ..... @vvv
+xvmaddwod_q_d 0111 01001010 11111 ..... ..... ..... @vvv
+xvmaddwev_h_bu 0111 01001011 01000 ..... ..... ..... @vvv
+xvmaddwev_w_hu 0111 01001011 01001 ..... ..... ..... @vvv
+xvmaddwev_d_wu 0111 01001011 01010 ..... ..... ..... @vvv
+xvmaddwev_q_du 0111 01001011 01011 ..... ..... ..... @vvv
+xvmaddwod_h_bu 0111 01001011 01100 ..... ..... ..... @vvv
+xvmaddwod_w_hu 0111 01001011 01101 ..... ..... ..... @vvv
+xvmaddwod_d_wu 0111 01001011 01110 ..... ..... ..... @vvv
+xvmaddwod_q_du 0111 01001011 01111 ..... ..... ..... @vvv
+xvmaddwev_h_bu_b 0111 01001011 11000 ..... ..... ..... @vvv
+xvmaddwev_w_hu_h 0111 01001011 11001 ..... ..... ..... @vvv
+xvmaddwev_d_wu_w 0111 01001011 11010 ..... ..... ..... @vvv
+xvmaddwev_q_du_d 0111 01001011 11011 ..... ..... ..... @vvv
+xvmaddwod_h_bu_b 0111 01001011 11100 ..... ..... ..... @vvv
+xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv
+xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv
+xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmulwod_w_hu_h, vvv)
 INSN_LASX(xvmulwod_d_wu_w, vvv)
 INSN_LASX(xvmulwod_q_du_d, vvv)
 
+INSN_LASX(xvmadd_b, vvv)
+INSN_LASX(xvmadd_h, vvv)
+INSN_LASX(xvmadd_w, vvv)
+INSN_LASX(xvmadd_d, vvv)
+INSN_LASX(xvmsub_b, vvv)
+INSN_LASX(xvmsub_h, vvv)
+INSN_LASX(xvmsub_w, vvv)
+INSN_LASX(xvmsub_d, vvv)
+
+INSN_LASX(xvmaddwev_h_b, vvv)
+INSN_LASX(xvmaddwev_w_h, vvv)
+INSN_LASX(xvmaddwev_d_w, vvv)
+INSN_LASX(xvmaddwev_q_d, vvv)
+INSN_LASX(xvmaddwod_h_b, vvv)
+INSN_LASX(xvmaddwod_w_h, vvv)
+INSN_LASX(xvmaddwod_d_w, vvv)
+INSN_LASX(xvmaddwod_q_d, vvv)
+INSN_LASX(xvmaddwev_h_bu, vvv)
+INSN_LASX(xvmaddwev_w_hu, vvv)
+INSN_LASX(xvmaddwev_d_wu, vvv)
+INSN_LASX(xvmaddwev_q_du, vvv)
+INSN_LASX(xvmaddwod_h_bu, vvv)
+INSN_LASX(xvmaddwod_w_hu, vvv)
+INSN_LASX(xvmaddwod_d_wu, vvv)
+INSN_LASX(xvmaddwod_q_du, vvv)
+INSN_LASX(xvmaddwev_h_bu_b, vvv)
+INSN_LASX(xvmaddwev_w_hu_h, vvv)
+INSN_LASX(xvmaddwev_d_wu_w, vvv)
+INSN_LASX(xvmaddwev_q_du_d, vvv)
+INSN_LASX(xvmaddwod_h_bu_b, vvv)
+INSN_LASX(xvmaddwod_w_hu_h, vvv)
+INSN_LASX(xvmaddwod_d_wu_w, vvv)
+INSN_LASX(xvmaddwod_q_du_d, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
 #define DO_MADD(a, b, c) (a + b * c)
 #define DO_MSUB(a, b, c) (a - b * c)
 
-#define VMADDSUB(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \
-    } \
+#define VMADDSUB(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \
+    } \
 }
 
 VMADDSUB(vmadd_b, 8, B, DO_MADD)
@@ -XXX,XX +XXX,XX @@ VMADDSUB(vmsub_w, 32, W, DO_MSUB)
 VMADDSUB(vmsub_d, 64, D, DO_MSUB)
 
 #define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
     int i; \
     VReg *Vd = (VReg *)vd; \
     VReg *Vj = (VReg *)vj; \
     VReg *Vk = (VReg *)vk; \
     typedef __typeof(Vd->E1(0)) TD; \
+    int oprsz = simd_oprsz(desc); \
 \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
         Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
     } \
 }
@@ -XXX,XX +XXX,XX @@ VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
 VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
 VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
 
-#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    typedef __typeof(Vd->E1(0)) TD; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
-                           (TD)Vk->E2(2 * i + 1)); \
-    } \
+#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    typedef __typeof(Vd->E1(0)) TD; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
+                           (TD)Vk->E2(2 * i + 1)); \
+    } \
 }
 
 VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
@@ -XXX,XX +XXX,XX @@ VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL)
 VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL)
 VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL)
 
-#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    typedef __typeof(Vd->ES1(0)) TS1; \
-    typedef __typeof(Vd->EU1(0)) TU1; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
-                            (TS1)Vk->ES2(2 * i)); \
-    } \
+#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    typedef __typeof(Vd->ES1(0)) TS1; \
+    typedef __typeof(Vd->EU1(0)) TU1; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
+                            (TS1)Vk->ES2(2 * i)); \
+    } \
 }
 
 VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
 VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
 VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
 
-#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
-    typedef __typeof(Vd->ES1(0)) TS1; \
-    typedef __typeof(Vd->EU1(0)) TU1; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
-                            (TS1)Vk->ES2(2 * i + 1)); \
-    } \
+#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    typedef __typeof(Vd->ES1(0)) TS1; \
+    typedef __typeof(Vd->EU1(0)) TU1; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
+                            (TS1)Vk->ES2(2 * i + 1)); \
+    } \
 }
 
 VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
 TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
 TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
 TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
+TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
+TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
+TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
+TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
 
 static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
 TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
 TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
 TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
+TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
+TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
+TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
+TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
 
 static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
 TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
 TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
+TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
+TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
+TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
 
-#define VMADD_Q(NAME, FN, idx1, idx2) \
-static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
-{ \
-    TCGv_i64 rh, rl, arg1, arg2, th, tl; \
- \
-    if (!avail_LSX(ctx)) { \
-        return false; \
-    } \
- \
-    rh = tcg_temp_new_i64(); \
-    rl = tcg_temp_new_i64(); \
-    arg1 = tcg_temp_new_i64(); \
-    arg2 = tcg_temp_new_i64(); \
-    th = tcg_temp_new_i64(); \
-    tl = tcg_temp_new_i64(); \
- \
-    get_vreg64(arg1, a->vj, idx1); \
-    get_vreg64(arg2, a->vk, idx2); \
-    get_vreg64(rh, a->vd, 1); \
-    get_vreg64(rl, a->vd, 0); \
- \
-    tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \
-    tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \
- \
-    set_vreg64(rh, a->vd, 1); \
-    set_vreg64(rl, a->vd, 0); \
- \
-    return true; \
-}
-
-VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
-VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
-VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
-VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
-VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
-VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
+static bool gen_vmadd_q_vl(DisasContext *ctx,
+                           arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+                           void (*func)(TCGv_i64, TCGv_i64,
+                                        TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 rh, rl, arg1, arg2, th, tl;
+    int i;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    rh = tcg_temp_new_i64();
+    rl = tcg_temp_new_i64();
+    arg1 = tcg_temp_new_i64();
+    arg2 = tcg_temp_new_i64();
+    th = tcg_temp_new_i64();
+    tl = tcg_temp_new_i64();
+
+    for (i = 0; i < oprsz / 16; i++) {
+        get_vreg64(arg1, a->vj, 2 * i + idx1);
+        get_vreg64(arg2, a->vk, 2 * i + idx2);
+        get_vreg64(rh, a->vd, 2 * i + 1);
+        get_vreg64(rl, a->vd, 2 * i);
+
+        func(tl, th, arg1, arg2);
+        tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
+
+        set_vreg64(rh, a->vd, 2 * i + 1);
+        set_vreg64(rl, a->vd, 2 * i);
+    }
+
+    return true;
+}
+
+static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                        void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+                         void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
390
+TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
391
+TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
392
+TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
393
394
static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
395
{
396
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
397
TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
398
TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
399
TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
400
+TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
401
+TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
402
+TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
403
404
static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
405
{
406
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
407
TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
408
TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
409
TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
410
+TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
411
+TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
412
+TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
413
414
static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
415
{
416
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
417
TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
418
TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
419
TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
420
+TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
421
+TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
422
+TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
423
424
static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
425
{
426
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
427
TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
428
TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
429
TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
430
+TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
431
+TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
432
+TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
433
434
static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
435
{
436
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
437
TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
438
TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
439
TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
440
+TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
441
+TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
442
+TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
443
444
TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
445
TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
446
--
447
2.39.1
diff view generated by jsdifflib
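
Note on the gen_vmadd_q_vl rework above: the q_d forms do a 64x64->128
multiply-accumulate per 128-bit lane, built from muls2/mulu2/mulus2 plus
add2 on 64-bit halves. A minimal host-side sketch of one lane, assuming
compiler __int128 support (vmaddwev_q_d_ref is a made-up name, not code
from the patch):

    #include <stdint.h>

    /* vmaddwev.q.d, one 128-bit lane:
     * vd.q += (s128)vj.d[even] * (s128)vk.d[even].
     * idx1 = idx2 = 0 selects the even doubleword; the LASX form simply
     * runs this twice (oprsz / 16 == 2), indexing with 2 * i + idx. */
    static void vmaddwev_q_d_ref(__int128 *vd_q, const int64_t *vj_d,
                                 const int64_t *vk_d)
    {
        *vd_q += (__int128)vj_d[0] * (__int128)vk_d[0];
    }
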
This patch includes:
- XVDIV.{B/H/W/D}[U];
- XVMOD.{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-28-gaosong@loongson.cn>
---
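
Note: the two-line helper change below is the whole trick that lets one
helper body serve both vector widths. A rough standalone model of the new
loop (VRegToy and vdiv_b_toy are made-up names; the plain '/' is a
simplification, since the in-tree DO_DIV macro also has to deal with
division by zero and the INT_MIN / -1 case):

    #include <stdint.h>

    typedef union {
        int8_t B[32];
    } VRegToy;

    /* oprsz comes from simd_oprsz(desc): 16 for vdiv.b (LSX), 32 for
     * xvdiv.b (LASX); the loop body is identical for both. */
    static void vdiv_b_toy(VRegToy *vd, const VRegToy *vj,
                           const VRegToy *vk, int oprsz)
    {
        for (int i = 0; i < oprsz / (8 / 8); i++) {
            vd->B[i] = vk->B[i] ? vj->B[i] / vk->B[i] : 0;
        }
    }
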
 target/loongarch/insns.decode | 17 +++++++++++++++++
 target/loongarch/disas.c | 17 +++++++++++++++++
 target/loongarch/vec_helper.c | 4 +++-
 target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv
 xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv
 xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv

+xvdiv_b 0111 01001110 00000 ..... ..... ..... @vvv
+xvdiv_h 0111 01001110 00001 ..... ..... ..... @vvv
+xvdiv_w 0111 01001110 00010 ..... ..... ..... @vvv
+xvdiv_d 0111 01001110 00011 ..... ..... ..... @vvv
+xvmod_b 0111 01001110 00100 ..... ..... ..... @vvv
+xvmod_h 0111 01001110 00101 ..... ..... ..... @vvv
+xvmod_w 0111 01001110 00110 ..... ..... ..... @vvv
+xvmod_d 0111 01001110 00111 ..... ..... ..... @vvv
+xvdiv_bu 0111 01001110 01000 ..... ..... ..... @vvv
+xvdiv_hu 0111 01001110 01001 ..... ..... ..... @vvv
+xvdiv_wu 0111 01001110 01010 ..... ..... ..... @vvv
+xvdiv_du 0111 01001110 01011 ..... ..... ..... @vvv
+xvmod_bu 0111 01001110 01100 ..... ..... ..... @vvv
+xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv
+xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv
+xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmaddwod_w_hu_h, vvv)
 INSN_LASX(xvmaddwod_d_wu_w, vvv)
 INSN_LASX(xvmaddwod_q_du_d, vvv)

+INSN_LASX(xvdiv_b, vvv)
+INSN_LASX(xvdiv_h, vvv)
+INSN_LASX(xvdiv_w, vvv)
+INSN_LASX(xvdiv_d, vvv)
+INSN_LASX(xvdiv_bu, vvv)
+INSN_LASX(xvdiv_hu, vvv)
+INSN_LASX(xvdiv_wu, vvv)
+INSN_LASX(xvdiv_du, vvv)
+INSN_LASX(xvmod_b, vvv)
+INSN_LASX(xvmod_h, vvv)
+INSN_LASX(xvmod_w, vvv)
+INSN_LASX(xvmod_d, vvv)
+INSN_LASX(xvmod_bu, vvv)
+INSN_LASX(xvmod_hu, vvv)
+INSN_LASX(xvmod_wu, vvv)
+INSN_LASX(xvmod_du, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
     VReg *Vd = (VReg *)vd; \
     VReg *Vj = (VReg *)vj; \
     VReg *Vk = (VReg *)vk; \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
+    int oprsz = simd_oprsz(desc); \
+    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
     } \
 }
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
 TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
 TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
 TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
+TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
+TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
+TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
+TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
+TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
+TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
+TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
+TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
+TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
+TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
+TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
+TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
+TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
+TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
+TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
+TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)

 static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
 {
--
2.39.1
This patch includes:
- XVSAT.{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-29-gaosong@loongson.cn>
---
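
Note: the saturation logic itself is untouched; only the trip count now
scales with simd_oprsz(). A scalar model of the signed clamp (sat_s8 is a
made-up name; 'max' is the bound the translator passes in, e.g. 0x07 for
vsat.b with ui3 = 3, so (int8_t)~max is -8):

    #include <stdint.h>

    static int8_t sat_s8(int8_t x, int8_t max)
    {
        return x > max ? max : (x < (int8_t)~max ? (int8_t)~max : x);
    }

    /* sat_s8(100, 7) == 7, sat_s8(-100, 7) == -8, sat_s8(3, 7) == 3 */
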
 target/loongarch/insns.decode | 9 ++++
 target/loongarch/disas.c | 9 ++++
 target/loongarch/vec_helper.c | 48 +++++++++++----------
 target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv
 xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv
 xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv

+xvsat_b 0111 01110010 01000 01 ... ..... ..... @vv_ui3
+xvsat_h 0111 01110010 01000 1 .... ..... ..... @vv_ui4
+xvsat_w 0111 01110010 01001 ..... ..... ..... @vv_ui5
+xvsat_d 0111 01110010 0101 ...... ..... ..... @vv_ui6
+xvsat_bu 0111 01110010 10000 01 ... ..... ..... @vv_ui3
+xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4
+xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5
+xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmod_hu, vvv)
 INSN_LASX(xvmod_wu, vvv)
 INSN_LASX(xvmod_du, vvv)

+INSN_LASX(xvsat_b, vv_i)
+INSN_LASX(xvsat_h, vv_i)
+INSN_LASX(xvsat_w, vv_i)
+INSN_LASX(xvsat_d, vv_i)
+INSN_LASX(xvsat_bu, vv_i)
+INSN_LASX(xvsat_hu, vv_i)
+INSN_LASX(xvsat_wu, vv_i)
+INSN_LASX(xvsat_du, vv_i)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VDIV(vmod_hu, 16, UH, DO_REMU)
 VDIV(vmod_wu, 32, UW, DO_REMU)
 VDIV(vmod_du, 64, UD, DO_REMU)

-#define VSAT_S(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    typedef __typeof(Vd->E(0)) TD; \
-    \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
-                   Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
-    } \
+#define VSAT_S(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    typedef __typeof(Vd->E(0)) TD; \
+    int oprsz = simd_oprsz(desc); \
+    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
+                   Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
+    } \
 }

 VSAT_S(vsat_b, 8, B)
@@ -XXX,XX +XXX,XX @@ VSAT_S(vsat_h, 16, H)
 VSAT_S(vsat_w, 32, W)
 VSAT_S(vsat_d, 64, D)

-#define VSAT_U(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    typedef __typeof(Vd->E(0)) TD; \
-    \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
-    } \
+#define VSAT_U(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+    int i; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    typedef __typeof(Vd->E(0)) TD; \
+    int oprsz = simd_oprsz(desc); \
+    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
+    } \
 }

 VSAT_U(vsat_bu, 8, UB)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
 TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
 TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
 TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
+TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
+TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
+TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
+TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)

 static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
 TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
 TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
 TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
+TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
+TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
+TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
+TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)

 TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
 TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
--
2.39.1
This patch includes:
- XVEXTH.{H.B/W.H/D.W/Q.D};
- XVEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-30-gaosong@loongson.cn>
---
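
Note: the interesting part of the VEXTH rework is the index arithmetic,
which keeps the operation local to each 128-bit lane. A sketch of
xvexth.h.b over stand-in arrays (xvexth_h_b_ref, dst_h and src_b are not
QEMU names):

    #include <stdint.h>

    static void xvexth_h_b_ref(int16_t dst_h[16], const int8_t src_b[32])
    {
        for (int i = 0; i < 2; i++) {       /* oprsz / 16: one pass per lane */
            for (int j = 0; j < 8; j++) {   /* ofs = LSX_LEN / BIT = 8 */
                /* lane 0 reads bytes 8..15, lane 1 reads bytes 24..31,
                 * i.e. Vj->E2(j + ofs + ofs * 2 * i) in the macro */
                dst_h[j + i * 8] = src_b[j + 8 + 16 * i];
            }
        }
    }
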
 target/loongarch/insns.decode | 9 ++++++
 target/loongarch/disas.c | 9 ++++++
 target/loongarch/vec_helper.c | 36 ++++++++++++++-------
 target/loongarch/insn_trans/trans_vec.c.inc | 21 +++++++++---
 4 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4
 xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5
 xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6

+xvexth_h_b 0111 01101001 11101 11000 ..... ..... @vv
+xvexth_w_h 0111 01101001 11101 11001 ..... ..... @vv
+xvexth_d_w 0111 01101001 11101 11010 ..... ..... @vv
+xvexth_q_d 0111 01101001 11101 11011 ..... ..... @vv
+xvexth_hu_bu 0111 01101001 11101 11100 ..... ..... @vv
+xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv
+xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv
+xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsat_hu, vv_i)
 INSN_LASX(xvsat_wu, vv_i)
 INSN_LASX(xvsat_du, vv_i)

+INSN_LASX(xvexth_h_b, vv)
+INSN_LASX(xvexth_w_h, vv)
+INSN_LASX(xvexth_d_w, vv)
+INSN_LASX(xvexth_q_d, vv)
+INSN_LASX(xvexth_hu_bu, vv)
+INSN_LASX(xvexth_wu_hu, vv)
+INSN_LASX(xvexth_du_wu, vv)
+INSN_LASX(xvexth_qu_du, vv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_hu, 16, UH)
 VSAT_U(vsat_wu, 32, UW)
 VSAT_U(vsat_du, 64, UD)

-#define VEXTH(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
-    int i; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
-    } \
+#define VEXTH(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+    int i, j, ofs; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    int oprsz = simd_oprsz(desc); \
+    \
+    ofs = LSX_LEN / BIT; \
+    for (i = 0; i < oprsz / 16; i++) { \
+        for (j = 0; j < ofs; j++) { \
+            Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
+        } \
+    } \
 }

 void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    Vd->Q(0) = int128_makes64(Vj->D(1));
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
+    }
 }

 void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
+    for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
+    }
 }

 VEXTH(vexth_h_b, 16, H, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
 static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
                       gen_helper_gvec_2 *fn)
 {
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
     tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
                        vec_full_offset(a->vj),
                        oprsz, ctx->vl / 8, 0, fn);
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,

 static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
 {
-    if (!check_vec(ctx, 16)) {
-        return true;
-    }
-
     return gen_vv_vl(ctx, a, 16, fn);
 }

+static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+    return gen_vv_vl(ctx, a, 32, fn);
+}
+
 static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
                         gen_helper_gvec_2i *fn)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
 TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
 TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
 TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
+TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
+TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
+TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
+TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
+TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
+TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
+TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
+TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)

 static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
--
2.39.1
This patch includes:
- VEXT2XV.{H/W/D}.B, VEXT2XV.{HU/WU/DU}.BU;
- VEXT2XV.{W/D}.H, VEXT2XV.{WU/DU}.HU;
- VEXT2XV.D.W, VEXT2XV.DU.WU.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-31-gaosong@loongson.cn>
---
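
Note: unlike xvexth in the previous patch, vext2xv is not lane-local: it
widens the low elements of the whole 256-bit source across the whole
destination, which is why the helper builds the result in a temporary
(Vd may alias Vj). A sketch for vext2xv.h.b with made-up names:

    #include <stdint.h>
    #include <string.h>

    static void vext2xv_h_b_ref(int16_t dst_h[16], const int8_t src_b[32])
    {
        int16_t tmp[16];                /* oprsz / (16 / 8) = 16 elements */

        for (int i = 0; i < 16; i++) {
            tmp[i] = src_b[i];          /* low 16 bytes, sign-extended */
        }
        memcpy(dst_h, tmp, sizeof(tmp));
    }
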
 target/loongarch/helper.h | 13 ++++++++++
 target/loongarch/insns.decode | 13 ++++++++++
 target/loongarch/disas.c | 13 ++++++++++
 target/loongarch/vec_helper.c | 28 +++++++++++++++++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++++++
 5 files changed, 80 insertions(+)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

+DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv
 xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv
 xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv

+vext2xv_h_b 0111 01101001 11110 00100 ..... ..... @vv
+vext2xv_w_b 0111 01101001 11110 00101 ..... ..... @vv
+vext2xv_d_b 0111 01101001 11110 00110 ..... ..... @vv
+vext2xv_w_h 0111 01101001 11110 00111 ..... ..... @vv
+vext2xv_d_h 0111 01101001 11110 01000 ..... ..... @vv
+vext2xv_d_w 0111 01101001 11110 01001 ..... ..... @vv
+vext2xv_hu_bu 0111 01101001 11110 01010 ..... ..... @vv
+vext2xv_wu_bu 0111 01101001 11110 01011 ..... ..... @vv
+vext2xv_du_bu 0111 01101001 11110 01100 ..... ..... @vv
+vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv
+vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv
+vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvexth_wu_hu, vv)
 INSN_LASX(xvexth_du_wu, vv)
 INSN_LASX(xvexth_qu_du, vv)

+INSN_LASX(vext2xv_h_b, vv)
+INSN_LASX(vext2xv_w_b, vv)
+INSN_LASX(vext2xv_d_b, vv)
+INSN_LASX(vext2xv_w_h, vv)
+INSN_LASX(vext2xv_d_h, vv)
+INSN_LASX(vext2xv_d_w, vv)
+INSN_LASX(vext2xv_hu_bu, vv)
+INSN_LASX(vext2xv_wu_bu, vv)
+INSN_LASX(vext2xv_du_bu, vv)
+INSN_LASX(vext2xv_wu_hu, vv)
+INSN_LASX(vext2xv_du_hu, vv)
+INSN_LASX(vext2xv_du_wu, vv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VEXTH(vexth_hu_bu, 16, UH, UB)
 VEXTH(vexth_wu_hu, 32, UW, UH)
 VEXTH(vexth_du_wu, 64, UD, UW)

+#define VEXT2XV(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+    int i; \
+    VReg temp = {}; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    int oprsz = simd_oprsz(desc); \
+    \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        temp.E1(i) = Vj->E2(i); \
+    } \
+    *Vd = temp; \
+}
+
+VEXT2XV(vext2xv_h_b, 16, H, B)
+VEXT2XV(vext2xv_w_b, 32, W, B)
+VEXT2XV(vext2xv_d_b, 64, D, B)
+VEXT2XV(vext2xv_w_h, 32, W, H)
+VEXT2XV(vext2xv_d_h, 64, D, H)
+VEXT2XV(vext2xv_d_w, 64, D, W)
+VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
+VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
+VEXT2XV(vext2xv_du_bu, 64, UD, UB)
+VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
+VEXT2XV(vext2xv_du_hu, 64, UD, UH)
+VEXT2XV(vext2xv_du_wu, 64, UD, UW)
+
 #define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)

 DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
 TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
 TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)

+TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
+TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
+TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
+TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
+TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
+TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
+TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
+TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
+TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
+TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
+TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
+TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
+
 static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 {
     TCGv_vec t1, zero;
--
2.39.1
This patch includes:
- XVSIGNCOV.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-32-gaosong@loongson.cn>
---
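
Note: the per-element operation is DO_SIGNCOV, already in vec_helper.c
from the LSX implementation, so only decode/disas/translation entries
are needed here. Scalar model (signcov32 is a made-up name):

    #include <stdint.h>

    static int32_t signcov32(int32_t a, int32_t b)
    {
        return a == 0 ? 0 : (a < 0 ? -b : b);
    }
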
 target/loongarch/insns.decode | 5 +++++
 target/loongarch/disas.c | 5 +++++
 target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++
 3 files changed, 14 insertions(+)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv
 vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv
 vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv

+xvsigncov_b 0111 01010010 11100 ..... ..... ..... @vvv
+xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv
+xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv
+xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(vext2xv_wu_hu, vv)
 INSN_LASX(vext2xv_du_hu, vv)
 INSN_LASX(vext2xv_du_wu, vv)

+INSN_LASX(xvsigncov_b, vvv)
+INSN_LASX(xvsigncov_h, vvv)
+INSN_LASX(xvsigncov_w, vvv)
+INSN_LASX(xvsigncov_d, vvv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
 TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
 TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
 TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
+TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
+TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
+TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
+TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)

 TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
 TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
--
2.39.1
This patch includes:
- XVMSKLTZ.{B/H/W/D};
- XVMSKGEZ.B;
- XVMSKNZ.B.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-33-gaosong@loongson.cn>
---
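
Note: each mask helper now produces one 16-bit result per 128-bit lane,
stored in that lane's even doubleword with the odd doubleword zeroed. A
scalar model of the mask do_vmskltz_b computes for one 64-bit chunk
(mskltz_b64 is a made-up name; the in-tree helper computes the same mask,
just not necessarily with a loop):

    #include <stdint.h>

    static uint16_t mskltz_b64(uint64_t d)
    {
        uint16_t m = 0;

        for (int i = 0; i < 8; i++) {
            m |= ((d >> (8 * i + 7)) & 1) << i;    /* sign bit of byte i */
        }
        return m;
    }
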
 target/loongarch/insns.decode | 7 ++
 target/loongarch/disas.c | 7 ++
 target/loongarch/vec_helper.c | 78 ++++++++++++++-------
 target/loongarch/insn_trans/trans_vec.c.inc | 6 ++
 4 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv
 xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv
 xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv

+xvmskltz_b 0111 01101001 11000 10000 ..... ..... @vv
+xvmskltz_h 0111 01101001 11000 10001 ..... ..... @vv
+xvmskltz_w 0111 01101001 11000 10010 ..... ..... @vv
+xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv
+xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv
+xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsigncov_h, vvv)
 INSN_LASX(xvsigncov_w, vvv)
 INSN_LASX(xvsigncov_d, vvv)

+INSN_LASX(xvmskltz_b, vv)
+INSN_LASX(xvmskltz_h, vv)
+INSN_LASX(xvmskltz_w, vv)
+INSN_LASX(xvmskltz_d, vv)
+INSN_LASX(xvmskgez_b, vv)
+INSN_LASX(xvmsknz_b, vv)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_b(int64_t val)

 void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskltz_b(Vj->D(0));
-    temp |= (do_vmskltz_b(Vj->D(1)) << 8);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_b(Vj->D(2 * i));
+        temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 static uint64_t do_vmskltz_h(int64_t val)
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_h(int64_t val)

 void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskltz_h(Vj->D(0));
-    temp |= (do_vmskltz_h(Vj->D(1)) << 4);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_h(Vj->D(2 * i));
+        temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 static uint64_t do_vmskltz_w(int64_t val)
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_w(int64_t val)

 void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskltz_w(Vj->D(0));
-    temp |= (do_vmskltz_w(Vj->D(1)) << 2);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_w(Vj->D(2 * i));
+        temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 static uint64_t do_vmskltz_d(int64_t val)
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_d(int64_t val)
 }
 void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskltz_d(Vj->D(0));
-    temp |= (do_vmskltz_d(Vj->D(1)) << 1);
-    Vd->D(0) = temp;
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_d(Vj->D(2 * i));
+        temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1);
+        Vd->D(2 * i) = temp;
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskltz_b(Vj->D(0));
-    temp |= (do_vmskltz_b(Vj->D(1)) << 8);
-    Vd->D(0) = (uint16_t)(~temp);
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskltz_b(Vj->D(2 * i));
+        temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+        Vd->D(2 * i) = (uint16_t)(~temp);
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 static uint64_t do_vmskez_b(uint64_t a)
@@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskez_b(uint64_t a)

 void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
 {
+    int i;
     uint16_t temp = 0;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);

-    temp = do_vmskez_b(Vj->D(0));
-    temp |= (do_vmskez_b(Vj->D(1)) << 8);
-    Vd->D(0) = (uint16_t)(~temp);
-    Vd->D(1) = 0;
+    for (i = 0; i < oprsz / 16; i++) {
+        temp = 0;
+        temp = do_vmskez_b(Vj->D(2 * i));
+        temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8);
+        Vd->D(2 * i) = (uint16_t)(~temp);
+        Vd->D(2 * i + 1) = 0;
+    }
 }

 void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
 TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
 TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
 TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
+TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
+TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
+TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
+TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
+TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
+TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)

 #define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
--
2.39.1
This patch includes:
- XVLDI.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-34-gaosong@loongson.cn>
---
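
Note: xvldi only changes the broadcast width; the immediate decode is
shared with vldi. For the simple forms (imm bit 12 clear), the decode
visible below reduces to the following sketch; the 10-bit sign extension
is an assumption about vldi_get_value, whose body is not shown here, and
vldi_simple_value is a made-up name:

    #include <stdint.h>

    static int64_t vldi_simple_value(uint32_t imm, int *vece)
    {
        int64_t v = imm & 0x3ff;              /* imm[9:0] */

        *vece = (imm >> 10) & 0x3;            /* element size, MO_8 .. MO_64 */
        return (imm & 0x200) ? v - 1024 : v;  /* sign-extend 10 bits */
    }

    /* 'v' is then broadcast over oprsz bytes: 16 for vldi, 32 for xvldi */
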
 target/loongarch/insns.decode | 2 ++
 target/loongarch/disas.c | 7 +++++++
 target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++-------
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv
 xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv
 xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv

+xvldi 0111 01111110 00 ............. ..... @v_i13
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
     return true; \
 }

+static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
+}
+
 static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
 {
     output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmskltz_d, vv)
 INSN_LASX(xvmskgez_b, vv)
 INSN_LASX(xvmsknz_b, vv)

+INSN_LASX(xvldi, v_i)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
     return data;
 }

-static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
+static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
 {
     int sel, vece;
     uint64_t value;

-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    if (!check_vec(ctx, 16)) {
+    if (!check_vec(ctx, oprsz)) {
         return true;
     }

@@ -XXX,XX +XXX,XX @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
         vece = (a->imm >> 10) & 0x3;
     }

-    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
+    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
                          tcg_constant_i64(value));
     return true;
 }

+TRANS(vldi, LSX, gen_vldi, 16)
+TRANS(xvldi, LASX, gen_vldi, 32)
+
 TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
 TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
 TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
--
2.39.1
This patch includes:
- XV{AND/OR/XOR/NOR/ANDN/ORN}.V;
- XV{AND/OR/XOR/NOR}I.B.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-35-gaosong@loongson.cn>
---
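
Note on operand order in gen_vandn_v below: the architectural operation
is vd = ~vj & vk, while tcg_gen_gvec_andc(vece, d, a, b, ...) computes
a & ~b, so the translator deliberately passes vk as the first source and
vj as the second. Per 64-bit element this is just (vandn64 is a made-up
name):

    #include <stdint.h>

    static uint64_t vandn64(uint64_t j, uint64_t k)
    {
        return ~j & k;    /* == k & ~j, matching andc(vd, vk, vj) */
    }
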
 target/loongarch/insns.decode | 12 +++++++
 target/loongarch/disas.c | 12 +++++++
 target/loongarch/vec_helper.c | 4 +--
 target/loongarch/insn_trans/trans_vec.c.inc | 38 ++++++++++++---------
 4 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv

 xvldi 0111 01111110 00 ............. ..... @v_i13

+xvand_v 0111 01010010 01100 ..... ..... ..... @vvv
+xvor_v 0111 01010010 01101 ..... ..... ..... @vvv
+xvxor_v 0111 01010010 01110 ..... ..... ..... @vvv
+xvnor_v 0111 01010010 01111 ..... ..... ..... @vvv
+xvandn_v 0111 01010010 10000 ..... ..... ..... @vvv
+xvorn_v 0111 01010010 10001 ..... ..... ..... @vvv
+
+xvandi_b 0111 01111101 00 ........ ..... ..... @vv_ui8
+xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8
+xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8
+xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8
+
 xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
 xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
 xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmsknz_b, vv)

 INSN_LASX(xvldi, v_i)

+INSN_LASX(xvand_v, vvv)
+INSN_LASX(xvor_v, vvv)
+INSN_LASX(xvxor_v, vvv)
+INSN_LASX(xvnor_v, vvv)
+INSN_LASX(xvandn_v, vvv)
+INSN_LASX(xvorn_v, vvv)
+
+INSN_LASX(xvandi_b, vv_i)
+INSN_LASX(xvori_b, vv_i)
+INSN_LASX(xvxori_b, vv_i)
+INSN_LASX(xvnori_b, vv_i)
+
 INSN_LASX(xvreplgr2vr_b, vr)
 INSN_LASX(xvreplgr2vr_h, vr)
 INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
     }
 }

-void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
+void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
 {
     int i;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;

-    for (i = 0; i < LSX_LEN/8; i++) {
+    for (i = 0; i < simd_oprsz(desc); i++) {
         Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
     }
 }
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
 TRANS(vldi, LSX, gen_vldi, 16)
 TRANS(xvldi, LASX, gen_vldi, 32)

-TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
-TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
-TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
-TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
-
-static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
+static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
 {
     uint32_t vd_ofs, vj_ofs, vk_ofs;

-    if (!avail_LSX(ctx)) {
-        return false;
-    }
-
-    if (!check_vec(ctx, 16)) {
+    if (!check_vec(ctx, oprsz)) {
         return true;
     }

@@ -XXX,XX +XXX,XX @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
     vj_ofs = vec_full_offset(a->vj);
     vk_ofs = vec_full_offset(a->vk);

-    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
+    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
     return true;
 }
-TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
-TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
-TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
-TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)

 static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
 {
@@ -XXX,XX +XXX,XX @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
     tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
 }

+TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
+TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
+TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
+TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+TRANS(vandn_v, LSX, gen_vandn_v, 16)
+TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
+TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
+TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
+TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
 TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
+TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
+TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
+TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
+TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
+TRANS(xvandn_v, LASX, gen_vandn_v, 32)
+TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
+TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
+TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
+TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
+TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)

 TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
 TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
--
2.39.1
This patch includes:
- XVSLL[I].{B/H/W/D};
- XVSRL[I].{B/H/W/D};
- XVSRA[I].{B/H/W/D};
- XVROTR[I].{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-36-gaosong@loongson.cn>
---
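
Note: the variable-count forms map straight onto tcg's shift-by-vector
gvec ops, where each element is shifted by the matching element of vk
and, as these ops are specified, the count is taken modulo the element
width. Scalar model for one byte of vsll.b / xvsll.b (sll_b8 is a
made-up name):

    #include <stdint.h>

    static uint8_t sll_b8(uint8_t x, uint8_t cnt)
    {
        return (uint8_t)(x << (cnt & 7));   /* count mod 8 for byte elements */
    }
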
target/loongarch/insns.decode | 33 +++++++++++++++++++
12
target/loongarch/disas.c | 36 +++++++++++++++++++++
13
target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++
14
3 files changed, 101 insertions(+)
15
16
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/insns.decode
19
+++ b/target/loongarch/insns.decode
20
@@ -XXX,XX +XXX,XX @@ xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8
21
xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8
22
xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8
23
24
+xvsll_b 0111 01001110 10000 ..... ..... ..... @vvv
25
+xvsll_h 0111 01001110 10001 ..... ..... ..... @vvv
26
+xvsll_w 0111 01001110 10010 ..... ..... ..... @vvv
27
+xvsll_d 0111 01001110 10011 ..... ..... ..... @vvv
28
+xvslli_b 0111 01110010 11000 01 ... ..... ..... @vv_ui3
29
+xvslli_h 0111 01110010 11000 1 .... ..... ..... @vv_ui4
30
+xvslli_w 0111 01110010 11001 ..... ..... ..... @vv_ui5
31
+xvslli_d 0111 01110010 1101 ...... ..... ..... @vv_ui6
32
+xvsrl_b 0111 01001110 10100 ..... ..... ..... @vvv
33
+xvsrl_h 0111 01001110 10101 ..... ..... ..... @vvv
34
+xvsrl_w 0111 01001110 10110 ..... ..... ..... @vvv
35
+xvsrl_d 0111 01001110 10111 ..... ..... ..... @vvv
36
+xvsrli_b 0111 01110011 00000 01 ... ..... ..... @vv_ui3
37
+xvsrli_h 0111 01110011 00000 1 .... ..... ..... @vv_ui4
38
+xvsrli_w 0111 01110011 00001 ..... ..... ..... @vv_ui5
39
+xvsrli_d 0111 01110011 0001 ...... ..... ..... @vv_ui6
40
+xvsra_b 0111 01001110 11000 ..... ..... ..... @vvv
+xvsra_h 0111 01001110 11001 ..... ..... ..... @vvv
+xvsra_w 0111 01001110 11010 ..... ..... ..... @vvv
+xvsra_d 0111 01001110 11011 ..... ..... ..... @vvv
+xvsrai_b 0111 01110011 01000 01 ... ..... ..... @vv_ui3
+xvsrai_h 0111 01110011 01000 1 .... ..... ..... @vv_ui4
+xvsrai_w 0111 01110011 01001 ..... ..... ..... @vv_ui5
+xvsrai_d 0111 01110011 0101 ...... ..... ..... @vv_ui6
+xvrotr_b 0111 01001110 11100 ..... ..... ..... @vvv
+xvrotr_h 0111 01001110 11101 ..... ..... ..... @vvv
+xvrotr_w 0111 01001110 11110 ..... ..... ..... @vvv
+xvrotr_d 0111 01001110 11111 ..... ..... ..... @vvv
+xvrotri_b 0111 01101010 00000 01 ... ..... ..... @vv_ui3
+xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4
+xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5
+xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvori_b, vv_i)
INSN_LASX(xvxori_b, vv_i)
INSN_LASX(xvnori_b, vv_i)

+INSN_LASX(xvsll_b, vvv)
+INSN_LASX(xvsll_h, vvv)
+INSN_LASX(xvsll_w, vvv)
+INSN_LASX(xvsll_d, vvv)
+INSN_LASX(xvslli_b, vv_i)
+INSN_LASX(xvslli_h, vv_i)
+INSN_LASX(xvslli_w, vv_i)
+INSN_LASX(xvslli_d, vv_i)
+
+INSN_LASX(xvsrl_b, vvv)
+INSN_LASX(xvsrl_h, vvv)
+INSN_LASX(xvsrl_w, vvv)
+INSN_LASX(xvsrl_d, vvv)
+INSN_LASX(xvsrli_b, vv_i)
+INSN_LASX(xvsrli_h, vv_i)
+INSN_LASX(xvsrli_w, vv_i)
+INSN_LASX(xvsrli_d, vv_i)
+
+INSN_LASX(xvsra_b, vvv)
+INSN_LASX(xvsra_h, vvv)
+INSN_LASX(xvsra_w, vvv)
+INSN_LASX(xvsra_d, vvv)
+INSN_LASX(xvsrai_b, vv_i)
+INSN_LASX(xvsrai_h, vv_i)
+INSN_LASX(xvsrai_w, vv_i)
+INSN_LASX(xvsrai_d, vv_i)
+
+INSN_LASX(xvrotr_b, vvv)
+INSN_LASX(xvrotr_h, vvv)
+INSN_LASX(xvrotr_w, vvv)
+INSN_LASX(xvrotr_d, vvv)
+INSN_LASX(xvrotri_b, vv_i)
+INSN_LASX(xvrotri_h, vv_i)
+INSN_LASX(xvrotri_w, vv_i)
+INSN_LASX(xvrotri_d, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
+TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
+TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
+TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
+TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
+TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
+TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
+TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
+TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)

TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
@@ -XXX,XX +XXX,XX @@ TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
+TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
+TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
+TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
+TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
+TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
+TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
+TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
+TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)

TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
@@ -XXX,XX +XXX,XX @@ TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
+TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
+TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
+TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
+TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
+TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
+TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
+TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
+TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)

TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
@@ -XXX,XX +XXX,XX @@ TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
+TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
+TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)

TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
--
2.39.1
New patch
1
This patch includes:
2
- XVSLLWIL.{H.B/W.H/D.W};
3
- XVSLLWIL.{HU.BU/WU.HU/DU.WU};
4
- XVEXTL.Q.D, VEXTL.QU.DU.
1
5
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230914022645.1151356-37-gaosong@loongson.cn>
9
---
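As a reading aid: the widening shift that the reworked VSLLWIL helper below
computes can be modelled in standalone C roughly as follows. This is a
minimal sketch of the .h.b variant only; the function name, flat-array
layout and main() are invented for illustration and are not QEMU code. Each
128-bit lane contributes its low eight bytes, which are sign-extended to 16
bits and shifted left by the immediate; 'lanes' is 1 for the LSX form and 2
for the LASX form:

#include <stdint.h>
#include <stdio.h>

static void sllwil_h_b(int16_t *dst, const int8_t *src, int imm, int lanes)
{
    /* imm is a 3-bit field (0..7) for the .h.b form; the helper applies
     * imm % 16, which is the same thing for in-range immediates. */
    for (int i = 0; i < lanes; i++) {        /* one 128-bit lane at a time */
        for (int j = 0; j < 8; j++) {        /* low 8 bytes of the lane */
            dst[j + 8 * i] = (int16_t)(src[j + 16 * i] << (imm % 16));
        }
    }
}

int main(void)
{
    int8_t src[32] = { -1, 2, -3, 4 };       /* remaining bytes are zero */
    int16_t dst[16];
    sllwil_h_b(dst, src, 3, 2);
    printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]); /* -8 16 -24 32 */
    return 0;
}
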
 target/loongarch/insns.decode | 9 +++++
 target/loongarch/disas.c | 9 +++++
 target/loongarch/vec_helper.c | 45 +++++++++++++--------
 target/loongarch/insn_trans/trans_vec.c.inc | 21 ++++++++--
 4 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4
xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5
xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6

+xvsllwil_h_b 0111 01110000 10000 01 ... ..... ..... @vv_ui3
+xvsllwil_w_h 0111 01110000 10000 1 .... ..... ..... @vv_ui4
+xvsllwil_d_w 0111 01110000 10001 ..... ..... ..... @vv_ui5
+xvextl_q_d 0111 01110000 10010 00000 ..... ..... @vv
+xvsllwil_hu_bu 0111 01110000 11000 01 ... ..... ..... @vv_ui3
+xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4
+xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5
+xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvrotri_h, vv_i)
INSN_LASX(xvrotri_w, vv_i)
INSN_LASX(xvrotri_d, vv_i)

+INSN_LASX(xvsllwil_h_b, vv_i)
+INSN_LASX(xvsllwil_w_h, vv_i)
+INSN_LASX(xvsllwil_d_w, vv_i)
+INSN_LASX(xvextl_q_d, vv)
+INSN_LASX(xvsllwil_hu_bu, vv_i)
+INSN_LASX(xvsllwil_wu_hu, vv_i)
+INSN_LASX(xvsllwil_du_wu, vv_i)
+INSN_LASX(xvextl_qu_du, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
}
}

-#define VSLLWIL(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(temp.E1(0)) TD; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
- } \
- *Vd = temp; \
+#define VSLLWIL(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ typedef __typeof(temp.E1(0)) TD; \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
+ } \
+ } \
+ *Vd = temp; \
}

+
void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_makes64(Vj->D(0));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_makes64(Vj->D(2 * i));
+ }
}

void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

- Vd->Q(0) = int128_make64(Vj->D(0));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_make64(Vj->UD(2 * i));
+ }
}

VSLLWIL(vsllwil_h_b, 16, H, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
 gen_helper_gvec_2i *fn)
{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
 tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
 vec_full_offset(a->vj),
 tcg_constant_i64(a->imm),
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,

static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
 return gen_vv_i_vl(ctx, a, 16, fn);
}

+static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+ return gen_vv_i_vl(ctx, a, 32, fn);
+}
+
static bool gen_cv(DisasContext *ctx, arg_cv *a,
 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
{
@@ -XXX,XX +XXX,XX @@ TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
+TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
+TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
+TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
+TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
+TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
+TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
+TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
+TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)

TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
--
2.39.1
New patch
This patch includes:
- XVSRLR[I].{B/H/W/D};
- XVSRAR[I].{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-38-gaosong@loongson.cn>
---
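A reading aid for the do_vsrlr_*/do_vsrar_* pattern reused below (standalone
C; the function name and main() are invented for illustration, not QEMU
code): the element is shifted right and the last bit shifted out is added
back in, so the result is rounded to nearest rather than truncated. For the
unsigned byte variant:

#include <stdint.h>
#include <stdio.h>

static uint8_t srlr_b(uint8_t x, int n)    /* rounding logical shift right */
{
    if (n == 0) {
        return x;                          /* nothing shifted out */
    }
    return (x >> n) + ((x >> (n - 1)) & 1);
}

int main(void)
{
    /* 0x87 >> 3 truncates to 0x10; the last bit shifted out is 1,
     * so the rounded result is 0x11. */
    printf("0x%02x\n", srlr_b(0x87, 3));
    return 0;
}
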
 target/loongarch/insns.decode | 17 +++++++++++++++++
 target/loongarch/disas.c | 18 ++++++++++++++++++
 target/loongarch/vec_helper.c | 12 ++++++++----
 target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4
xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5
xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv

+xvsrlr_b 0111 01001111 00000 ..... ..... ..... @vvv
+xvsrlr_h 0111 01001111 00001 ..... ..... ..... @vvv
+xvsrlr_w 0111 01001111 00010 ..... ..... ..... @vvv
+xvsrlr_d 0111 01001111 00011 ..... ..... ..... @vvv
+xvsrlri_b 0111 01101010 01000 01 ... ..... ..... @vv_ui3
+xvsrlri_h 0111 01101010 01000 1 .... ..... ..... @vv_ui4
+xvsrlri_w 0111 01101010 01001 ..... ..... ..... @vv_ui5
+xvsrlri_d 0111 01101010 0101 ...... ..... ..... @vv_ui6
+xvsrar_b 0111 01001111 00100 ..... ..... ..... @vvv
+xvsrar_h 0111 01001111 00101 ..... ..... ..... @vvv
+xvsrar_w 0111 01001111 00110 ..... ..... ..... @vvv
+xvsrar_d 0111 01001111 00111 ..... ..... ..... @vvv
+xvsrari_b 0111 01101010 10000 01 ... ..... ..... @vv_ui3
+xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4
+xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5
+xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsllwil_wu_hu, vv_i)
INSN_LASX(xvsllwil_du_wu, vv_i)
INSN_LASX(xvextl_qu_du, vv)

+INSN_LASX(xvsrlr_b, vvv)
+INSN_LASX(xvsrlr_h, vvv)
+INSN_LASX(xvsrlr_w, vvv)
+INSN_LASX(xvsrlr_d, vvv)
+INSN_LASX(xvsrlri_b, vv_i)
+INSN_LASX(xvsrlri_h, vv_i)
+INSN_LASX(xvsrlri_w, vv_i)
+INSN_LASX(xvsrlri_d, vv_i)
+
+INSN_LASX(xvsrar_b, vvv)
+INSN_LASX(xvsrar_h, vvv)
+INSN_LASX(xvsrar_w, vvv)
+INSN_LASX(xvsrar_d, vvv)
+INSN_LASX(xvsrari_b, vv_i)
+INSN_LASX(xvsrari_h, vv_i)
+INSN_LASX(xvsrari_w, vv_i)
+INSN_LASX(xvsrari_d, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
} \
}
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
} \
}
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
} \
}
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
} \
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
+TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
+TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
+TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
+TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
+TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
+TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
+TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
+TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)

TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
@@ -XXX,XX +XXX,XX @@ TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
+TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
+TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
+TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
+TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
+TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
+TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
+TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
+TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)

TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
--
2.39.1
New patch
This patch includes:
- XVSRLN.{B.H/H.W/W.D};
- XVSRAN.{B.H/H.W/W.D};
- XVSRLNI.{B.H/H.W/W.D/D.Q};
- XVSRANI.{B.H/H.W/W.D/D.Q}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-39-gaosong@loongson.cn>
---
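The lane layout used by the narrowing helpers below can be modelled in
standalone C roughly as follows (a sketch of [x]vsrln.b.h; the names,
flat-array layout and main() are invented for illustration, not QEMU code).
Within each 128-bit lane, eight 16-bit elements narrow into the low eight
bytes of the lane and the high eight bytes are zeroed, which is what the
Vd->D(2 * i + 1) = 0 statement in the reworked macros expresses:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* 'lanes' is 1 for the 128-bit LSX form and 2 for the 256-bit LASX form. */
static void srln_b_h(uint8_t *dst, const uint16_t *j_reg,
                     const uint16_t *k_reg, int lanes)
{
    for (int i = 0; i < lanes; i++) {
        for (int j = 0; j < 8; j++) {
            dst[j + 16 * i] = (uint8_t)(j_reg[j + 8 * i]
                                        >> (k_reg[j + 8 * i] % 16));
        }
        memset(dst + 16 * i + 8, 0, 8);    /* high half of the lane */
    }
}

int main(void)
{
    uint16_t j_reg[16] = { 0x1234 }, k_reg[16] = { 4 };
    uint8_t dst[32];
    srln_b_h(dst, j_reg, k_reg, 2);
    printf("0x%02x\n", dst[0]);            /* 0x1234 >> 4 = 0x123 -> 0x23 */
    return 0;
}
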
 target/loongarch/insns.decode | 16 ++
 target/loongarch/disas.c | 16 ++
 target/loongarch/vec_helper.c | 166 +++++++++++---------
 target/loongarch/insn_trans/trans_vec.c.inc | 14 ++
 4 files changed, 137 insertions(+), 75 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4
xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5
xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6

+xvsrln_b_h 0111 01001111 01001 ..... ..... ..... @vvv
+xvsrln_h_w 0111 01001111 01010 ..... ..... ..... @vvv
+xvsrln_w_d 0111 01001111 01011 ..... ..... ..... @vvv
+xvsran_b_h 0111 01001111 01101 ..... ..... ..... @vvv
+xvsran_h_w 0111 01001111 01110 ..... ..... ..... @vvv
+xvsran_w_d 0111 01001111 01111 ..... ..... ..... @vvv
+
+xvsrlni_b_h 0111 01110100 00000 1 .... ..... ..... @vv_ui4
+xvsrlni_h_w 0111 01110100 00001 ..... ..... ..... @vv_ui5
+xvsrlni_w_d 0111 01110100 0001 ...... ..... ..... @vv_ui6
+xvsrlni_d_q 0111 01110100 001 ....... ..... ..... @vv_ui7
+xvsrani_b_h 0111 01110101 10000 1 .... ..... ..... @vv_ui4
+xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5
+xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6
+xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrari_h, vv_i)
INSN_LASX(xvsrari_w, vv_i)
INSN_LASX(xvsrari_d, vv_i)

+INSN_LASX(xvsrln_b_h, vvv)
+INSN_LASX(xvsrln_h_w, vvv)
+INSN_LASX(xvsrln_w_d, vvv)
+INSN_LASX(xvsran_b_h, vvv)
+INSN_LASX(xvsran_h_w, vvv)
+INSN_LASX(xvsran_w_d, vvv)
+
+INSN_LASX(xvsrlni_b_h, vv_i)
+INSN_LASX(xvsrlni_h_w, vv_i)
+INSN_LASX(xvsrlni_w_d, vv_i)
+INSN_LASX(xvsrlni_d_q, vv_i)
+INSN_LASX(xvsrani_b_h, vv_i)
+INSN_LASX(xvsrani_h_w, vv_i)
+INSN_LASX(xvsrani_w_d, vv_i)
+INSN_LASX(xvsrani_d_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_d, 64, D)

#define R_SHIFT(a, b) (a >> b)

-#define VSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRLN(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E2(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSRLN(vsrln_b_h, 16, uint16_t, B, H)
-VSRLN(vsrln_h_w, 32, uint32_t, H, W)
-VSRLN(vsrln_w_d, 64, uint64_t, W, D)
+VSRLN(vsrln_b_h, 16, B, UH)
+VSRLN(vsrln_h_w, 32, H, UW)
+VSRLN(vsrln_w_d, 64, W, UD)

-#define VSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSRAN(vsran_b_h, 16, uint16_t, B, H)
-VSRAN(vsran_h_w, 32, uint32_t, H, W)
-VSRAN(vsran_w_d, 64, uint64_t, W, D)
+VSRAN(vsran_b_h, 16, B, H, UH)
+VSRAN(vsran_h_w, 32, H, W, UW)
+VSRAN(vsran_w_d, 64, W, D, UD)

-#define VSRLNI(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}

void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;

- temp.D(0) = 0;
- temp.D(1) = 0;
- temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128));
- temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128));
+ for (i = 0; i < 2; i++) {
+ temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
+ }
*Vd = temp;
}

-VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
-VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
-VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
+VSRLNI(vsrlni_b_h, 16, B, UH)
+VSRLNI(vsrlni_h_w, 32, H, UW)
+VSRLNI(vsrlni_w_d, 64, W, UD)

-#define VSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}

void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;

- temp.D(0) = 0;
- temp.D(1) = 0;
- temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128));
- temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128));
+ for (i = 0; i < 2; i++) {
+ temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128));
+ }
*Vd = temp;
}

diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
+TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
+TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
+TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
+TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
+TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
+TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)

TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
@@ -XXX,XX +XXX,XX @@ TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
+TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
+TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
+TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
+TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
+TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
+TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
+TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
+TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)

TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
--
2.39.1
New patch
This patch includes:
- XVSRLRN.{B.H/H.W/W.D};
- XVSRARN.{B.H/H.W/W.D};
- XVSRLRNI.{B.H/H.W/W.D/D.Q};
- XVSRARNI.{B.H/H.W/W.D/D.Q}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-40-gaosong@loongson.cn>
---
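These helpers combine the two previous steps: a rounding right shift
followed by a narrowing store, applied per 128-bit lane. One element of
[x]vsrlrn.b.h can be modelled in standalone C as below (the function name
and main() are invented for illustration, not QEMU code):

#include <stdint.h>
#include <stdio.h>

static uint8_t srlrn_b_h(uint16_t x, uint16_t k)
{
    int n = k % 16;
    /* round-shift as in do_vsrlr_h, then keep the low 8 bits */
    uint16_t r = n ? (uint16_t)((x >> n) + ((x >> (n - 1)) & 1)) : x;
    return (uint8_t)r;
}

int main(void)
{
    printf("0x%02x\n", srlrn_b_h(0x0187, 3)); /* (0x187 >> 3) + 1 = 0x31 */
    return 0;
}
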
 target/loongarch/insns.decode | 16 ++
 target/loongarch/disas.c | 16 ++
 target/loongarch/vec_helper.c | 198 +++++++++++---------
 target/loongarch/insn_trans/trans_vec.c.inc | 14 ++
 4 files changed, 159 insertions(+), 85 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5
xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6
xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7

+xvsrlrn_b_h 0111 01001111 10001 ..... ..... ..... @vvv
+xvsrlrn_h_w 0111 01001111 10010 ..... ..... ..... @vvv
+xvsrlrn_w_d 0111 01001111 10011 ..... ..... ..... @vvv
+xvsrarn_b_h 0111 01001111 10101 ..... ..... ..... @vvv
+xvsrarn_h_w 0111 01001111 10110 ..... ..... ..... @vvv
+xvsrarn_w_d 0111 01001111 10111 ..... ..... ..... @vvv
+
+xvsrlrni_b_h 0111 01110100 01000 1 .... ..... ..... @vv_ui4
+xvsrlrni_h_w 0111 01110100 01001 ..... ..... ..... @vv_ui5
+xvsrlrni_w_d 0111 01110100 0101 ...... ..... ..... @vv_ui6
+xvsrlrni_d_q 0111 01110100 011 ....... ..... ..... @vv_ui7
+xvsrarni_b_h 0111 01110101 11000 1 .... ..... ..... @vv_ui4
+xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5
+xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6
+xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrani_h_w, vv_i)
INSN_LASX(xvsrani_w_d, vv_i)
INSN_LASX(xvsrani_d_q, vv_i)

+INSN_LASX(xvsrlrn_b_h, vvv)
+INSN_LASX(xvsrlrn_h_w, vvv)
+INSN_LASX(xvsrlrn_w_d, vvv)
+INSN_LASX(xvsrarn_b_h, vvv)
+INSN_LASX(xvsrarn_h_w, vvv)
+INSN_LASX(xvsrarn_w_d, vvv)
+
+INSN_LASX(xvsrlrni_b_h, vv_i)
+INSN_LASX(xvsrlrni_h_w, vv_i)
+INSN_LASX(xvsrlrni_w_d, vv_i)
+INSN_LASX(xvsrlrni_d_q, vv_i)
+INSN_LASX(xvsrarni_b_h, vv_i)
+INSN_LASX(xvsrarni_h_w, vv_i)
+INSN_LASX(xvsrarni_w_d, vv_i)
+INSN_LASX(xvsrarni_d_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VSRANI(vsrani_b_h, 16, B, H)
VSRANI(vsrani_h_w, 32, H, W)
VSRANI(vsrani_w_d, 64, W, D)

-#define VSRLRN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRLRN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
-VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
-VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
+VSRLRN(vsrlrn_b_h, 16, B, H, UH)
+VSRLRN(vsrlrn_h_w, 32, H, W, UW)
+VSRLRN(vsrlrn_w_d, 64, W, D, UD)

-#define VSRARN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRARN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
-VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
-VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
-
-#define VSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+VSRARN(vsrarn_b_h, 16, B, H, UH)
+VSRARN(vsrarn_h_w, 32, H, W, UW)
+VSRARN(vsrarn_w_d, 64, W, D, UD)
+
+#define VSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}

void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- Int128 r1, r2;
-
- if (imm == 0) {
- temp.D(0) = int128_getlo(Vj->Q(0));
- temp.D(1) = int128_getlo(Vd->Q(0));
- } else {
- r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);

- temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
- temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i),
+ imm), r[ 2 * i + 1]));
+ }
}
*Vd = temp;
}
@@ -XXX,XX +XXX,XX @@ VSRLRNI(vsrlrni_b_h, 16, B, H)
VSRLRNI(vsrlrni_h_w, 32, H, W)
VSRLRNI(vsrlrni_w_d, 64, W, D)

-#define VSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}

void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- Int128 r1, r2;
-
- if (imm == 0) {
- temp.D(0) = int128_getlo(Vj->Q(0));
- temp.D(1) = int128_getlo(Vd->Q(0));
- } else {
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);

- temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
- temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i),
+ imm), r[2 * i + 1]));
+ }
}
*Vd = temp;
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
+TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
+TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
+TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
+TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
+TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
+TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)

TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
@@ -XXX,XX +XXX,XX @@ TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
+TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
+TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
+TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
+TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
+TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
+TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
+TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
+TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)

TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
--
2.39.1
New patch
This patch includes:
- XVSSRLN.{B.H/H.W/W.D};
- XVSSRAN.{B.H/H.W/W.D};
- XVSSRLN.{BU.H/HU.W/WU.D};
- XVSSRAN.{BU.H/HU.W/WU.D};
- XVSSRLNI.{B.H/H.W/W.D/D.Q};
- XVSSRANI.{B.H/H.W/W.D/D.Q};
- XVSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
- XVSSRANI.{BU.H/HU.W/WU.D/DU.Q}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-41-gaosong@loongson.cn>
---
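The saturating variants clamp the narrowed result instead of simply
truncating it. One element of [x]vssrln.bu.h (logical shift, unsigned
saturation to 8 bits) can be modelled in standalone C as below (the function
name and main() are invented for illustration, not QEMU code); the signed
forms clamp to the signed range of the destination element in the same way:

#include <stdint.h>
#include <stdio.h>

static uint8_t ssrln_bu_h(uint16_t x, uint16_t k)
{
    uint16_t r = x >> (k % 16);
    return r > 0xff ? 0xff : (uint8_t)r;   /* saturate instead of wrap */
}

int main(void)
{
    printf("%u %u\n", ssrln_bu_h(0x1234, 8), ssrln_bu_h(0x1234, 2));
    /* prints "18 255": 0x12 fits in a byte, 0x48d saturates */
    return 0;
}
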
 target/loongarch/insns.decode | 30 ++
 target/loongarch/disas.c | 30 ++
 target/loongarch/vec_helper.c | 456 ++++++++++++--------
 target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
 4 files changed, 353 insertions(+), 191 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5
xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6
xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7

+xvssrln_b_h 0111 01001111 11001 ..... ..... ..... @vvv
+xvssrln_h_w 0111 01001111 11010 ..... ..... ..... @vvv
+xvssrln_w_d 0111 01001111 11011 ..... ..... ..... @vvv
+xvssran_b_h 0111 01001111 11101 ..... ..... ..... @vvv
+xvssran_h_w 0111 01001111 11110 ..... ..... ..... @vvv
+xvssran_w_d 0111 01001111 11111 ..... ..... ..... @vvv
+xvssrln_bu_h 0111 01010000 01001 ..... ..... ..... @vvv
+xvssrln_hu_w 0111 01010000 01010 ..... ..... ..... @vvv
+xvssrln_wu_d 0111 01010000 01011 ..... ..... ..... @vvv
+xvssran_bu_h 0111 01010000 01101 ..... ..... ..... @vvv
+xvssran_hu_w 0111 01010000 01110 ..... ..... ..... @vvv
+xvssran_wu_d 0111 01010000 01111 ..... ..... ..... @vvv
+
+xvssrlni_b_h 0111 01110100 10000 1 .... ..... ..... @vv_ui4
+xvssrlni_h_w 0111 01110100 10001 ..... ..... ..... @vv_ui5
+xvssrlni_w_d 0111 01110100 1001 ...... ..... ..... @vv_ui6
+xvssrlni_d_q 0111 01110100 101 ....... ..... ..... @vv_ui7
+xvssrani_b_h 0111 01110110 00000 1 .... ..... ..... @vv_ui4
+xvssrani_h_w 0111 01110110 00001 ..... ..... ..... @vv_ui5
+xvssrani_w_d 0111 01110110 0001 ...... ..... ..... @vv_ui6
+xvssrani_d_q 0111 01110110 001 ....... ..... ..... @vv_ui7
+xvssrlni_bu_h 0111 01110100 11000 1 .... ..... ..... @vv_ui4
+xvssrlni_hu_w 0111 01110100 11001 ..... ..... ..... @vv_ui5
+xvssrlni_wu_d 0111 01110100 1101 ...... ..... ..... @vv_ui6
+xvssrlni_du_q 0111 01110100 111 ....... ..... ..... @vv_ui7
+xvssrani_bu_h 0111 01110110 01000 1 .... ..... ..... @vv_ui4
+xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5
+xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6
+xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrarni_h_w, vv_i)
INSN_LASX(xvsrarni_w_d, vv_i)
INSN_LASX(xvsrarni_d_q, vv_i)

+INSN_LASX(xvssrln_b_h, vvv)
+INSN_LASX(xvssrln_h_w, vvv)
+INSN_LASX(xvssrln_w_d, vvv)
+INSN_LASX(xvssran_b_h, vvv)
+INSN_LASX(xvssran_h_w, vvv)
+INSN_LASX(xvssran_w_d, vvv)
+INSN_LASX(xvssrln_bu_h, vvv)
+INSN_LASX(xvssrln_hu_w, vvv)
+INSN_LASX(xvssrln_wu_d, vvv)
+INSN_LASX(xvssran_bu_h, vvv)
+INSN_LASX(xvssran_hu_w, vvv)
+INSN_LASX(xvssran_wu_d, vvv)
+
+INSN_LASX(xvssrlni_b_h, vv_i)
+INSN_LASX(xvssrlni_h_w, vv_i)
+INSN_LASX(xvssrlni_w_d, vv_i)
+INSN_LASX(xvssrlni_d_q, vv_i)
+INSN_LASX(xvssrani_b_h, vv_i)
+INSN_LASX(xvssrani_h_w, vv_i)
+INSN_LASX(xvssrani_w_d, vv_i)
+INSN_LASX(xvssrani_d_q, vv_i)
+INSN_LASX(xvssrlni_bu_h, vv_i)
+INSN_LASX(xvssrlni_hu_w, vv_i)
+INSN_LASX(xvssrlni_wu_d, vv_i)
+INSN_LASX(xvssrlni_du_q, vv_i)
+INSN_LASX(xvssrani_bu_h, vv_i)
+INSN_LASX(xvssrani_hu_w, vv_i)
+INSN_LASX(xvssrani_wu_d, vv_i)
+INSN_LASX(xvssrani_du_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)

-#define VSSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
-VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
-VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
+VSSRLN(vssrln_b_h, 16, B, H, UH)
+VSSRLN(vssrln_h_w, 32, H, W, UW)
+VSSRLN(vssrln_w_d, 64, W, D, UD)

#define SSRANS(E, T1, T2) \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
shft_res = e2 >> sa; \
} \
T2 mask; \
- mask = (1ll << sh) -1; \
+ mask = (1ll << sh) - 1; \
if (shft_res > mask) { \
return mask; \
- } else if (shft_res < -(mask +1)) { \
+ } else if (shft_res < -(mask + 1)) { \
return ~mask; \
} else { \
return shft_res; \
@@ -XXX,XX +XXX,XX @@ SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)

-#define VSSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSSRAN(vssran_b_h, 16, uint16_t, B, H)
-VSSRAN(vssran_h_w, 32, uint32_t, H, W)
-VSSRAN(vssran_w_d, 64, uint64_t, W, D)
+VSSRAN(vssran_b_h, 16, B, H, UH)
+VSSRAN(vssran_h_w, 32, H, W, UW)
+VSSRAN(vssran_w_d, 64, W, D, UD)

#define SSRLNU(E, T1, T2, T3) \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
shft_res = (((T1)e2) >> sa); \
} \
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -XXX,XX +XXX,XX @@ SSRLNU(B, uint16_t, uint8_t, int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)

-#define VSSRLNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
-VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
-VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
+VSSRLNU(vssrln_bu_h, 16, B, H, UH)
+VSSRLNU(vssrln_hu_w, 32, H, W, UW)
+VSSRLNU(vssrln_wu_d, 64, W, D, UD)

#define SSRANU(E, T1, T2, T3) \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
@@ -XXX,XX +XXX,XX @@ static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
shft_res = 0; \
} \
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -XXX,XX +XXX,XX @@ SSRANU(B, uint16_t, uint8_t, int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)

-#define VSSRANU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
-}
-
-VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
-VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
-VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
-
-#define VSSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
- } \
- *Vd = temp; \
+#define VSSRANU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}

-void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+VSSRANU(vssran_bu_h, 16, B, H, UH)
+VSSRANU(vssran_hu_w, 32, H, W, UW)
+VSSRANU(vssran_wu_d, 64, W, D, UD)
+
+#define VSSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrlni_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;

if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_urshift(Vj->Q(0), imm);
- shft_res2 = int128_urshift(Vd->Q(0), imm);
+ shft_res1 = int128_urshift(Vj->Q(idx), imm);
+ shft_res2 = int128_urshift(Vd->Q(idx), imm);
}
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());

if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
+ Vd->D(idx * 2) = int128_getlo(mask);
}else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}

if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
}else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
}
}

@@ -XXX,XX +XXX,XX @@ VSSRLNI(vssrlni_b_h, 16, B, H)
VSSRLNI(vssrlni_h_w, 32, H, W)
VSSRLNI(vssrlni_w_d, 64, W, D)

-#define VSSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask, min;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrani_d_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask, Int128 min)
+{
+ Int128 shft_res1, shft_res2;

if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_rshift(Vj->Q(0), imm);
- shft_res2 = int128_rshift(Vd->Q(0), imm);
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
}
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- min = int128_lshift(int128_one(), 63);

- if (int128_gt(shft_res1, mask)) {
- Vd->D(0) = int128_getlo(mask);
+ if (int128_gt(shft_res1, mask)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
} else if (int128_lt(shft_res1, int128_neg(min))) {
- Vd->D(0) = int128_getlo(min);
+ Vd->D(idx * 2) = int128_getlo(min);
} else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}

if (int128_gt(shft_res2, mask)) {
- Vd->D(1) = int128_getlo(mask);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
} else if (int128_lt(shft_res2, int128_neg(min))) {
- Vd->D(1) = int128_getlo(min);
+ Vd->D(idx * 2 + 1) = int128_getlo(min);
} else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask, min;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ min = int128_lshift(int128_one(), 63);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrani_d_q(Vd, Vj, imm, i, mask, min);
}
}

+
VSSRANI(vssrani_b_h, 16, B, H)
VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)

-#define VSSRLNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
+#define VSSRLNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
}

void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- Int128 shft_res1, shft_res2, mask;
+ int i;
+ Int128 mask;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

- if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
- } else {
- shft_res1 = int128_urshift(Vj->Q(0), imm);
- shft_res2 = int128_urshift(Vd->Q(0), imm);
- }
mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());

- if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
- }else {
- Vd->D(0) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
- }else {
- Vd->D(1) = int128_getlo(shft_res2);
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
}
}

@@ -XXX,XX +XXX,XX @@ VSSRLNUI(vssrlni_bu_h, 16, B, H)
VSSRLNUI(vssrlni_hu_w, 32, H, W)
VSSRLNUI(vssrlni_wu_d, 64, W, D)

-#define VSSRANUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRANUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrani_du_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;

if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_rshift(Vj->Q(0), imm);
- shft_res2 = int128_rshift(Vd->Q(0), imm);
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
}

- if (int128_lt(Vj->Q(0), int128_zero())) {
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
shft_res1 = int128_zero();
}
- if (int128_lt(Vd->Q(0), int128_zero())) {
652
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
653
shft_res2 = int128_zero();
654
}
655
-
656
- mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
657
-
658
if (int128_ult(mask, shft_res1)) {
659
- Vd->D(0) = int128_getlo(mask);
660
+ Vd->D(idx * 2) = int128_getlo(mask);
661
}else {
662
- Vd->D(0) = int128_getlo(shft_res1);
663
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
664
}
665
666
if (int128_ult(mask, shft_res2)) {
667
- Vd->D(1) = int128_getlo(mask);
668
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
669
}else {
670
- Vd->D(1) = int128_getlo(shft_res2);
671
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
672
+ }
673
+
674
+}
675
+
676
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
677
+{
678
+ int i;
679
+ Int128 mask;
680
+ VReg *Vd = (VReg *)vd;
681
+ VReg *Vj = (VReg *)vj;
682
+ int oprsz = simd_oprsz(desc);
683
+
684
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
685
+
686
+ for (i = 0; i < oprsz / 16; i++) {
687
+ do_vssrani_du_q(Vd, Vj, imm, i, mask);
688
}
689
}
690
691
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
692
index XXXXXXX..XXXXXXX 100644
693
--- a/target/loongarch/insn_trans/trans_vec.c.inc
694
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
695
@@ -XXX,XX +XXX,XX @@ TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
696
TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
697
TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
698
TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
699
+TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
700
+TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
701
+TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
702
+TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
703
+TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
704
+TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
705
+TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
706
+TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
707
+TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
708
+TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
709
+TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
710
+TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
711
712
TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
713
TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
714
@@ -XXX,XX +XXX,XX @@ TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
715
TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
716
TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
717
TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
718
+TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
719
+TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
720
+TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
721
+TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
722
+TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
723
+TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
724
+TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
725
+TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
726
+TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
727
+TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
728
+TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
729
+TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
730
+TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
731
+TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
732
+TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
733
+TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
734
735
TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
736
TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
737
--
738
2.39.1
New patch
1
This patch includes:
2
- XVSSRLRN.{B.H/H.W/W.D};
3
- XVSSRARN.{B.H/H.W/W.D};
4
- XVSSRLRN.{BU.H/HU.W/WU.D};
5
- XVSSRARN.{BU.H/HU.W/WU.D};
6
- XVSSRLRNI.{B.H/H.W/W.D/D.Q};
7
- XVSSRARNI.{B.H/H.W/W.D/D.Q};
8
- XVSSRLRNI.{BU.H/HU.W/WU.D/DU.Q};
9
- XVSSRARNI.{BU.H/HU.W/WU.D/DU.Q}.
1
10
11
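(Editorial aside, not part of the commit: a scalar sketch of what this saturating rounding shift-right-narrow family computes, as I read the do_ssrlrns_* helpers in the diff below -- logical shift right by sa, round by adding back the last bit shifted out, then saturate to an sh-bit unsigned result. All names here are illustrative, not QEMU API.)

/* Hedged sketch of the scalar core behind do_ssrlrns_b_h. */
#include <stdint.h>
#include <stdio.h>

static uint8_t ssrlrn_b_h(uint16_t e, int sa, int sh)
{
    uint64_t r = sa ? ((uint64_t)e >> sa) + ((e >> (sa - 1)) & 1)
                    : e;                      /* sa == 0: no rounding */
    uint64_t mask = (1ull << sh) - 1;         /* sh = BIT / 2 - 1 = 7 here */
    return r > mask ? mask : r;
}

int main(void)
{
    /* 0x1ff >> 4 = 0x1f, rounding bit is 1, so 0x20; fits in 7 bits. */
    printf("0x%x\n", ssrlrn_b_h(0x1ff, 4, 7));
    return 0;
}

The vector helpers below apply this per element across each 128-bit lane, zeroing the high half of each lane (Vd->D(2 * i + 1) = 0) in the non-immediate forms.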
Signed-off-by: Song Gao <gaosong@loongson.cn>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-Id: <20230914022645.1151356-42-gaosong@loongson.cn>
14
---
15
target/loongarch/insns.decode | 30 ++
16
target/loongarch/disas.c | 30 ++
17
target/loongarch/vec_helper.c | 489 ++++++++++++--------
18
target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
19
4 files changed, 378 insertions(+), 199 deletions(-)
20
21
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
22
index XXXXXXX..XXXXXXX 100644
23
--- a/target/loongarch/insns.decode
24
+++ b/target/loongarch/insns.decode
25
@@ -XXX,XX +XXX,XX @@ xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5
26
xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6
27
xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7
28
29
+xvssrlrn_b_h 0111 01010000 00001 ..... ..... ..... @vvv
30
+xvssrlrn_h_w 0111 01010000 00010 ..... ..... ..... @vvv
31
+xvssrlrn_w_d 0111 01010000 00011 ..... ..... ..... @vvv
32
+xvssrarn_b_h 0111 01010000 00101 ..... ..... ..... @vvv
33
+xvssrarn_h_w 0111 01010000 00110 ..... ..... ..... @vvv
34
+xvssrarn_w_d 0111 01010000 00111 ..... ..... ..... @vvv
35
+xvssrlrn_bu_h 0111 01010000 10001 ..... ..... ..... @vvv
36
+xvssrlrn_hu_w 0111 01010000 10010 ..... ..... ..... @vvv
37
+xvssrlrn_wu_d 0111 01010000 10011 ..... ..... ..... @vvv
38
+xvssrarn_bu_h 0111 01010000 10101 ..... ..... ..... @vvv
39
+xvssrarn_hu_w 0111 01010000 10110 ..... ..... ..... @vvv
40
+xvssrarn_wu_d 0111 01010000 10111 ..... ..... ..... @vvv
41
+
42
+xvssrlrni_b_h 0111 01110101 00000 1 .... ..... ..... @vv_ui4
43
+xvssrlrni_h_w 0111 01110101 00001 ..... ..... ..... @vv_ui5
44
+xvssrlrni_w_d 0111 01110101 0001 ...... ..... ..... @vv_ui6
45
+xvssrlrni_d_q 0111 01110101 001 ....... ..... ..... @vv_ui7
46
+xvssrarni_b_h 0111 01110110 10000 1 .... ..... ..... @vv_ui4
47
+xvssrarni_h_w 0111 01110110 10001 ..... ..... ..... @vv_ui5
48
+xvssrarni_w_d 0111 01110110 1001 ...... ..... ..... @vv_ui6
49
+xvssrarni_d_q 0111 01110110 101 ....... ..... ..... @vv_ui7
50
+xvssrlrni_bu_h 0111 01110101 01000 1 .... ..... ..... @vv_ui4
51
+xvssrlrni_hu_w 0111 01110101 01001 ..... ..... ..... @vv_ui5
52
+xvssrlrni_wu_d 0111 01110101 0101 ...... ..... ..... @vv_ui6
53
+xvssrlrni_du_q 0111 01110101 011 ....... ..... ..... @vv_ui7
54
+xvssrarni_bu_h 0111 01110110 11000 1 .... ..... ..... @vv_ui4
55
+xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5
56
+xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6
57
+xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7
58
+
59
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
60
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
61
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
62
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/loongarch/disas.c
65
+++ b/target/loongarch/disas.c
66
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssrani_hu_w, vv_i)
67
INSN_LASX(xvssrani_wu_d, vv_i)
68
INSN_LASX(xvssrani_du_q, vv_i)
69
70
+INSN_LASX(xvssrlrn_b_h, vvv)
71
+INSN_LASX(xvssrlrn_h_w, vvv)
72
+INSN_LASX(xvssrlrn_w_d, vvv)
73
+INSN_LASX(xvssrarn_b_h, vvv)
74
+INSN_LASX(xvssrarn_h_w, vvv)
75
+INSN_LASX(xvssrarn_w_d, vvv)
76
+INSN_LASX(xvssrlrn_bu_h, vvv)
77
+INSN_LASX(xvssrlrn_hu_w, vvv)
78
+INSN_LASX(xvssrlrn_wu_d, vvv)
79
+INSN_LASX(xvssrarn_bu_h, vvv)
80
+INSN_LASX(xvssrarn_hu_w, vvv)
81
+INSN_LASX(xvssrarn_wu_d, vvv)
82
+
83
+INSN_LASX(xvssrlrni_b_h, vv_i)
84
+INSN_LASX(xvssrlrni_h_w, vv_i)
85
+INSN_LASX(xvssrlrni_w_d, vv_i)
86
+INSN_LASX(xvssrlrni_d_q, vv_i)
87
+INSN_LASX(xvssrlrni_bu_h, vv_i)
88
+INSN_LASX(xvssrlrni_hu_w, vv_i)
89
+INSN_LASX(xvssrlrni_wu_d, vv_i)
90
+INSN_LASX(xvssrlrni_du_q, vv_i)
91
+INSN_LASX(xvssrarni_b_h, vv_i)
92
+INSN_LASX(xvssrarni_h_w, vv_i)
93
+INSN_LASX(xvssrarni_w_d, vv_i)
94
+INSN_LASX(xvssrarni_d_q, vv_i)
95
+INSN_LASX(xvssrarni_bu_h, vv_i)
96
+INSN_LASX(xvssrarni_hu_w, vv_i)
97
+INSN_LASX(xvssrarni_wu_d, vv_i)
98
+INSN_LASX(xvssrarni_du_q, vv_i)
99
+
100
INSN_LASX(xvreplgr2vr_b, vr)
101
INSN_LASX(xvreplgr2vr_h, vr)
102
INSN_LASX(xvreplgr2vr_w, vr)
103
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/target/loongarch/vec_helper.c
106
+++ b/target/loongarch/vec_helper.c
107
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
108
\
109
shft_res = do_vsrlr_ ## E2(e2, sa); \
110
T1 mask; \
111
- mask = (1ull << sh) -1; \
112
+ mask = (1ull << sh) - 1; \
113
if (shft_res > mask) { \
114
return mask; \
115
} else { \
116
@@ -XXX,XX +XXX,XX @@ SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
117
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
118
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
119
120
-#define VSSRLRN(NAME, BIT, T, E1, E2) \
121
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
122
-{ \
123
- int i; \
124
- VReg *Vd = (VReg *)vd; \
125
- VReg *Vj = (VReg *)vj; \
126
- VReg *Vk = (VReg *)vk; \
127
- \
128
- for (i = 0; i < LSX_LEN/BIT; i++) { \
129
- Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
130
- } \
131
- Vd->D(1) = 0; \
132
+#define VSSRLRN(NAME, BIT, E1, E2, E3) \
133
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
134
+{ \
135
+ int i, j, ofs; \
136
+ VReg *Vd = (VReg *)vd; \
137
+ VReg *Vj = (VReg *)vj; \
138
+ VReg *Vk = (VReg *)vk; \
139
+ int oprsz = simd_oprsz(desc); \
140
+ \
141
+ ofs = LSX_LEN / BIT; \
142
+ for (i = 0; i < oprsz / 16; i++) { \
143
+ for (j = 0; j < ofs; j++) { \
144
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
145
+ Vk->E3(j + ofs * i) % BIT, \
146
+ BIT / 2 - 1); \
147
+ } \
148
+ Vd->D(2 * i + 1) = 0; \
149
+ } \
150
}
151
152
-VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
153
-VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
154
-VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
155
+VSSRLRN(vssrlrn_b_h, 16, B, H, UH)
156
+VSSRLRN(vssrlrn_h_w, 32, H, W, UW)
157
+VSSRLRN(vssrlrn_w_d, 64, W, D, UD)
158
159
#define SSRARNS(E1, E2, T1, T2) \
160
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
161
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
162
\
163
shft_res = do_vsrar_ ## E2(e2, sa); \
164
T2 mask; \
165
- mask = (1ll << sh) -1; \
166
+ mask = (1ll << sh) - 1; \
167
if (shft_res > mask) { \
168
return mask; \
169
} else if (shft_res < -(mask +1)) { \
170
@@ -XXX,XX +XXX,XX @@ SSRARNS(B, H, int16_t, int8_t)
171
SSRARNS(H, W, int32_t, int16_t)
172
SSRARNS(W, D, int64_t, int32_t)
173
174
-#define VSSRARN(NAME, BIT, T, E1, E2) \
175
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
176
-{ \
177
- int i; \
178
- VReg *Vd = (VReg *)vd; \
179
- VReg *Vj = (VReg *)vj; \
180
- VReg *Vk = (VReg *)vk; \
181
- \
182
- for (i = 0; i < LSX_LEN/BIT; i++) { \
183
- Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
184
- } \
185
- Vd->D(1) = 0; \
186
+#define VSSRARN(NAME, BIT, E1, E2, E3) \
187
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
188
+{ \
189
+ int i, j, ofs; \
190
+ VReg *Vd = (VReg *)vd; \
191
+ VReg *Vj = (VReg *)vj; \
192
+ VReg *Vk = (VReg *)vk; \
193
+ int oprsz = simd_oprsz(desc); \
194
+ \
195
+ ofs = LSX_LEN / BIT; \
196
+ for (i = 0; i < oprsz / 16; i++) { \
197
+ for (j = 0; j < ofs; j++) { \
198
+ Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
199
+ Vk->E3(j + ofs * i) % BIT, \
200
+                                                        BIT / 2 - 1);        \
201
+ } \
202
+ Vd->D(2 * i + 1) = 0; \
203
+ } \
204
}
205
206
-VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
207
-VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
208
-VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
209
+VSSRARN(vssrarn_b_h, 16, B, H, UH)
210
+VSSRARN(vssrarn_h_w, 32, H, W, UW)
211
+VSSRARN(vssrarn_w_d, 64, W, D, UD)
212
213
#define SSRLRNU(E1, E2, T1, T2, T3) \
214
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
215
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
216
shft_res = do_vsrlr_ ## E2(e2, sa); \
217
\
218
T2 mask; \
219
- mask = (1ull << sh) -1; \
220
+ mask = (1ull << sh) - 1; \
221
if (shft_res > mask) { \
222
return mask; \
223
} else { \
224
@@ -XXX,XX +XXX,XX @@ SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
225
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
226
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
227
228
-#define VSSRLRNU(NAME, BIT, T, E1, E2) \
229
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
230
-{ \
231
- int i; \
232
- VReg *Vd = (VReg *)vd; \
233
- VReg *Vj = (VReg *)vj; \
234
- VReg *Vk = (VReg *)vk; \
235
- \
236
- for (i = 0; i < LSX_LEN/BIT; i++) { \
237
- Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
238
- } \
239
- Vd->D(1) = 0; \
240
+#define VSSRLRNU(NAME, BIT, E1, E2, E3) \
241
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
242
+{ \
243
+ int i, j, ofs; \
244
+ VReg *Vd = (VReg *)vd; \
245
+ VReg *Vj = (VReg *)vj; \
246
+ VReg *Vk = (VReg *)vk; \
247
+ int oprsz = simd_oprsz(desc); \
248
+ \
249
+ ofs = LSX_LEN / BIT; \
250
+ for (i = 0; i < oprsz / 16; i++) { \
251
+ for (j = 0; j < ofs; j++) { \
252
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
253
+ Vk->E3(j + ofs * i) % BIT, \
254
+ BIT / 2); \
255
+ } \
256
+ Vd->D(2 * i + 1) = 0; \
257
+ } \
258
}
259
260
-VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
261
-VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
262
-VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
263
+VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH)
264
+VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW)
265
+VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD)
266
267
#define SSRARNU(E1, E2, T1, T2, T3) \
268
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
269
@@ -XXX,XX +XXX,XX @@ static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
270
shft_res = do_vsrar_ ## E2(e2, sa); \
271
} \
272
T2 mask; \
273
- mask = (1ull << sh) -1; \
274
+ mask = (1ull << sh) - 1; \
275
if (shft_res > mask) { \
276
return mask; \
277
} else { \
278
@@ -XXX,XX +XXX,XX @@ SSRARNU(B, H, uint16_t, uint8_t, int16_t)
279
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
280
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
281
282
-#define VSSRARNU(NAME, BIT, T, E1, E2) \
283
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
284
-{ \
285
- int i; \
286
- VReg *Vd = (VReg *)vd; \
287
- VReg *Vj = (VReg *)vj; \
288
- VReg *Vk = (VReg *)vk; \
289
- \
290
- for (i = 0; i < LSX_LEN/BIT; i++) { \
291
- Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
292
- } \
293
- Vd->D(1) = 0; \
294
+#define VSSRARNU(NAME, BIT, E1, E2, E3) \
295
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
296
+{ \
297
+ int i, j, ofs; \
298
+ VReg *Vd = (VReg *)vd; \
299
+ VReg *Vj = (VReg *)vj; \
300
+ VReg *Vk = (VReg *)vk; \
301
+ int oprsz = simd_oprsz(desc); \
302
+ \
303
+ ofs = LSX_LEN / BIT; \
304
+ for (i = 0; i < oprsz / 16; i++) { \
305
+ for (j = 0; j < ofs; j++) { \
306
+ Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
307
+ Vk->E3(j + ofs * i) % BIT, \
308
+ BIT / 2); \
309
+ } \
310
+ Vd->D(2 * i + 1) = 0; \
311
+ } \
312
}
313
314
-VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
315
-VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
316
-VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
317
+VSSRARNU(vssrarn_bu_h, 16, B, H, UH)
318
+VSSRARNU(vssrarn_hu_w, 32, H, W, UW)
319
+VSSRARNU(vssrarn_wu_d, 64, W, D, UD)
320
+
321
+#define VSSRLRNI(NAME, BIT, E1, E2) \
322
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
323
+{ \
324
+ int i, j, ofs; \
325
+ VReg temp = {}; \
326
+ VReg *Vd = (VReg *)vd; \
327
+ VReg *Vj = (VReg *)vj; \
328
+ int oprsz = simd_oprsz(desc); \
329
+ \
330
+ ofs = LSX_LEN / BIT; \
331
+ for (i = 0; i < oprsz / 16; i++) { \
332
+ for (j = 0; j < ofs; j++) { \
333
+ temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
334
+ imm, BIT / 2 - 1); \
335
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
336
+ imm, BIT / 2 - 1); \
337
+ } \
338
+ } \
339
+ *Vd = temp; \
340
+}
341
+
342
+static void do_vssrlrni_q(VReg *Vd, VReg *Vj,
343
+ uint64_t imm, int idx, Int128 mask)
344
+{
345
+ Int128 shft_res1, shft_res2, r1, r2;
346
+ if (imm == 0) {
347
+ shft_res1 = Vj->Q(idx);
348
+ shft_res2 = Vd->Q(idx);
349
+ } else {
350
+ r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
351
+ r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
352
+ shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
353
+ shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
354
+ }
355
356
-#define VSSRLRNI(NAME, BIT, E1, E2) \
357
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
358
-{ \
359
- int i; \
360
- VReg temp; \
361
- VReg *Vd = (VReg *)vd; \
362
- VReg *Vj = (VReg *)vj; \
363
- \
364
- for (i = 0; i < LSX_LEN/BIT; i++) { \
365
- temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
366
- temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
367
- } \
368
- *Vd = temp; \
369
+ if (int128_ult(mask, shft_res1)) {
370
+ Vd->D(idx * 2) = int128_getlo(mask);
371
+    } else {
372
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
373
+ }
374
+
375
+ if (int128_ult(mask, shft_res2)) {
376
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
377
+    } else {
378
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
379
+ }
380
}
381
382
-#define VSSRLRNI_Q(NAME, sh) \
383
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
384
-{ \
385
- Int128 shft_res1, shft_res2, mask, r1, r2; \
386
- VReg *Vd = (VReg *)vd; \
387
- VReg *Vj = (VReg *)vj; \
388
- \
389
- if (imm == 0) { \
390
- shft_res1 = Vj->Q(0); \
391
- shft_res2 = Vd->Q(0); \
392
- } else { \
393
- r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \
394
- r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \
395
- \
396
- shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \
397
- shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \
398
- } \
399
- \
400
- mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \
401
- \
402
- if (int128_ult(mask, shft_res1)) { \
403
- Vd->D(0) = int128_getlo(mask); \
404
- }else { \
405
- Vd->D(0) = int128_getlo(shft_res1); \
406
- } \
407
- \
408
- if (int128_ult(mask, shft_res2)) { \
409
- Vd->D(1) = int128_getlo(mask); \
410
- }else { \
411
- Vd->D(1) = int128_getlo(shft_res2); \
412
- } \
413
+void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
414
+{
415
+ int i;
416
+ Int128 mask;
417
+ VReg *Vd = (VReg *)vd;
418
+ VReg *Vj = (VReg *)vj;
419
+ int oprsz = simd_oprsz(desc);
420
+
421
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
422
+
423
+ for (i = 0; i < oprsz / 16; i++) {
424
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
425
+ }
426
}
427
428
VSSRLRNI(vssrlrni_b_h, 16, B, H)
429
VSSRLRNI(vssrlrni_h_w, 32, H, W)
430
VSSRLRNI(vssrlrni_w_d, 64, W, D)
431
-VSSRLRNI_Q(vssrlrni_d_q, 63)
432
-
433
-#define VSSRARNI(NAME, BIT, E1, E2) \
434
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
435
-{ \
436
- int i; \
437
- VReg temp; \
438
- VReg *Vd = (VReg *)vd; \
439
- VReg *Vj = (VReg *)vj; \
440
- \
441
- for (i = 0; i < LSX_LEN/BIT; i++) { \
442
- temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
443
- temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
444
- } \
445
- *Vd = temp; \
446
-}
447
448
-void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
449
-{
450
- Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
451
- VReg *Vd = (VReg *)vd;
452
- VReg *Vj = (VReg *)vj;
453
+#define VSSRARNI(NAME, BIT, E1, E2) \
454
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
455
+{ \
456
+ int i, j, ofs; \
457
+ VReg temp = {}; \
458
+ VReg *Vd = (VReg *)vd; \
459
+ VReg *Vj = (VReg *)vj; \
460
+ int oprsz = simd_oprsz(desc); \
461
+ \
462
+ ofs = LSX_LEN / BIT; \
463
+ for (i = 0; i < oprsz / 16; i++) { \
464
+ for (j = 0; j < ofs; j++) { \
465
+ temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
466
+ imm, BIT / 2 - 1); \
467
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
468
+ imm, BIT / 2 - 1); \
469
+ } \
470
+ } \
471
+ *Vd = temp; \
472
+}
473
+
474
+static void do_vssrarni_d_q(VReg *Vd, VReg *Vj,
475
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
476
+{
477
+ Int128 shft_res1, shft_res2, r1, r2;
478
479
if (imm == 0) {
480
- shft_res1 = Vj->Q(0);
481
- shft_res2 = Vd->Q(0);
482
+ shft_res1 = Vj->Q(idx);
483
+ shft_res2 = Vd->Q(idx);
484
} else {
485
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
486
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
487
-
488
- shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
489
- shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
490
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
491
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
492
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
493
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
494
}
495
-
496
- mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
497
- mask2 = int128_lshift(int128_one(), 63);
498
-
499
- if (int128_gt(shft_res1, mask1)) {
500
- Vd->D(0) = int128_getlo(mask1);
501
+ if (int128_gt(shft_res1, mask1)) {
502
+ Vd->D(idx * 2) = int128_getlo(mask1);
503
} else if (int128_lt(shft_res1, int128_neg(mask2))) {
504
- Vd->D(0) = int128_getlo(mask2);
505
+ Vd->D(idx * 2) = int128_getlo(mask2);
506
} else {
507
- Vd->D(0) = int128_getlo(shft_res1);
508
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
509
}
510
511
if (int128_gt(shft_res2, mask1)) {
512
- Vd->D(1) = int128_getlo(mask1);
513
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
514
} else if (int128_lt(shft_res2, int128_neg(mask2))) {
515
- Vd->D(1) = int128_getlo(mask2);
516
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
517
} else {
518
- Vd->D(1) = int128_getlo(shft_res2);
519
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
520
+ }
521
+}
522
+
523
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
524
+{
525
+ int i;
526
+ Int128 mask1, mask2;
527
+ VReg *Vd = (VReg *)vd;
528
+ VReg *Vj = (VReg *)vj;
529
+ int oprsz = simd_oprsz(desc);
530
+
531
+ mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
532
+ mask2 = int128_lshift(int128_one(), 63);
533
+
534
+ for (i = 0; i < oprsz / 16; i++) {
535
+ do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2);
536
}
537
}
538
539
@@ -XXX,XX +XXX,XX @@ VSSRARNI(vssrarni_b_h, 16, B, H)
540
VSSRARNI(vssrarni_h_w, 32, H, W)
541
VSSRARNI(vssrarni_w_d, 64, W, D)
542
543
-#define VSSRLRNUI(NAME, BIT, E1, E2) \
544
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
545
-{ \
546
- int i; \
547
- VReg temp; \
548
- VReg *Vd = (VReg *)vd; \
549
- VReg *Vj = (VReg *)vj; \
550
- \
551
- for (i = 0; i < LSX_LEN/BIT; i++) { \
552
- temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \
553
- temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
554
- } \
555
- *Vd = temp; \
556
+#define VSSRLRNUI(NAME, BIT, E1, E2) \
557
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
558
+{ \
559
+ int i, j, ofs; \
560
+ VReg temp = {}; \
561
+ VReg *Vd = (VReg *)vd; \
562
+ VReg *Vj = (VReg *)vj; \
563
+ int oprsz = simd_oprsz(desc); \
564
+ \
565
+ ofs = LSX_LEN / BIT; \
566
+ for (i = 0; i < oprsz / 16; i++) { \
567
+ for (j = 0; j < ofs; j++) { \
568
+ temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
569
+ imm, BIT / 2); \
570
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
571
+ imm, BIT / 2); \
572
+ } \
573
+ } \
574
+ *Vd = temp; \
575
+}
576
+
577
+void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
578
+{
579
+ int i;
580
+ Int128 mask;
581
+ VReg *Vd = (VReg *)vd;
582
+ VReg *Vj = (VReg *)vj;
583
+ int oprsz = simd_oprsz(desc);
584
+
585
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
586
+
587
+ for (i = 0; i < oprsz / 16; i++) {
588
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
589
+ }
590
}
591
592
VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
593
VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
594
VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
595
-VSSRLRNI_Q(vssrlrni_du_q, 64)
596
597
-#define VSSRARNUI(NAME, BIT, E1, E2) \
598
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
599
-{ \
600
- int i; \
601
- VReg temp; \
602
- VReg *Vd = (VReg *)vd; \
603
- VReg *Vj = (VReg *)vj; \
604
- \
605
- for (i = 0; i < LSX_LEN/BIT; i++) { \
606
- temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \
607
- temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \
608
- } \
609
- *Vd = temp; \
610
-}
611
-
612
-void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
613
-{
614
- Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
615
- VReg *Vd = (VReg *)vd;
616
- VReg *Vj = (VReg *)vj;
617
+#define VSSRARNUI(NAME, BIT, E1, E2) \
618
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
619
+{ \
620
+ int i, j, ofs; \
621
+ VReg temp = {}; \
622
+ VReg *Vd = (VReg *)vd; \
623
+ VReg *Vj = (VReg *)vj; \
624
+ int oprsz = simd_oprsz(desc); \
625
+ \
626
+ ofs = LSX_LEN / BIT; \
627
+ for (i = 0; i < oprsz / 16; i++) { \
628
+ for (j = 0; j < ofs; j++) { \
629
+ temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
630
+ imm, BIT / 2); \
631
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
632
+ imm, BIT / 2); \
633
+ } \
634
+ } \
635
+ *Vd = temp; \
636
+}
637
+
638
+static void do_vssrarni_du_q(VReg *Vd, VReg *Vj,
639
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
640
+{
641
+ Int128 shft_res1, shft_res2, r1, r2;
642
643
if (imm == 0) {
644
- shft_res1 = Vj->Q(0);
645
- shft_res2 = Vd->Q(0);
646
+ shft_res1 = Vj->Q(idx);
647
+ shft_res2 = Vd->Q(idx);
648
} else {
649
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
650
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
651
-
652
- shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
653
- shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
654
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
655
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
656
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
657
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
658
}
659
660
- if (int128_lt(Vj->Q(0), int128_zero())) {
661
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
662
shft_res1 = int128_zero();
663
}
664
- if (int128_lt(Vd->Q(0), int128_zero())) {
665
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
666
shft_res2 = int128_zero();
667
}
668
669
- mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
670
- mask2 = int128_lshift(int128_one(), 64);
671
-
672
if (int128_gt(shft_res1, mask1)) {
673
- Vd->D(0) = int128_getlo(mask1);
674
+ Vd->D(idx * 2) = int128_getlo(mask1);
675
} else if (int128_lt(shft_res1, int128_neg(mask2))) {
676
- Vd->D(0) = int128_getlo(mask2);
677
+ Vd->D(idx * 2) = int128_getlo(mask2);
678
} else {
679
- Vd->D(0) = int128_getlo(shft_res1);
680
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
681
}
682
683
if (int128_gt(shft_res2, mask1)) {
684
- Vd->D(1) = int128_getlo(mask1);
685
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
686
} else if (int128_lt(shft_res2, int128_neg(mask2))) {
687
- Vd->D(1) = int128_getlo(mask2);
688
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
689
} else {
690
- Vd->D(1) = int128_getlo(shft_res2);
691
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
692
+ }
693
+}
694
+
695
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
696
+{
697
+ int i;
698
+ Int128 mask1, mask2;
699
+ VReg *Vd = (VReg *)vd;
700
+ VReg *Vj = (VReg *)vj;
701
+ int oprsz = simd_oprsz(desc);
702
+
703
+ mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
704
+ mask2 = int128_lshift(int128_one(), 64);
705
+
706
+ for (i = 0; i < oprsz / 16; i++) {
707
+ do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2);
708
}
709
}
710
711
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
712
index XXXXXXX..XXXXXXX 100644
713
--- a/target/loongarch/insn_trans/trans_vec.c.inc
714
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
715
@@ -XXX,XX +XXX,XX @@ TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
716
TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
717
TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
718
TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
719
+TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
720
+TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
721
+TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
722
+TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
723
+TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
724
+TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
725
+TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
726
+TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
727
+TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
728
+TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
729
+TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
730
+TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
731
732
TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
733
TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
734
@@ -XXX,XX +XXX,XX @@ TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
735
TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
736
TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
737
TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
738
+TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
739
+TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
740
+TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
741
+TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
742
+TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
743
+TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
744
+TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
745
+TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
746
+TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
747
+TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
748
+TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
749
+TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
750
+TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
751
+TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
752
+TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
753
+TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
754
755
TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
756
TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
757
--
758
2.39.1
New patch
1
This patch includes:
2
- XVCLO.{B/H/W/D};
3
- XVCLZ.{B/H/W/D}.
1
4
5
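(Editorial aside: a quick sanity check on the semantics, my reading rather than anything stated in this patch -- count-leading-ones is just count-leading-zeros of the bitwise complement.)

#include <stdint.h>
#include <stdio.h>

/* __builtin_clz (GCC/Clang) is undefined for 0, so guard it;
 * clz of 0 is the full element width. */
static int clz32(uint32_t x)
{
    return x ? __builtin_clz(x) : 32;
}

static int clo32(uint32_t x)
{
    return clz32(~x);   /* leading ones of x == leading zeros of ~x */
}

int main(void)
{
    printf("clz=%d clo=%d\n", clz32(0x00ffffff), clo32(0xff000000));
    /* expected: clz=8 clo=8 */
    return 0;
}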
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-Id: <20230914022645.1151356-43-gaosong@loongson.cn>
8
---
9
target/loongarch/insns.decode | 9 +++++++++
10
target/loongarch/disas.c | 9 +++++++++
11
target/loongarch/vec_helper.c | 3 ++-
12
target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++
13
4 files changed, 28 insertions(+), 1 deletion(-)
14
15
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/insns.decode
18
+++ b/target/loongarch/insns.decode
19
@@ -XXX,XX +XXX,XX @@ xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5
20
xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6
21
xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7
22
23
+xvclo_b 0111 01101001 11000 00000 ..... ..... @vv
24
+xvclo_h 0111 01101001 11000 00001 ..... ..... @vv
25
+xvclo_w 0111 01101001 11000 00010 ..... ..... @vv
26
+xvclo_d 0111 01101001 11000 00011 ..... ..... @vv
27
+xvclz_b 0111 01101001 11000 00100 ..... ..... @vv
28
+xvclz_h 0111 01101001 11000 00101 ..... ..... @vv
29
+xvclz_w 0111 01101001 11000 00110 ..... ..... @vv
30
+xvclz_d 0111 01101001 11000 00111 ..... ..... @vv
31
+
32
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
33
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
34
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
35
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/loongarch/disas.c
38
+++ b/target/loongarch/disas.c
39
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssrarni_hu_w, vv_i)
40
INSN_LASX(xvssrarni_wu_d, vv_i)
41
INSN_LASX(xvssrarni_du_q, vv_i)
42
43
+INSN_LASX(xvclo_b, vv)
44
+INSN_LASX(xvclo_h, vv)
45
+INSN_LASX(xvclo_w, vv)
46
+INSN_LASX(xvclo_d, vv)
47
+INSN_LASX(xvclz_b, vv)
48
+INSN_LASX(xvclz_h, vv)
49
+INSN_LASX(xvclz_w, vv)
50
+INSN_LASX(xvclz_d, vv)
51
+
52
INSN_LASX(xvreplgr2vr_b, vr)
53
INSN_LASX(xvreplgr2vr_h, vr)
54
INSN_LASX(xvreplgr2vr_w, vr)
55
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/loongarch/vec_helper.c
58
+++ b/target/loongarch/vec_helper.c
59
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
60
int i; \
61
VReg *Vd = (VReg *)vd; \
62
VReg *Vj = (VReg *)vj; \
63
+ int oprsz = simd_oprsz(desc); \
64
\
65
- for (i = 0; i < LSX_LEN/BIT; i++) \
66
+ for (i = 0; i < oprsz / (BIT / 8); i++) \
67
{ \
68
Vd->E(i) = DO_OP(Vj->E(i)); \
69
} \
70
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/loongarch/insn_trans/trans_vec.c.inc
73
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
74
@@ -XXX,XX +XXX,XX @@ TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
75
TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
76
TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
77
TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
78
+TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
79
+TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
80
+TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
81
+TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
82
+TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
83
+TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
84
+TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
85
+TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
86
87
TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
88
TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
89
--
90
2.39.1
New patch
1
This patch includes:
2
- XVPCNT.{B/H/W/D}.
1
3
4
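(Editorial aside: the per-element operation is a plain population count -- the helper reuses ctpop64 and friends, as the VPCNT macro instantiations in the diff show. A minimal standalone sketch using Kernighan's bit trick, illustrative only:)

#include <stdint.h>
#include <stdio.h>

static int pcnt64(uint64_t x)
{
    int n = 0;
    while (x) {
        x &= x - 1;   /* clears the lowest set bit */
        n++;
    }
    return n;
}

int main(void)
{
    printf("%d\n", pcnt64(0xf0f0f0f0f0f0f0f0ull));   /* 32 */
    return 0;
}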
Signed-off-by: Song Gao <gaosong@loongson.cn>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-Id: <20230914022645.1151356-44-gaosong@loongson.cn>
7
---
8
target/loongarch/insns.decode | 5 +++++
9
target/loongarch/disas.c | 5 +++++
10
target/loongarch/vec_helper.c | 3 ++-
11
target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++
12
4 files changed, 16 insertions(+), 1 deletion(-)
13
14
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/loongarch/insns.decode
17
+++ b/target/loongarch/insns.decode
18
@@ -XXX,XX +XXX,XX @@ xvclz_h 0111 01101001 11000 00101 ..... ..... @vv
19
xvclz_w 0111 01101001 11000 00110 ..... ..... @vv
20
xvclz_d 0111 01101001 11000 00111 ..... ..... @vv
21
22
+xvpcnt_b 0111 01101001 11000 01000 ..... ..... @vv
23
+xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv
24
+xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv
25
+xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv
26
+
27
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
28
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
29
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
30
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/loongarch/disas.c
33
+++ b/target/loongarch/disas.c
34
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvclz_h, vv)
35
INSN_LASX(xvclz_w, vv)
36
INSN_LASX(xvclz_d, vv)
37
38
+INSN_LASX(xvpcnt_b, vv)
39
+INSN_LASX(xvpcnt_h, vv)
40
+INSN_LASX(xvpcnt_w, vv)
41
+INSN_LASX(xvpcnt_d, vv)
42
+
43
INSN_LASX(xvreplgr2vr_b, vr)
44
INSN_LASX(xvreplgr2vr_h, vr)
45
INSN_LASX(xvreplgr2vr_w, vr)
46
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/loongarch/vec_helper.c
49
+++ b/target/loongarch/vec_helper.c
50
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
51
int i; \
52
VReg *Vd = (VReg *)vd; \
53
VReg *Vj = (VReg *)vj; \
54
+ int oprsz = simd_oprsz(desc); \
55
\
56
- for (i = 0; i < LSX_LEN/BIT; i++) \
57
+ for (i = 0; i < oprsz / (BIT / 8); i++) \
58
{ \
59
Vd->E(i) = FN(Vj->E(i)); \
60
} \
61
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/loongarch/insn_trans/trans_vec.c.inc
64
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
65
@@ -XXX,XX +XXX,XX @@ TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
66
TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
67
TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
68
TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
69
+TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
70
+TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
71
+TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
72
+TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
73
74
static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
75
void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
76
--
77
2.39.1
New patch
1
This patch includes:
2
- XVBITCLR[I].{B/H/W/D};
3
- XVBITSET[I].{B/H/W/D};
4
- XVBITREV[I].{B/H/W/D}.
1
5
6
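(Editorial aside: the scalar semantics are the three DO_BIT* macros visible in the diff, with the bit index taken modulo the element width. A standalone sketch for the 32-bit element case; names are illustrative, not QEMU's.)

#include <stdint.h>
#include <stdio.h>

static uint32_t bitclr32(uint32_t a, uint32_t b) { return a & ~(1u << (b % 32)); }
static uint32_t bitset32(uint32_t a, uint32_t b) { return a |  (1u << (b % 32)); }
static uint32_t bitrev32(uint32_t a, uint32_t b) { return a ^  (1u << (b % 32)); }

int main(void)
{
    printf("%08x %08x %08x\n",
           bitclr32(0xffffffff, 4),   /* ffffffef: bit 4 cleared */
           bitset32(0, 35),           /* 35 %% 32 = 3 -> 00000008 */
           bitrev32(0x10, 4));        /* 00000000: bit 4 toggled off */
    return 0;
}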
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230914022645.1151356-45-gaosong@loongson.cn>
9
---
10
target/loongarch/insns.decode | 27 +++++++++++++
11
target/loongarch/disas.c | 25 ++++++++++++
12
target/loongarch/vec_helper.c | 44 +++++++++++----------
13
target/loongarch/insn_trans/trans_vec.c.inc | 24 +++++++++++
14
4 files changed, 99 insertions(+), 21 deletions(-)
15
16
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/insns.decode
19
+++ b/target/loongarch/insns.decode
20
@@ -XXX,XX +XXX,XX @@ xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv
21
xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv
22
xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv
23
24
+xvbitclr_b 0111 01010000 11000 ..... ..... ..... @vvv
25
+xvbitclr_h 0111 01010000 11001 ..... ..... ..... @vvv
26
+xvbitclr_w 0111 01010000 11010 ..... ..... ..... @vvv
27
+xvbitclr_d 0111 01010000 11011 ..... ..... ..... @vvv
28
+xvbitclri_b 0111 01110001 00000 01 ... ..... ..... @vv_ui3
29
+xvbitclri_h 0111 01110001 00000 1 .... ..... ..... @vv_ui4
30
+xvbitclri_w 0111 01110001 00001 ..... ..... ..... @vv_ui5
31
+xvbitclri_d 0111 01110001 0001 ...... ..... ..... @vv_ui6
32
+
33
+xvbitset_b 0111 01010000 11100 ..... ..... ..... @vvv
34
+xvbitset_h 0111 01010000 11101 ..... ..... ..... @vvv
35
+xvbitset_w 0111 01010000 11110 ..... ..... ..... @vvv
36
+xvbitset_d 0111 01010000 11111 ..... ..... ..... @vvv
37
+xvbitseti_b 0111 01110001 01000 01 ... ..... ..... @vv_ui3
38
+xvbitseti_h 0111 01110001 01000 1 .... ..... ..... @vv_ui4
39
+xvbitseti_w 0111 01110001 01001 ..... ..... ..... @vv_ui5
40
+xvbitseti_d 0111 01110001 0101 ...... ..... ..... @vv_ui6
41
+
42
+xvbitrev_b 0111 01010001 00000 ..... ..... ..... @vvv
43
+xvbitrev_h 0111 01010001 00001 ..... ..... ..... @vvv
44
+xvbitrev_w 0111 01010001 00010 ..... ..... ..... @vvv
45
+xvbitrev_d 0111 01010001 00011 ..... ..... ..... @vvv
46
+xvbitrevi_b 0111 01110001 10000 01 ... ..... ..... @vv_ui3
47
+xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4
48
+xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5
49
+xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6
50
+
51
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
52
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
53
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
54
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/loongarch/disas.c
57
+++ b/target/loongarch/disas.c
58
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvpcnt_h, vv)
59
INSN_LASX(xvpcnt_w, vv)
60
INSN_LASX(xvpcnt_d, vv)
61
62
+INSN_LASX(xvbitclr_b, vvv)
63
+INSN_LASX(xvbitclr_h, vvv)
64
+INSN_LASX(xvbitclr_w, vvv)
65
+INSN_LASX(xvbitclr_d, vvv)
66
+INSN_LASX(xvbitclri_b, vv_i)
67
+INSN_LASX(xvbitclri_h, vv_i)
68
+INSN_LASX(xvbitclri_w, vv_i)
69
+INSN_LASX(xvbitclri_d, vv_i)
70
+INSN_LASX(xvbitset_b, vvv)
71
+INSN_LASX(xvbitset_h, vvv)
72
+INSN_LASX(xvbitset_w, vvv)
73
+INSN_LASX(xvbitset_d, vvv)
74
+INSN_LASX(xvbitseti_b, vv_i)
75
+INSN_LASX(xvbitseti_h, vv_i)
76
+INSN_LASX(xvbitseti_w, vv_i)
77
+INSN_LASX(xvbitseti_d, vv_i)
78
+INSN_LASX(xvbitrev_b, vvv)
79
+INSN_LASX(xvbitrev_h, vvv)
80
+INSN_LASX(xvbitrev_w, vvv)
81
+INSN_LASX(xvbitrev_d, vvv)
82
+INSN_LASX(xvbitrevi_b, vv_i)
83
+INSN_LASX(xvbitrevi_h, vv_i)
84
+INSN_LASX(xvbitrevi_w, vv_i)
85
+INSN_LASX(xvbitrevi_d, vv_i)
86
+
87
INSN_LASX(xvreplgr2vr_b, vr)
88
INSN_LASX(xvreplgr2vr_h, vr)
89
INSN_LASX(xvreplgr2vr_w, vr)
90
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/loongarch/vec_helper.c
93
+++ b/target/loongarch/vec_helper.c
94
@@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_d, 64, UD, ctpop64)
95
#define DO_BITSET(a, bit) (a | 1ull << bit)
96
#define DO_BITREV(a, bit) (a ^ (1ull << bit))
97
98
-#define DO_BIT(NAME, BIT, E, DO_OP) \
99
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
100
-{ \
101
- int i; \
102
- VReg *Vd = (VReg *)vd; \
103
- VReg *Vj = (VReg *)vj; \
104
- VReg *Vk = (VReg *)vk; \
105
- \
106
- for (i = 0; i < LSX_LEN/BIT; i++) { \
107
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \
108
- } \
109
+#define DO_BIT(NAME, BIT, E, DO_OP) \
110
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
111
+{ \
112
+ int i; \
113
+ VReg *Vd = (VReg *)vd; \
114
+ VReg *Vj = (VReg *)vj; \
115
+ VReg *Vk = (VReg *)vk; \
116
+ int oprsz = simd_oprsz(desc); \
117
+ \
118
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
119
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i) % BIT);               \
120
+ } \
121
}
122
123
DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
124
@@ -XXX,XX +XXX,XX @@ DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
125
DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
126
DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
127
128
-#define DO_BITI(NAME, BIT, E, DO_OP) \
129
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
130
-{ \
131
- int i; \
132
- VReg *Vd = (VReg *)vd; \
133
- VReg *Vj = (VReg *)vj; \
134
- \
135
- for (i = 0; i < LSX_LEN/BIT; i++) { \
136
- Vd->E(i) = DO_OP(Vj->E(i), imm); \
137
- } \
138
+#define DO_BITI(NAME, BIT, E, DO_OP) \
139
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
140
+{ \
141
+ int i; \
142
+ VReg *Vd = (VReg *)vd; \
143
+ VReg *Vj = (VReg *)vj; \
144
+ int oprsz = simd_oprsz(desc); \
145
+ \
146
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
147
+ Vd->E(i) = DO_OP(Vj->E(i), imm); \
148
+ } \
149
}
150
151
DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
152
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
153
index XXXXXXX..XXXXXXX 100644
154
--- a/target/loongarch/insn_trans/trans_vec.c.inc
155
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
156
@@ -XXX,XX +XXX,XX @@ TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
157
TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
158
TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
159
TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
160
+TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
161
+TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
162
+TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
163
+TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
164
165
static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
166
void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
167
@@ -XXX,XX +XXX,XX @@ TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
168
TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
169
TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
170
TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
171
+TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
172
+TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
173
+TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
174
+TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
175
176
static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
177
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
178
@@ -XXX,XX +XXX,XX @@ TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
179
TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
180
TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
181
TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
182
+TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
183
+TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
184
+TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
185
+TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
186
187
static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
188
int64_t imm, uint32_t oprsz, uint32_t maxsz)
189
@@ -XXX,XX +XXX,XX @@ TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
190
TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
191
TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
192
TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
193
+TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
194
+TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
195
+TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
196
+TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
197
198
static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
199
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
200
@@ -XXX,XX +XXX,XX @@ TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
201
TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
202
TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
203
TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
204
+TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
205
+TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
206
+TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
207
+TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
208
209
static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
210
int64_t imm, uint32_t oprsz, uint32_t maxsz)
211
@@ -XXX,XX +XXX,XX @@ TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
212
TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
213
TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
214
TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
215
+TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
216
+TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
217
+TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
218
+TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
219
220
TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
221
TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
222
--
223
2.39.1
New patch
1
This patch includes:
2
- XVFRSTP[I].{B/H}.
1
3
4
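(Editorial aside: restating what the helper computes per 128-bit lane, as a scalar sketch of the loop in the diff -- scan for the first negative element and record its index, or the element count if none is negative, into a destination slot selected by Vk or the immediate. The array stand-in below is illustrative only.)

#include <stdint.h>
#include <stdio.h>

static int first_negative(const int8_t *v, int n)
{
    int j;
    for (j = 0; j < n; j++) {
        if (v[j] < 0) {
            break;
        }
    }
    return j;   /* n when no element is negative */
}

int main(void)
{
    int8_t lane[16] = { 1, 2, 3, -4, 5 };
    printf("%d\n", first_negative(lane, 16));   /* 3 */
    return 0;
}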
Signed-off-by: Song Gao <gaosong@loongson.cn>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-Id: <20230914022645.1151356-46-gaosong@loongson.cn>
7
---
8
target/loongarch/insns.decode | 5 ++++
9
target/loongarch/disas.c | 5 ++++
10
target/loongarch/vec_helper.c | 32 +++++++++++++--------
11
target/loongarch/insn_trans/trans_vec.c.inc | 4 +++
12
4 files changed, 34 insertions(+), 12 deletions(-)
13
14
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/loongarch/insns.decode
17
+++ b/target/loongarch/insns.decode
18
@@ -XXX,XX +XXX,XX @@ xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4
19
xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5
20
xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6
21
22
+xvfrstp_b 0111 01010010 10110 ..... ..... ..... @vvv
23
+xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv
24
+xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5
25
+xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5
26
+
27
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
28
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
29
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
30
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/loongarch/disas.c
33
+++ b/target/loongarch/disas.c
34
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvbitrevi_h, vv_i)
35
INSN_LASX(xvbitrevi_w, vv_i)
36
INSN_LASX(xvbitrevi_d, vv_i)
37
38
+INSN_LASX(xvfrstp_b, vvv)
39
+INSN_LASX(xvfrstp_h, vvv)
40
+INSN_LASX(xvfrstpi_b, vv_i)
41
+INSN_LASX(xvfrstpi_h, vv_i)
42
+
43
INSN_LASX(xvreplgr2vr_b, vr)
44
INSN_LASX(xvreplgr2vr_h, vr)
45
INSN_LASX(xvreplgr2vr_w, vr)
46
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/loongarch/vec_helper.c
49
+++ b/target/loongarch/vec_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
51
#define VFRSTP(NAME, BIT, MASK, E) \
52
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
53
{ \
54
- int i, m; \
55
+ int i, j, m, ofs; \
56
VReg *Vd = (VReg *)vd; \
57
VReg *Vj = (VReg *)vj; \
58
VReg *Vk = (VReg *)vk; \
59
+ int oprsz = simd_oprsz(desc); \
60
\
61
- for (i = 0; i < LSX_LEN/BIT; i++) { \
62
- if (Vj->E(i) < 0) { \
63
- break; \
64
+ ofs = LSX_LEN / BIT; \
65
+ for (i = 0; i < oprsz / 16; i++) { \
66
+ m = Vk->E(i * ofs) & MASK; \
67
+ for (j = 0; j < ofs; j++) { \
68
+ if (Vj->E(j + ofs * i) < 0) { \
69
+ break; \
70
+ } \
71
} \
72
+ Vd->E(m + i * ofs) = j; \
73
} \
74
- m = Vk->E(0) & MASK; \
75
- Vd->E(m) = i; \
76
}
77
78
VFRSTP(vfrstp_b, 8, 0xf, B)
79
@@ -XXX,XX +XXX,XX @@ VFRSTP(vfrstp_h, 16, 0x7, H)
80
#define VFRSTPI(NAME, BIT, E) \
81
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
82
{ \
83
- int i, m; \
84
+ int i, j, m, ofs; \
85
VReg *Vd = (VReg *)vd; \
86
VReg *Vj = (VReg *)vj; \
87
+ int oprsz = simd_oprsz(desc); \
88
\
89
- for (i = 0; i < LSX_LEN/BIT; i++) { \
90
- if (Vj->E(i) < 0) { \
91
- break; \
92
+ ofs = LSX_LEN / BIT; \
93
+ m = imm % ofs; \
94
+ for (i = 0; i < oprsz / 16; i++) { \
95
+ for (j = 0; j < ofs; j++) { \
96
+ if (Vj->E(j + ofs * i) < 0) { \
97
+ break; \
98
+ } \
99
} \
100
+ Vd->E(m + i * ofs) = j; \
101
} \
102
- m = imm % (LSX_LEN/BIT); \
103
- Vd->E(m) = i; \
104
}
105
106
VFRSTPI(vfrstpi_b, 8, B)
107
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/loongarch/insn_trans/trans_vec.c.inc
110
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
111
@@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
112
TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
113
TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
114
TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
115
+TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
116
+TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
117
+TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
118
+TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
119
120
TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
121
TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
122
--
123
2.39.1
New patch
1
This patch includes:
2
- XVF{ADD/SUB/MUL/DIV}.{S/D};
3
- XVF{MADD/MSUB/NMADD/NMSUB}.{S/D};
4
- XVF{MAX/MIN}.{S/D};
5
- XVF{MAXA/MINA}.{S/D};
6
- XVFLOGB.{S/D};
7
- XVFCLASS.{S/D};
8
- XVF{SQRT/RECIP/RSQRT}.{S/D}.
1
9
10
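(Editorial aside: the least familiar ops here are probably FMAXA/FMINA. Assuming magnitude-based selection, which is my reading of the MAXA/MINA naming and not something this patch spells out, a rough scalar sketch that deliberately ignores the softfloat NaN and exception-flag handling the real helpers get via the *_ptr/env path:)

#include <math.h>
#include <stdio.h>

/* Assumed semantics: select by absolute value, larger value on
 * equal magnitude; the real helpers run through QEMU softfloat. */
static float fmaxa(float a, float b)
{
    if (fabsf(a) != fabsf(b)) {
        return fabsf(a) > fabsf(b) ? a : b;
    }
    return fmaxf(a, b);
}

int main(void)
{
    printf("%g\n", fmaxa(-8.0f, 2.0f));   /* -8: larger magnitude wins */
    return 0;
}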
Signed-off-by: Song Gao <gaosong@loongson.cn>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-Id: <20230914022645.1151356-47-gaosong@loongson.cn>
13
---
14
target/loongarch/insns.decode | 41 +++++++++++
15
target/loongarch/disas.c | 46 +++++++++++++
16
target/loongarch/vec_helper.c | 12 ++--
17
target/loongarch/insn_trans/trans_vec.c.inc | 75 +++++++++++++++++----
18
4 files changed, 158 insertions(+), 16 deletions(-)
19
20
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv
xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5
xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5

+xvfadd_s 0111 01010011 00001 ..... ..... ..... @vvv
+xvfadd_d 0111 01010011 00010 ..... ..... ..... @vvv
+xvfsub_s 0111 01010011 00101 ..... ..... ..... @vvv
+xvfsub_d 0111 01010011 00110 ..... ..... ..... @vvv
+xvfmul_s 0111 01010011 10001 ..... ..... ..... @vvv
+xvfmul_d 0111 01010011 10010 ..... ..... ..... @vvv
+xvfdiv_s 0111 01010011 10101 ..... ..... ..... @vvv
+xvfdiv_d 0111 01010011 10110 ..... ..... ..... @vvv
+
+xvfmadd_s 0000 10100001 ..... ..... ..... ..... @vvvv
+xvfmadd_d 0000 10100010 ..... ..... ..... ..... @vvvv
+xvfmsub_s 0000 10100101 ..... ..... ..... ..... @vvvv
+xvfmsub_d 0000 10100110 ..... ..... ..... ..... @vvvv
+xvfnmadd_s 0000 10101001 ..... ..... ..... ..... @vvvv
+xvfnmadd_d 0000 10101010 ..... ..... ..... ..... @vvvv
+xvfnmsub_s 0000 10101101 ..... ..... ..... ..... @vvvv
+xvfnmsub_d 0000 10101110 ..... ..... ..... ..... @vvvv
+
+xvfmax_s 0111 01010011 11001 ..... ..... ..... @vvv
+xvfmax_d 0111 01010011 11010 ..... ..... ..... @vvv
+xvfmin_s 0111 01010011 11101 ..... ..... ..... @vvv
+xvfmin_d 0111 01010011 11110 ..... ..... ..... @vvv
+
+xvfmaxa_s 0111 01010100 00001 ..... ..... ..... @vvv
+xvfmaxa_d 0111 01010100 00010 ..... ..... ..... @vvv
+xvfmina_s 0111 01010100 00101 ..... ..... ..... @vvv
+xvfmina_d 0111 01010100 00110 ..... ..... ..... @vvv
+
+xvflogb_s 0111 01101001 11001 10001 ..... ..... @vv
+xvflogb_d 0111 01101001 11001 10010 ..... ..... @vv
+
+xvfclass_s 0111 01101001 11001 10101 ..... ..... @vv
+xvfclass_d 0111 01101001 11001 10110 ..... ..... @vv
+
+xvfsqrt_s 0111 01101001 11001 11001 ..... ..... @vv
+xvfsqrt_d 0111 01101001 11001 11010 ..... ..... @vv
+xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv
+xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
+xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
+xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
}

+static void output_vvvv_x(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, x%d, x%d", a->vd, a->vj, a->vk, a->va);
+}
+
static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvfrstp_h, vvv)
INSN_LASX(xvfrstpi_b, vv_i)
INSN_LASX(xvfrstpi_h, vv_i)

+INSN_LASX(xvfadd_s, vvv)
+INSN_LASX(xvfadd_d, vvv)
+INSN_LASX(xvfsub_s, vvv)
+INSN_LASX(xvfsub_d, vvv)
+INSN_LASX(xvfmul_s, vvv)
+INSN_LASX(xvfmul_d, vvv)
+INSN_LASX(xvfdiv_s, vvv)
+INSN_LASX(xvfdiv_d, vvv)
+
+INSN_LASX(xvfmadd_s, vvvv)
+INSN_LASX(xvfmadd_d, vvvv)
+INSN_LASX(xvfmsub_s, vvvv)
+INSN_LASX(xvfmsub_d, vvvv)
+INSN_LASX(xvfnmadd_s, vvvv)
+INSN_LASX(xvfnmadd_d, vvvv)
+INSN_LASX(xvfnmsub_s, vvvv)
+INSN_LASX(xvfnmsub_d, vvvv)
+
+INSN_LASX(xvfmax_s, vvv)
+INSN_LASX(xvfmax_d, vvv)
+INSN_LASX(xvfmin_s, vvv)
+INSN_LASX(xvfmin_d, vvv)
+
+INSN_LASX(xvfmaxa_s, vvv)
+INSN_LASX(xvfmaxa_d, vvv)
+INSN_LASX(xvfmina_s, vvv)
+INSN_LASX(xvfmina_d, vvv)
+
+INSN_LASX(xvflogb_s, vv)
+INSN_LASX(xvflogb_d, vv)
+
+INSN_LASX(xvfclass_s, vv)
+INSN_LASX(xvfclass_d, vv)
+
+INSN_LASX(xvfsqrt_s, vv)
+INSN_LASX(xvfsqrt_d, vv)
+INSN_LASX(xvfrecip_s, vv)
+INSN_LASX(xvfrecip_d, vv)
+INSN_LASX(xvfrsqrt_s, vv)
+INSN_LASX(xvfrsqrt_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
vec_update_fcsr0(env, GETPC()); \
} \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
VReg *Va = (VReg *)va; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
vec_update_fcsr0(env, GETPC()); \
} \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(env, Vj->E(i)); \
} \
}
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(env, Vj->E(i)); \
} \
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz)
static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
gen_helper_gvec_4_ptr *fn)
{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
vec_full_offset(a->vj),
vec_full_offset(a->vk),
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
gen_helper_gvec_4_ptr *fn)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}

+static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ return gen_vvvv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
gen_helper_gvec_4 *fn)
{
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
gen_helper_gvec_3_ptr *fn)
{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
vec_full_offset(a->vj),
vec_full_offset(a->vk),
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
gen_helper_gvec_3_ptr *fn)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gen_vvv_ptr_vl(ctx, a, 16, fn);
}

+static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ return gen_vvv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
gen_helper_gvec_3 *fn)
{
@@ -XXX,XX +XXX,XX @@ static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
gen_helper_gvec_2_ptr *fn)
{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
vec_full_offset(a->vj),
cpu_env,
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
gen_helper_gvec_2_ptr *fn)
{
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
return gen_vv_ptr_vl(ctx, a, 16, fn);
}

+static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ return gen_vv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
gen_helper_gvec_2 *fn)
{
@@ -XXX,XX +XXX,XX @@ TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
+TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
+TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
+TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
+TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
+TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
+TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
+TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
+TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)

TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
+TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
+TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
+TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
+TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
+TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
+TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
+TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
+TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)

TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
+TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
+TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
+TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
+TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)

TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
+TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
+TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
+TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
+TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)

TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
+TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
+TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)

TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
+TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
+TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)

TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
+TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
+TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)

TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
--
2.39.1
New patch

This patch includes:
- XVFCVT{L/H}.{S.H/D.S};
- XVFCVT.{H.S/S.D};
- XVFRINT[{RNE/RZ/RP/RM}].{S/D};
- XVFTINT[{RNE/RZ/RP/RM}].{W.S/L.D};
- XVFTINT[RZ].{WU.S/LU.D};
- XVFTINT[{RNE/RZ/RP/RM}].W.D;
- XVFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S;
- XVFFINT.{S.W/D.L}[U];
- XVFFINT.S.L, XVFFINT{L/H}.D.W.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-48-gaosong@loongson.cn>
---
target/loongarch/insns.decode | 58 +++++
target/loongarch/disas.c | 56 +++++
target/loongarch/vec_helper.c | 235 +++++++++++++-------
target/loongarch/insn_trans/trans_vec.c.inc | 52 +++++
4 files changed, 315 insertions(+), 86 deletions(-)
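One note before the diff: the widening/narrowing conversions below index
source lanes as j + ofs * 2 * i (the low half of each 128-bit group) or
j + ofs * (2 * i + 1) (the high half), with ofs = LSX_LEN / BIT, so
vfcvtl/vfcvth stay within their group on LASX rather than reading across the
whole 256-bit register. A tiny standalone program (hypothetical, not part of
the patch) that just prints the lane map of xvfcvtl.s.h at oprsz = 32:

    #include <stdio.h>

    int main(void)
    {
        int ofs = 128 / 32;                  /* 4 UW lanes per group */
        for (int i = 0; i < 32 / 16; i++) {  /* two 128-bit groups */
            for (int j = 0; j < ofs; j++) {
                printf("UW(%d) <- UH(%d)\n", j + ofs * i, j + ofs * 2 * i);
            }
        }
        return 0;   /* UW(0..3) <- UH(0..3); UW(4..7) <- UH(8..11) */
    }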
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv

+xvfcvtl_s_h 0111 01101001 11011 11010 ..... ..... @vv
+xvfcvth_s_h 0111 01101001 11011 11011 ..... ..... @vv
+xvfcvtl_d_s 0111 01101001 11011 11100 ..... ..... @vv
+xvfcvth_d_s 0111 01101001 11011 11101 ..... ..... @vv
+xvfcvt_h_s 0111 01010100 01100 ..... ..... ..... @vvv
+xvfcvt_s_d 0111 01010100 01101 ..... ..... ..... @vvv
+
+xvfrintrne_s 0111 01101001 11010 11101 ..... ..... @vv
+xvfrintrne_d 0111 01101001 11010 11110 ..... ..... @vv
+xvfrintrz_s 0111 01101001 11010 11001 ..... ..... @vv
+xvfrintrz_d 0111 01101001 11010 11010 ..... ..... @vv
+xvfrintrp_s 0111 01101001 11010 10101 ..... ..... @vv
+xvfrintrp_d 0111 01101001 11010 10110 ..... ..... @vv
+xvfrintrm_s 0111 01101001 11010 10001 ..... ..... @vv
+xvfrintrm_d 0111 01101001 11010 10010 ..... ..... @vv
+xvfrint_s 0111 01101001 11010 01101 ..... ..... @vv
+xvfrint_d 0111 01101001 11010 01110 ..... ..... @vv
+
+xvftintrne_w_s 0111 01101001 11100 10100 ..... ..... @vv
+xvftintrne_l_d 0111 01101001 11100 10101 ..... ..... @vv
+xvftintrz_w_s 0111 01101001 11100 10010 ..... ..... @vv
+xvftintrz_l_d 0111 01101001 11100 10011 ..... ..... @vv
+xvftintrp_w_s 0111 01101001 11100 10000 ..... ..... @vv
+xvftintrp_l_d 0111 01101001 11100 10001 ..... ..... @vv
+xvftintrm_w_s 0111 01101001 11100 01110 ..... ..... @vv
+xvftintrm_l_d 0111 01101001 11100 01111 ..... ..... @vv
+xvftint_w_s 0111 01101001 11100 01100 ..... ..... @vv
+xvftint_l_d 0111 01101001 11100 01101 ..... ..... @vv
+xvftintrz_wu_s 0111 01101001 11100 11100 ..... ..... @vv
+xvftintrz_lu_d 0111 01101001 11100 11101 ..... ..... @vv
+xvftint_wu_s 0111 01101001 11100 10110 ..... ..... @vv
+xvftint_lu_d 0111 01101001 11100 10111 ..... ..... @vv
+
+xvftintrne_w_d 0111 01010100 10111 ..... ..... ..... @vvv
+xvftintrz_w_d 0111 01010100 10110 ..... ..... ..... @vvv
+xvftintrp_w_d 0111 01010100 10101 ..... ..... ..... @vvv
+xvftintrm_w_d 0111 01010100 10100 ..... ..... ..... @vvv
+xvftint_w_d 0111 01010100 10011 ..... ..... ..... @vvv
+
+xvftintrnel_l_s 0111 01101001 11101 01000 ..... ..... @vv
+xvftintrneh_l_s 0111 01101001 11101 01001 ..... ..... @vv
+xvftintrzl_l_s 0111 01101001 11101 00110 ..... ..... @vv
+xvftintrzh_l_s 0111 01101001 11101 00111 ..... ..... @vv
+xvftintrpl_l_s 0111 01101001 11101 00100 ..... ..... @vv
+xvftintrph_l_s 0111 01101001 11101 00101 ..... ..... @vv
+xvftintrml_l_s 0111 01101001 11101 00010 ..... ..... @vv
+xvftintrmh_l_s 0111 01101001 11101 00011 ..... ..... @vv
+xvftintl_l_s 0111 01101001 11101 00000 ..... ..... @vv
+xvftinth_l_s 0111 01101001 11101 00001 ..... ..... @vv
+
+xvffint_s_w 0111 01101001 11100 00000 ..... ..... @vv
+xvffint_d_l 0111 01101001 11100 00010 ..... ..... @vv
+xvffint_s_wu 0111 01101001 11100 00001 ..... ..... @vv
+xvffint_d_lu 0111 01101001 11100 00011 ..... ..... @vv
+xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv
+xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv
+xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvfrecip_d, vv)
INSN_LASX(xvfrsqrt_s, vv)
INSN_LASX(xvfrsqrt_d, vv)

+INSN_LASX(xvfcvtl_s_h, vv)
+INSN_LASX(xvfcvth_s_h, vv)
+INSN_LASX(xvfcvtl_d_s, vv)
+INSN_LASX(xvfcvth_d_s, vv)
+INSN_LASX(xvfcvt_h_s, vvv)
+INSN_LASX(xvfcvt_s_d, vvv)
+
+INSN_LASX(xvfrint_s, vv)
+INSN_LASX(xvfrint_d, vv)
+INSN_LASX(xvfrintrm_s, vv)
+INSN_LASX(xvfrintrm_d, vv)
+INSN_LASX(xvfrintrp_s, vv)
+INSN_LASX(xvfrintrp_d, vv)
+INSN_LASX(xvfrintrz_s, vv)
+INSN_LASX(xvfrintrz_d, vv)
+INSN_LASX(xvfrintrne_s, vv)
+INSN_LASX(xvfrintrne_d, vv)
+
+INSN_LASX(xvftint_w_s, vv)
+INSN_LASX(xvftint_l_d, vv)
+INSN_LASX(xvftintrm_w_s, vv)
+INSN_LASX(xvftintrm_l_d, vv)
+INSN_LASX(xvftintrp_w_s, vv)
+INSN_LASX(xvftintrp_l_d, vv)
+INSN_LASX(xvftintrz_w_s, vv)
+INSN_LASX(xvftintrz_l_d, vv)
+INSN_LASX(xvftintrne_w_s, vv)
+INSN_LASX(xvftintrne_l_d, vv)
+INSN_LASX(xvftint_wu_s, vv)
+INSN_LASX(xvftint_lu_d, vv)
+INSN_LASX(xvftintrz_wu_s, vv)
+INSN_LASX(xvftintrz_lu_d, vv)
+INSN_LASX(xvftint_w_d, vvv)
+INSN_LASX(xvftintrm_w_d, vvv)
+INSN_LASX(xvftintrp_w_d, vvv)
+INSN_LASX(xvftintrz_w_d, vvv)
+INSN_LASX(xvftintrne_w_d, vvv)
+INSN_LASX(xvftintl_l_s, vv)
+INSN_LASX(xvftinth_l_s, vv)
+INSN_LASX(xvftintrml_l_s, vv)
+INSN_LASX(xvftintrmh_l_s, vv)
+INSN_LASX(xvftintrpl_l_s, vv)
+INSN_LASX(xvftintrph_l_s, vv)
+INSN_LASX(xvftintrzl_l_s, vv)
+INSN_LASX(xvftintrzh_l_s, vv)
+INSN_LASX(xvftintrnel_l_s, vv)
+INSN_LASX(xvftintrneh_l_s, vv)
+
+INSN_LASX(xvffint_s_w, vv)
+INSN_LASX(xvffint_s_wu, vv)
+INSN_LASX(xvffint_d_l, vv)
+INSN_LASX(xvffint_d_lu, vv)
+INSN_LASX(xvffintl_d_w, vv)
+INSN_LASX(xvffinth_d_w, vv)
+INSN_LASX(xvffint_s_l, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
void HELPER(vfcvtl_s_h)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/32; i++) {
- temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_s_h)(void *vd, void *vj,
void HELPER(vfcvtl_d_s)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/64; i++) {
- temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_d_s)(void *vd, void *vj,
void HELPER(vfcvth_s_h)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/32; i++) {
- temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_s_h)(void *vd, void *vj,
void HELPER(vfcvth_d_s)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/64; i++) {
- temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_d_s)(void *vd, void *vj,
void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for(i = 0; i < LSX_LEN/32; i++) {
- temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
- temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i),
+ &env->fp_status);
+ temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for(i = 0; i < LSX_LEN/64; i++) {
- temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
- temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i),
+ &env->fp_status);
+ temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_s)(void *vd, void *vj,
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

vec_clear_cause(env);
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < oprsz / 4; i++) {
Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
vec_update_fcsr0(env, GETPC());
}
@@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_d)(void *vd, void *vj,
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < oprsz / 8; i++) {
Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
vec_update_fcsr0(env, GETPC());
}
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
set_float_rounding_mode(MODE, &env->fp_status); \
Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
@@ -XXX,XX +XXX,XX @@ FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)

-#define FTINT_W_D(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.W(i + 2) = FN(env, Vj->UD(i)); \
- temp.W(i) = FN(env, Vk->UD(i)); \
- } \
- *Vd = temp; \
+#define FTINT_W_D(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \
+ temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
}

FTINT_W_D(vftint_w_d, do_float64_to_int32)
@@ -XXX,XX +XXX,XX @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)

-#define FTINTL_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i)); \
- } \
- *Vd = temp; \
+#define FTINTL_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
+ } \
+ } \
+ *Vd = temp; \
}

FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
@@ -XXX,XX +XXX,XX @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)

-#define FTINTH_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i + 2)); \
- } \
- *Vd = temp; \
+#define FTINTH_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
+ } \
+ } \
+ *Vd = temp; \
}

FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
@@ -XXX,XX +XXX,XX @@ DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
void HELPER(vffintl_d_w)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vffintl_d_w)(void *vd, void *vj,
void HELPER(vffinth_d_w)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -XXX,XX +XXX,XX @@ void HELPER(vffinth_d_w)(void *vd, void *vj,
void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);

+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
- temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i),
+ &env->fp_status);
+ temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
+TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
+TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
+TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
+TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
+TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
+TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)

TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
+TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
+TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
+TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
+TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
+TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
+TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
+TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
+TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
+TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
+TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)

TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
+TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
+TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
+TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
+TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
+TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
+TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
+TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
+TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
+TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
+TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
+TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
+TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
+TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
+TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
+TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
+TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
+TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
+TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
+TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
+TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
+TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
+TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
+TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
+TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
+TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
+TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
+TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
+TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
+TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)

TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
@@ -XXX,XX +XXX,XX @@ TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
+TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
+TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
+TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
+TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
+TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
+TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
+TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)

static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
{
--
2.39.1
New patch

This patch includes:
- XVSEQ[I].{B/H/W/D};
- XVSLE[I].{B/H/W/D}[U];
- XVSLT[I].{B/H/W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-49-gaosong@loongson.cn>
---
target/loongarch/insns.decode | 43 ++++
target/loongarch/disas.c | 43 ++++
target/loongarch/vec_helper.c | 23 +-
target/loongarch/insn_trans/trans_vec.c.inc | 257 ++++++++------------
4 files changed, 201 insertions(+), 165 deletions(-)
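One note before the diff: the hand-rolled GVecGen2i tables for
vseqi/vslei/vslti collapse into tcg_gen_gvec_cmpi(), which handles both the
inline vector expansion and the out-of-line fallback itself; the signed and
unsigned variants now differ only in the TCGCond passed in. A minimal sketch
of what one expansion amounts to (vd_ofs/vj_ofs as produced by
vec_full_offset() in the patch; the wrapper name is hypothetical):

    /* vslti.w-style compare: Vd->W(i) = Vj->W(i) < imm ? -1 : 0 over oprsz
     * bytes; lanes between oprsz and maxsz are zeroed by the gvec layer. */
    static void expand_slti_w_sketch(uint32_t vd_ofs, uint32_t vj_ofs,
                                     int64_t imm, uint32_t oprsz,
                                     uint32_t maxsz)
    {
        tcg_gen_gvec_cmpi(TCG_COND_LT, MO_32, vd_ofs, vj_ofs,
                          imm, oprsz, maxsz);
    }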
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv
xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv
xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv

+xvseq_b 0111 01000000 00000 ..... ..... ..... @vvv
+xvseq_h 0111 01000000 00001 ..... ..... ..... @vvv
+xvseq_w 0111 01000000 00010 ..... ..... ..... @vvv
+xvseq_d 0111 01000000 00011 ..... ..... ..... @vvv
+xvseqi_b 0111 01101000 00000 ..... ..... ..... @vv_i5
+xvseqi_h 0111 01101000 00001 ..... ..... ..... @vv_i5
+xvseqi_w 0111 01101000 00010 ..... ..... ..... @vv_i5
+xvseqi_d 0111 01101000 00011 ..... ..... ..... @vv_i5
+
+xvsle_b 0111 01000000 00100 ..... ..... ..... @vvv
+xvsle_h 0111 01000000 00101 ..... ..... ..... @vvv
+xvsle_w 0111 01000000 00110 ..... ..... ..... @vvv
+xvsle_d 0111 01000000 00111 ..... ..... ..... @vvv
+xvslei_b 0111 01101000 00100 ..... ..... ..... @vv_i5
+xvslei_h 0111 01101000 00101 ..... ..... ..... @vv_i5
+xvslei_w 0111 01101000 00110 ..... ..... ..... @vv_i5
+xvslei_d 0111 01101000 00111 ..... ..... ..... @vv_i5
+xvsle_bu 0111 01000000 01000 ..... ..... ..... @vvv
+xvsle_hu 0111 01000000 01001 ..... ..... ..... @vvv
+xvsle_wu 0111 01000000 01010 ..... ..... ..... @vvv
+xvsle_du 0111 01000000 01011 ..... ..... ..... @vvv
+xvslei_bu 0111 01101000 01000 ..... ..... ..... @vv_ui5
+xvslei_hu 0111 01101000 01001 ..... ..... ..... @vv_ui5
+xvslei_wu 0111 01101000 01010 ..... ..... ..... @vv_ui5
+xvslei_du 0111 01101000 01011 ..... ..... ..... @vv_ui5
+
+xvslt_b 0111 01000000 01100 ..... ..... ..... @vvv
+xvslt_h 0111 01000000 01101 ..... ..... ..... @vvv
+xvslt_w 0111 01000000 01110 ..... ..... ..... @vvv
+xvslt_d 0111 01000000 01111 ..... ..... ..... @vvv
+xvslti_b 0111 01101000 01100 ..... ..... ..... @vv_i5
+xvslti_h 0111 01101000 01101 ..... ..... ..... @vv_i5
+xvslti_w 0111 01101000 01110 ..... ..... ..... @vv_i5
+xvslti_d 0111 01101000 01111 ..... ..... ..... @vv_i5
+xvslt_bu 0111 01000000 10000 ..... ..... ..... @vvv
+xvslt_hu 0111 01000000 10001 ..... ..... ..... @vvv
+xvslt_wu 0111 01000000 10010 ..... ..... ..... @vvv
+xvslt_du 0111 01000000 10011 ..... ..... ..... @vvv
+xvslti_bu 0111 01101000 10000 ..... ..... ..... @vv_ui5
+xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5
+xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5
+xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvffintl_d_w, vv)
INSN_LASX(xvffinth_d_w, vv)
INSN_LASX(xvffint_s_l, vvv)

+INSN_LASX(xvseq_b, vvv)
+INSN_LASX(xvseq_h, vvv)
+INSN_LASX(xvseq_w, vvv)
+INSN_LASX(xvseq_d, vvv)
+INSN_LASX(xvseqi_b, vv_i)
+INSN_LASX(xvseqi_h, vv_i)
+INSN_LASX(xvseqi_w, vv_i)
+INSN_LASX(xvseqi_d, vv_i)
+
+INSN_LASX(xvsle_b, vvv)
+INSN_LASX(xvsle_h, vvv)
+INSN_LASX(xvsle_w, vvv)
+INSN_LASX(xvsle_d, vvv)
+INSN_LASX(xvslei_b, vv_i)
+INSN_LASX(xvslei_h, vv_i)
+INSN_LASX(xvslei_w, vv_i)
+INSN_LASX(xvslei_d, vv_i)
+INSN_LASX(xvsle_bu, vvv)
+INSN_LASX(xvsle_hu, vvv)
+INSN_LASX(xvsle_wu, vvv)
+INSN_LASX(xvsle_du, vvv)
+INSN_LASX(xvslei_bu, vv_i)
+INSN_LASX(xvslei_hu, vv_i)
+INSN_LASX(xvslei_wu, vv_i)
+INSN_LASX(xvslei_du, vv_i)
+
+INSN_LASX(xvslt_b, vvv)
+INSN_LASX(xvslt_h, vvv)
+INSN_LASX(xvslt_w, vvv)
+INSN_LASX(xvslt_d, vvv)
+INSN_LASX(xvslti_b, vv_i)
+INSN_LASX(xvslti_h, vv_i)
+INSN_LASX(xvslti_w, vv_i)
+INSN_LASX(xvslti_d, vv_i)
+INSN_LASX(xvslt_bu, vvv)
+INSN_LASX(xvslt_hu, vvv)
+INSN_LASX(xvslt_wu, vvv)
+INSN_LASX(xvslt_du, vvv)
+INSN_LASX(xvslti_bu, vv_i)
+INSN_LASX(xvslti_hu, vv_i)
+INSN_LASX(xvslti_wu, vv_i)
+INSN_LASX(xvslti_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
#define VSLE(a, b) (a <= b ? -1 : 0)
#define VSLT(a, b) (a < b ? -1 : 0)

-#define VCMPI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
- } \
+#define VCMPI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+ } \
}

VCMPI(vseqi_b, 8, B, VSEQ)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)

-static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
+static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
+ uint32_t oprsz, MemOp mop, TCGCond cond)
{
uint32_t vd_ofs, vj_ofs, vk_ofs;

- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
vj_ofs = vec_full_offset(a->vj);
vk_ofs = vec_full_offset(a->vk);

- tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+ tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
return true;
}

-static void do_cmpi_vec(TCGCond cond,
- unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm));
-}
-
-static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm);
-}
-
-static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_cmpi_vec(TCG_COND_LE, vece, t, a, imm);
-}
-
-static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_cmpi_vec(TCG_COND_LT, vece, t, a, imm);
-}
-
-static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm);
-}
-
-static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
-}
-
-#define DO_CMPI_S(NAME) \
-static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
-{ \
- uint32_t vd_ofs, vj_ofs; \
- \
- if (!check_vec(ctx, 16)) { \
- return true; \
- } \
- \
- static const TCGOpcode vecop_list[] = { \
- INDEX_op_cmp_vec, 0 \
- }; \
- static const GVecGen2i op[4] = { \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_b, \
- .opt_opc = vecop_list, \
- .vece = MO_8 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_h, \
- .opt_opc = vecop_list, \
- .vece = MO_16 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_w, \
- .opt_opc = vecop_list, \
- .vece = MO_32 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_d, \
- .opt_opc = vecop_list, \
- .vece = MO_64 \
- } \
- }; \
- \
- vd_ofs = vec_full_offset(a->vd); \
- vj_ofs = vec_full_offset(a->vj); \
- \
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
- \
- return true; \
-}
-
-DO_CMPI_S(vseqi)
-DO_CMPI_S(vslei)
-DO_CMPI_S(vslti)
-
-#define DO_CMPI_U(NAME) \
-static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
-{ \
- uint32_t vd_ofs, vj_ofs; \
- \
- if (!check_vec(ctx, 16)) { \
- return true; \
- } \
- \
- static const TCGOpcode vecop_list[] = { \
- INDEX_op_cmp_vec, 0 \
- }; \
- static const GVecGen2i op[4] = { \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_bu, \
- .opt_opc = vecop_list, \
- .vece = MO_8 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_hu, \
- .opt_opc = vecop_list, \
- .vece = MO_16 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_wu, \
- .opt_opc = vecop_list, \
- .vece = MO_32 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_du, \
- .opt_opc = vecop_list, \
- .vece = MO_64 \
- } \
- }; \
- \
- vd_ofs = vec_full_offset(a->vd); \
- vj_ofs = vec_full_offset(a->vj); \
- \
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
- \
- return true; \
-}
-
-DO_CMPI_U(vslei)
-DO_CMPI_U(vslti)
+static bool do_cmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmp_vl(ctx, a, 16, mop, cond);
+}
+
+static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmp_vl(ctx, a, 32, mop, cond);
+}
+
+static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop, TCGCond cond)
+{
+ uint32_t vd_ofs, vj_ofs;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ vd_ofs = vec_full_offset(a->vd);
+ vj_ofs = vec_full_offset(a->vj);
+
+ tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmpi_vl(ctx, a, 16, mop, cond);
+}
+
+static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmpi_vl(ctx, a, 32, mop, cond);
+}

TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
-TRANS(vseqi_b, LSX, do_vseqi_s, MO_8)
-TRANS(vseqi_h, LSX, do_vseqi_s, MO_16)
-TRANS(vseqi_w, LSX, do_vseqi_s, MO_32)
-TRANS(vseqi_d, LSX, do_vseqi_s, MO_64)
+TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
+TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
+TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
+TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
+TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
+TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
+TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
+TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
+TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
+TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
+TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
+TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)

TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
-TRANS(vslei_b, LSX, do_vslei_s, MO_8)
-TRANS(vslei_h, LSX, do_vslei_s, MO_16)
-TRANS(vslei_w, LSX, do_vslei_s, MO_32)
-TRANS(vslei_d, LSX, do_vslei_s, MO_64)
+TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
+TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
+TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
+TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
-TRANS(vslei_bu, LSX, do_vslei_u, MO_8)
-TRANS(vslei_hu, LSX, do_vslei_u, MO_16)
-TRANS(vslei_wu, LSX, do_vslei_u, MO_32)
-TRANS(vslei_du, LSX, do_vslei_u, MO_64)
+TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
+TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
+TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
+TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
+TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
+TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
+TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
+TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
+TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
+TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
+TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
+TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
+TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
+TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
+TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
+TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
+TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
+TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
+TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
+TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)

TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
-TRANS(vslti_b, LSX, do_vslti_s, MO_8)
-TRANS(vslti_h, LSX, do_vslti_s, MO_16)
-TRANS(vslti_w, LSX, do_vslti_s, MO_32)
-TRANS(vslti_d, LSX, do_vslti_s, MO_64)
+TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
+TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
+TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
+TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
-TRANS(vslti_bu, LSX, do_vslti_u, MO_8)
-TRANS(vslti_hu, LSX, do_vslti_u, MO_16)
-TRANS(vslti_wu, LSX, do_vslti_u, MO_32)
-TRANS(vslti_du, LSX, do_vslti_u, MO_64)
+TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
+TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
+TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
+TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
+TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
+TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
+TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
+TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
+TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
+TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
+TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
+TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
+TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
+TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
+TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
+TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
+TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
+TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
+TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
+TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)

static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
{
--
2.39.1
New patch

This patch includes:
- XVFCMP.cond.{S/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-50-gaosong@loongson.cn>
---
target/loongarch/helper.h | 8 +-
target/loongarch/insns.decode | 3 +
target/loongarch/disas.c | 93 +++++++++++++++++++++
target/loongarch/vec_helper.c | 4 +-
target/loongarch/insn_trans/trans_vec.c.inc | 31 ++++---
5 files changed, 117 insertions(+), 22 deletions(-)
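One note before the diff: unlike the gvec helpers, the vfcmp helpers receive
register numbers rather than pointers, so there is no descriptor to recover
the size from; the patch instead passes the operation size as an extra i32
right after env (hence DEF_HELPER_5 becoming DEF_HELPER_6). A sketch of what
a matching call site presumably looks like — the actual translator change
sits further down in the patch, and the wrapper name plus the precomputed
flags argument are assumptions here:

    /* Hypothetical expansion for [x]vfcmp.cond.s; oprsz is 16 or 32. */
    static void gen_vfcmp_c_s_sketch(arg_vvv_fcond *a, uint32_t oprsz,
                                     uint32_t flags)
    {
        gen_helper_vfcmp_c_s(cpu_env, tcg_constant_i32(oprsz),
                             tcg_constant_i32(a->vd),
                             tcg_constant_i32(a->vj),
                             tcg_constant_i32(a->vk),
                             tcg_constant_i32(flags));
    }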
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32)

DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5
xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5
xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5

+xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond
+xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvslti_hu, vv_i)
INSN_LASX(xvslti_wu, vv_i)
INSN_LASX(xvslti_du, vv_i)

+#define output_xvfcmp(C, PREFIX, SUFFIX) \
+{ \
+ (C)->info->fprintf_func((C)->info->stream, "%08x %s%s\tx%d, x%d, x%d", \
+ (C)->insn, PREFIX, SUFFIX, a->vd, \
+ a->vj, a->vk); \
+}
+static bool output_xxx_fcond(DisasContext *ctx, arg_vvv_fcond * a,
+ const char *suffix)
+{
+ bool ret = true;
+ switch (a->fcond) {
+ case 0x0:
+ output_xvfcmp(ctx, "xvfcmp_caf_", suffix);
+ break;
+ case 0x1:
+ output_xvfcmp(ctx, "xvfcmp_saf_", suffix);
+ break;
+ case 0x2:
+ output_xvfcmp(ctx, "xvfcmp_clt_", suffix);
+ break;
+ case 0x3:
+ output_xvfcmp(ctx, "xvfcmp_slt_", suffix);
+ break;
+ case 0x4:
+ output_xvfcmp(ctx, "xvfcmp_ceq_", suffix);
+ break;
+ case 0x5:
+ output_xvfcmp(ctx, "xvfcmp_seq_", suffix);
+ break;
+ case 0x6:
+ output_xvfcmp(ctx, "xvfcmp_cle_", suffix);
+ break;
+ case 0x7:
+ output_xvfcmp(ctx, "xvfcmp_sle_", suffix);
+ break;
+ case 0x8:
+ output_xvfcmp(ctx, "xvfcmp_cun_", suffix);
+ break;
+ case 0x9:
+ output_xvfcmp(ctx, "xvfcmp_sun_", suffix);
+ break;
+ case 0xA:
+ output_xvfcmp(ctx, "xvfcmp_cult_", suffix);
+ break;
+ case 0xB:
+ output_xvfcmp(ctx, "xvfcmp_sult_", suffix);
+ break;
+ case 0xC:
+ output_xvfcmp(ctx, "xvfcmp_cueq_", suffix);
+ break;
+ case 0xD:
+ output_xvfcmp(ctx, "xvfcmp_sueq_", suffix);
+ break;
+ case 0xE:
+ output_xvfcmp(ctx, "xvfcmp_cule_", suffix);
+ break;
+ case 0xF:
+ output_xvfcmp(ctx, "xvfcmp_sule_", suffix);
+ break;
+ case 0x10:
+ output_xvfcmp(ctx, "xvfcmp_cne_", suffix);
+ break;
+ case 0x11:
+ output_xvfcmp(ctx, "xvfcmp_sne_", suffix);
+ break;
+ case 0x14:
+ output_xvfcmp(ctx, "xvfcmp_cor_", suffix);
+ break;
+ case 0x15:
+ output_xvfcmp(ctx, "xvfcmp_sor_", suffix);
+ break;
+ case 0x18:
+ output_xvfcmp(ctx, "xvfcmp_cune_", suffix);
+ break;
+ case 0x19:
+ output_xvfcmp(ctx, "xvfcmp_sune_", suffix);
+ break;
+ default:
+ ret = false;
+ }
+ return ret;
+}
+
+#define LASX_FCMP_INSN(suffix) \
+static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
+ arg_vvv_fcond * a) \
+{ \
+ return output_xxx_fcond(ctx, a, #suffix); \
+}
+
+LASX_FCMP_INSN(s)
+LASX_FCMP_INSN(d)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static uint64_t vfcmp_common(CPULoongArchState *env,
}

#define VFCMP(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
{ \
int i; \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
VReg *Vk = &(env->fpr[vk].vreg); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT ; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
FloatRelation cmp; \
cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
173
t.E(i) = vfcmp_common(env, cmp, flags); \
174
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
175
index XXXXXXX..XXXXXXX 100644
176
--- a/target/loongarch/insn_trans/trans_vec.c.inc
177
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
178
@@ -XXX,XX +XXX,XX @@ TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
179
TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
180
TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
181
182
-static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
183
+static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
184
{
185
uint32_t flags;
186
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
187
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
188
TCGv_i32 vd = tcg_constant_i32(a->vd);
189
TCGv_i32 vj = tcg_constant_i32(a->vj);
190
TCGv_i32 vk = tcg_constant_i32(a->vk);
191
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
192
193
- if (!avail_LSX(ctx)) {
194
- return false;
195
- }
196
-
197
- if (!check_vec(ctx, 16)) {
198
+ if (!check_vec(ctx, sz)) {
199
return true;
200
}
201
202
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
203
flags = get_fcmp_flags(a->fcond >> 1);
204
- fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
205
+ fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
206
207
return true;
208
}
209
210
-static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
211
+static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
212
{
213
uint32_t flags;
214
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
215
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
216
TCGv_i32 vd = tcg_constant_i32(a->vd);
217
TCGv_i32 vj = tcg_constant_i32(a->vj);
218
TCGv_i32 vk = tcg_constant_i32(a->vk);
219
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
220
221
- if (!avail_LSX(ctx)) {
222
- return false;
223
- }
224
-
225
- if (!check_vec(ctx, 16)) {
226
+ if (!check_vec(ctx, sz)) {
227
return true;
228
}
229
230
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
231
flags = get_fcmp_flags(a->fcond >> 1);
232
- fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
233
+ fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
234
235
return true;
236
}
237
238
+TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
239
+TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
240
+TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
241
+TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
242
+
243
static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
244
{
245
if (!avail_LSX(ctx)) {
246
--
247
2.39.1
diff view generated by jsdifflib
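
The key change above is that the float-compare helpers now take an explicit operand size instead of assuming 128 bits, with the loop bound derived as oprsz / (BIT / 8). A throwaway sketch of that element-count arithmetic (names are ours, not QEMU's):

#include <stdio.h>

int main(void)
{
    int widths[] = { 32, 64 };                   /* .S and .D elements */
    int sizes[]  = { 16, 32 };                   /* LSX and LASX bytes */
    for (int w = 0; w < 2; w++) {
        for (int s = 0; s < 2; s++) {
            printf("BIT=%d oprsz=%d -> %d elements\n",
                   widths[w], sizes[s], sizes[s] / (widths[w] / 8));
        }
    }
    return 0;
}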

This patch includes:
- XVBITSEL.V;
- XVBITSELI.B;
- XVSET{EQZ/NEZ}.V;
- XVSETANYEQZ.{B/H/W/D};
- XVSETALLNEZ.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-51-gaosong@loongson.cn>
---
target/loongarch/helper.h | 16 ++--
target/loongarch/insns.decode | 15 ++++
target/loongarch/disas.c | 19 ++++
target/loongarch/vec_helper.c | 42 +++++----
target/loongarch/insn_trans/trans_vec.c.inc | 99 ++++++++++++++-----
5 files changed, 147 insertions(+), 44 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32)

DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

-DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
+DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32)

DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5
xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond
xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond

+xvbitsel_v 0000 11010010 ..... ..... ..... ..... @vvvv
+
+xvbitseli_b 0111 01111100 01 ........ ..... ..... @vv_ui8
+
+xvseteqz_v 0111 01101001 11001 00110 ..... 00 ... @cv
+xvsetnez_v 0111 01101001 11001 00111 ..... 00 ... @cv
+xvsetanyeqz_b 0111 01101001 11001 01000 ..... 00 ... @cv
+xvsetanyeqz_h 0111 01101001 11001 01001 ..... 00 ... @cv
+xvsetanyeqz_w 0111 01101001 11001 01010 ..... 00 ... @cv
+xvsetanyeqz_d 0111 01101001 11001 01011 ..... 00 ... @cv
+xvsetallnez_b 0111 01101001 11001 01100 ..... 00 ... @cv
+xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv
+xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv
+xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
return true; \
}

+static void output_cv_x(DisasContext *ctx, arg_cv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "fcc%d, x%d", a->cd, a->vj);
+}
+
static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
@@ -XXX,XX +XXX,XX @@ static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
LASX_FCMP_INSN(s)
LASX_FCMP_INSN(d)

+INSN_LASX(xvbitsel_v, vvvv)
+INSN_LASX(xvbitseli_b, vv_i)
+
+INSN_LASX(xvseteqz_v, cv)
+INSN_LASX(xvsetnez_v, cv)
+INSN_LASX(xvsetanyeqz_b, cv)
+INSN_LASX(xvsetanyeqz_h, cv)
+INSN_LASX(xvsetanyeqz_w, cv)
+INSN_LASX(xvsetanyeqz_d, cv)
+INSN_LASX(xvsetallnez_b, cv)
+INSN_LASX(xvsetallnez_h, cv)
+INSN_LASX(xvsetallnez_w, cv)
+INSN_LASX(xvsetallnez_d, cv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VFCMP(vfcmp_s_s, 32, UW, float32_compare)
VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
VFCMP(vfcmp_s_d, 64, UD, float64_compare)

-void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
+void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;

- for (i = 0; i < 16; i++) {
+ for (i = 0; i < simd_oprsz(desc); i++) {
Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
}
}
@@ -XXX,XX +XXX,XX @@ void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
/* Copy from target/arm/tcg/sve_helper.c */
static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
{
- uint64_t bits = 8 << esz;
+ int bits = 8 << esz;
uint64_t ones = dup_const(esz, 1);
uint64_t signs = ones << (bits - 1);
uint64_t cmp0, cmp1;
@@ -XXX,XX +XXX,XX @@ static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
return (cmp0 | cmp1) & signs;
}

-#define SETANYEQZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
+#define SETANYEQZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] || \
+ do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
}
+
SETANYEQZ(vsetanyeqz_b, MO_8)
SETANYEQZ(vsetanyeqz_h, MO_16)
SETANYEQZ(vsetanyeqz_w, MO_32)
SETANYEQZ(vsetanyeqz_d, MO_64)

-#define SETALLNEZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \
+#define SETALLNEZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] && \
+ !do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
}
+
SETALLNEZ(vsetallnez_b, MO_8)
SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
return gen_vv_i_vl(ctx, a, 32, fn);
}

-static bool gen_cv(DisasContext *ctx, arg_cv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
{
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 cd = tcg_constant_i32(a->cd);
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, sz)) {
return true;
}

- func(cpu_env, cd, vj);
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
+ TCGv_i32 cd = tcg_constant_i32(a->cd);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
+
+ func(cpu_env, oprsz, cd, vj);
return true;
}

+static bool gen_cv(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ return gen_cv_vl(ctx, a, 16, func);
+}
+
+static bool gen_cx(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ return gen_cv_vl(ctx, a, 32, func);
+}
+
static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
uint32_t oprsz, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
@@ -XXX,XX +XXX,XX @@ TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)

-static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
+static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
vec_full_offset(a->vk), vec_full_offset(a->vj),
- 16, ctx->vl/8);
+ oprsz, ctx->vl / 8);
return true;
}

+TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
+TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
+
static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
{
tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
}

-static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
+static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
{
static const GVecGen2i op = {
.fniv = gen_vbitseli,
@@ -XXX,XX +XXX,XX @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
.load_dest = true
};

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
- 16, ctx->vl/8, a->imm, &op);
+ oprsz, ctx->vl / 8, a->imm , &op);
return true;
}

+TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
+TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
+
#define VSET(NAME, COND) \
static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
{ \
@@ -XXX,XX +XXX,XX @@ TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)

+#define XVSET(NAME, COND) \
+static bool trans_## NAME(DisasContext *ctx, arg_cv * a) \
+{ \
+ TCGv_i64 t1, t2, d[4]; \
+ \
+ d[0] = tcg_temp_new_i64(); \
+ d[1] = tcg_temp_new_i64(); \
+ d[2] = tcg_temp_new_i64(); \
+ d[3] = tcg_temp_new_i64(); \
+ t1 = tcg_temp_new_i64(); \
+ t2 = tcg_temp_new_i64(); \
+ \
+ get_vreg64(d[0], a->vj, 0); \
+ get_vreg64(d[1], a->vj, 1); \
+ get_vreg64(d[2], a->vj, 2); \
+ get_vreg64(d[3], a->vj, 3); \
+ \
+ if (!avail_LASX(ctx)) { \
+ return false; \
+ } \
+ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ tcg_gen_or_i64(t1, d[0], d[1]); \
+ tcg_gen_or_i64(t2, d[2], d[3]); \
+ tcg_gen_or_i64(t1, t2, t1); \
+ tcg_gen_setcondi_i64(COND, t1, t1, 0); \
+ tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+ \
+ return true; \
+}
+
+XVSET(xvseteqz_v, TCG_COND_EQ)
+XVSET(xvsetnez_v, TCG_COND_NE)
+
+TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
+TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
+TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
+TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
+TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
+TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
+TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
+TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
+
static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
--
2.39.1
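
The oprsz == 32 arms added above simply fold the upper two doublewords into the same test. For the byte case, the do_match2() routine the helpers borrow from target/arm/tcg/sve_helper.c reduces at esz == MO_8 to the classic zero-byte bit hack; a self-contained sketch of that test over a 256-bit value (illustrative code only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Zero-byte detector: (x - 0x01..01) & ~x & 0x80..80 is exact. */
static bool any_byte_zero(uint64_t x)
{
    return ((x - 0x0101010101010101ull) & ~x & 0x8080808080808080ull) != 0;
}

/* 256-bit register seen as four 64-bit lanes, as in the oprsz==32 arm. */
static bool any_byte_zero_256(const uint64_t d[4])
{
    return any_byte_zero(d[0]) || any_byte_zero(d[1]) ||
           any_byte_zero(d[2]) || any_byte_zero(d[3]);
}

int main(void)
{
    uint64_t v[4] = { ~0ull, ~0ull, ~0ull, 0xffffffffffff00ffull };
    printf("any zero byte: %d\n", any_byte_zero_256(v));   /* prints 1 */
    return 0;
}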

This patch includes:
- XVINSGR2VR.{W/D};
- XVPICKVE2GR.{W/D}[U].

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-52-gaosong@loongson.cn>
---
target/loongarch/insns.decode | 7 +
target/loongarch/disas.c | 17 ++
target/loongarch/translate.c | 12 ++
target/loongarch/insn_trans/trans_vec.c.inc | 208 ++++----------------
4 files changed, 74 insertions(+), 170 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv
xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv
xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv

+xvinsgr2vr_w 0111 01101110 10111 10 ... ..... ..... @vr_ui3
+xvinsgr2vr_d 0111 01101110 10111 110 .. ..... ..... @vr_ui2
+xvpickve2gr_w 0111 01101110 11111 10 ... ..... ..... @rv_ui3
+xvpickve2gr_d 0111 01101110 11111 110 .. ..... ..... @rv_ui2
+xvpickve2gr_wu 0111 01101111 00111 10 ... ..... ..... @rv_ui3
+xvpickve2gr_du 0111 01101111 00111 110 .. ..... ..... @rv_ui2
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj);
}

+static void output_vr_i_x(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d, 0x%x", a->vd, a->rj, a->imm);
+}
+
+static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsetallnez_h, cv)
INSN_LASX(xvsetallnez_w, cv)
INSN_LASX(xvsetallnez_d, cv)

+INSN_LASX(xvinsgr2vr_w, vr_i)
+INSN_LASX(xvinsgr2vr_d, vr_i)
+INSN_LASX(xvpickve2gr_w, rv_i)
+INSN_LASX(xvpickve2gr_d, rv_i)
+INSN_LASX(xvpickve2gr_wu, rv_i)
+INSN_LASX(xvpickve2gr_du, rv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_offset(int regno)
return offsetof(CPULoongArchState, fpr[regno]);
}

+static inline int vec_reg_offset(int regno, int index, MemOp mop)
+{
+ const uint8_t size = 1 << mop;
+ int offs = index * size;
+
+ if (HOST_BIG_ENDIAN && size < 8 ) {
+ offs ^= (8 - size);
+ }
+
+ return offs + vec_full_offset(regno);
+}
+
static inline void get_vreg64(TCGv_i64 dest, int regno, int index)
{
tcg_gen_ld_i64(dest, cpu_env,
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)

-static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
+static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_st8_i64(src, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
- return true;
-}
-
-static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_st16_i64(src, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
- return true;
-}
-
-static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

- tcg_gen_st32_i64(src, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
- return true;
-}
-
-static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
+ func(src, cpu_env, vec_reg_offset(a->vd, a->imm, mop));

- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_st_i64(src, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
return true;
}

-static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
+static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld8s_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
- return true;
+ return gen_g2v_vl(ctx, a, 16, mop, func);
}

-static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
+static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld16s_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
- return true;
+ return gen_g2v_vl(ctx, a, 32, mop, func);
}

-static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
-{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
+TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
+TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
+TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
+TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
+TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld32s_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
- return true;
-}
-
-static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
+static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

- tcg_gen_ld_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
- return true;
-}
+ func(dst, cpu_env, vec_reg_offset(a->vj, a->imm, mop));

-static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
-{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld8u_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
return true;
}

-static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
+static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld16u_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
- return true;
+ return gen_v2g_vl(ctx, a, 16, mop, func);
}

-static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
+static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld32u_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
- return true;
+ return gen_v2g_vl(ctx, a, 32, mop, func);
}

-static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
-{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_ld_i64(dst, cpu_env,
- offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
- return true;
-}
+TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
+TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
+TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
+TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
+TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)

static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
uint32_t oprsz, MemOp mop)
--
2.39.1
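
The vec_reg_offset() helper introduced above encodes one indexing rule: element offsets are little-endian in guest terms, so on a big-endian host every sub-8-byte access is XOR-corrected within its 64-bit slot. A standalone demo of that arithmetic (illustrative C, not the QEMU function):

#include <stdio.h>

static int elem_offset(int index, int elem_size, int host_big_endian)
{
    int offs = index * elem_size;          /* elem_size = 1 << mop */
    if (host_big_endian && elem_size < 8) {
        offs ^= 8 - elem_size;             /* flip within the 64-bit slot */
    }
    return offs;
}

int main(void)
{
    /* 32-bit element #1: byte 4 on LE hosts, byte 0 of the slot on BE. */
    printf("LE: %d  BE: %d\n", elem_offset(1, 4, 0), elem_offset(1, 4, 1));
    return 0;
}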

This patch includes:
- XVREPLVE.{B/H/W/D};
- XVREPL128VEI.{B/H/W/D};
- XVREPLVE0.{B/H/W/D/Q};
- XVINSVE0.{W/D};
- XVPICKVE.{W/D};
- XVBSLL.V, XVBSRL.V.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-53-gaosong@loongson.cn>
---
target/loongarch/helper.h | 5 +
target/loongarch/insns.decode | 25 +++
target/loongarch/disas.c | 29 ++++
target/loongarch/vec_helper.c | 28 ++++
target/loongarch/insn_trans/trans_vec.c.inc | 171 +++++++++++++-------
5 files changed, 201 insertions(+), 57 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32)

+DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr
+
+xvreplve_b 0111 01010010 00100 ..... ..... ..... @vvr
+xvreplve_h 0111 01010010 00101 ..... ..... ..... @vvr
+xvreplve_w 0111 01010010 00110 ..... ..... ..... @vvr
+xvreplve_d 0111 01010010 00111 ..... ..... ..... @vvr
+
+xvrepl128vei_b 0111 01101111 01111 0 .... ..... ..... @vv_ui4
+xvrepl128vei_h 0111 01101111 01111 10 ... ..... ..... @vv_ui3
+xvrepl128vei_w 0111 01101111 01111 110 .. ..... ..... @vv_ui2
+xvrepl128vei_d 0111 01101111 01111 1110 . ..... ..... @vv_ui1
+
+xvreplve0_b 0111 01110000 01110 00000 ..... ..... @vv
+xvreplve0_h 0111 01110000 01111 00000 ..... ..... @vv
+xvreplve0_w 0111 01110000 01111 10000 ..... ..... @vv
+xvreplve0_d 0111 01110000 01111 11000 ..... ..... @vv
+xvreplve0_q 0111 01110000 01111 11100 ..... ..... @vv
+
+xvinsve0_w 0111 01101111 11111 10 ... ..... ..... @vv_ui3
+xvinsve0_d 0111 01101111 11111 110 .. ..... ..... @vv_ui2
+
+xvpickve_w 0111 01110000 00111 10 ... ..... ..... @vv_ui3
+xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2
+
+xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5
+xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm);
}

+static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
INSN_LASX(xvreplgr2vr_d, vr)
+
+INSN_LASX(xvreplve_b, vvr)
+INSN_LASX(xvreplve_h, vvr)
+INSN_LASX(xvreplve_w, vvr)
+INSN_LASX(xvreplve_d, vvr)
+INSN_LASX(xvrepl128vei_b, vv_i)
+INSN_LASX(xvrepl128vei_h, vv_i)
+INSN_LASX(xvrepl128vei_w, vv_i)
+INSN_LASX(xvrepl128vei_d, vv_i)
+
+INSN_LASX(xvreplve0_b, vv)
+INSN_LASX(xvreplve0_h, vv)
+INSN_LASX(xvreplve0_w, vv)
+INSN_LASX(xvreplve0_d, vv)
+INSN_LASX(xvreplve0_q, vv)
+
+INSN_LASX(xvinsve0_w, vv_i)
+INSN_LASX(xvinsve0_d, vv_i)
+
+INSN_LASX(xvpickve_w, vv_i)
+INSN_LASX(xvpickve_d, vv_i)
+
+INSN_LASX(xvbsll_v, vv_i)
+INSN_LASX(xvbsrl_v, vv_i)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
SETALLNEZ(vsetallnez_d, MO_64)

+#define XVINSVE0(NAME, E, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ Vd->E(imm & MASK) = Vj->E(0); \
+}
+
+XVINSVE0(xvinsve0_w, W, 0x7)
+XVINSVE0(xvinsve0_d, D, 0x3)
+
+#define XVPICKVE(NAME, E, BIT, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ Vd->E(0) = Vj->E(imm & MASK); \
+ for (i = 1; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = 0; \
+ } \
+}
+
+XVPICKVE(xvpickve_w, W, 32, 0x7)
+XVPICKVE(xvpickve_d, D, 64, 0x3)
+
#define VPACKEV(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
return true;
}

-static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
- void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+ uint32_t oprsz, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
{
+ int i;
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_ptr t1 = tcg_temp_new_ptr();
TCGv_i64 t2 = tcg_temp_new_i64();

- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}

- tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
tcg_gen_shli_i64(t0, t0, vece);
if (HOST_BIG_ENDIAN) {
- tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
+ tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
}

tcg_gen_trunc_i64_ptr(t1, t0);
tcg_gen_add_ptr(t1, t1, cpu_env);
- func(t2, t1, vec_full_offset(a->vj));
- tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
+
+ for (i = 0; i < oprsz; i += 16) {
+ func(t2, t1, vec_full_offset(a->vj) + i);
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
+ }

return true;
}

+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
+}
+
+static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
+}
+
TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
+TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64)
+TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)

-static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
+static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
- int ofs;
- TCGv_i64 desthigh, destlow, high, low;
-
- if (!avail_LSX(ctx)) {
- return false;
- }
+ int i;

- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, 32)) {
return true;
}

- desthigh = tcg_temp_new_i64();
- destlow = tcg_temp_new_i64();
- high = tcg_temp_new_i64();
- low = tcg_temp_new_i64();
-
- get_vreg64(low, a->vj, 0);
+ for (i = 0; i < 32; i += 16) {
+ tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
+ vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);

- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(high, a->vj, 1);
- tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
- tcg_gen_shli_i64(destlow, low, ofs);
- } else {
- tcg_gen_shli_i64(desthigh, low, ofs - 64);
- destlow = tcg_constant_i64(0);
}
+ return true;
+}

- set_vreg64(desthigh, a->vd, 1);
- set_vreg64(destlow, a->vd, 0);
+TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
+TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
+TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
+TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)

+static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
+ vec_full_offset(a->vj), 32, 32);
return true;
}

-static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
+TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
+TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
+TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
+TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
+TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)
+
+TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
+TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
+
+TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
+TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
+
+static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
{
- TCGv_i64 desthigh, destlow, high, low;
- int ofs;
+ int i, ofs;

- if (!avail_LSX(ctx)) {
- return false;
+ if (!check_vec(ctx, oprsz)) {
+ return true;
}

- if (!check_vec(ctx, 16)) {
- return true;
+ for (i = 0; i < oprsz / 16; i++) {
+ TCGv desthigh = tcg_temp_new_i64();
+ TCGv destlow = tcg_temp_new_i64();
+ TCGv high = tcg_temp_new_i64();
+ TCGv low = tcg_temp_new_i64();
+
+ get_vreg64(low, a->vj, 2 * i);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(high, a->vj, 2 * i + 1);
+ tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
+ tcg_gen_shli_i64(destlow, low, ofs);
+ } else {
+ tcg_gen_shli_i64(desthigh, low, ofs - 64);
+ destlow = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh, a->vd, 2 * i + 1);
+ set_vreg64(destlow, a->vd, 2 * i);
}

- desthigh = tcg_temp_new_i64();
- destlow = tcg_temp_new_i64();
- high = tcg_temp_new_i64();
- low = tcg_temp_new_i64();
+ return true;
+}

- get_vreg64(high, a->vj, 1);
+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ int i, ofs;

- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(low, a->vj, 0);
- tcg_gen_extract2_i64(destlow, low, high, ofs);
- tcg_gen_shri_i64(desthigh, high, ofs);
- } else {
- tcg_gen_shri_i64(destlow, high, ofs - 64);
- desthigh = tcg_constant_i64(0);
+ if (!check_vec(ctx, 32)) {
+ return true;
}

- set_vreg64(desthigh, a->vd, 1);
- set_vreg64(destlow, a->vd, 0);
+ for (i = 0; i < oprsz / 16; i++) {
+ TCGv desthigh = tcg_temp_new_i64();
+ TCGv destlow = tcg_temp_new_i64();
+ TCGv high = tcg_temp_new_i64();
+ TCGv low = tcg_temp_new_i64();
+ get_vreg64(high, a->vj, 2 * i + 1);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(low, a->vj, 2 * i);
+ tcg_gen_extract2_i64(destlow, low, high, ofs);
+ tcg_gen_shri_i64(desthigh, high, ofs);
+ } else {
+ tcg_gen_shri_i64(destlow, high, ofs - 64);
+ desthigh = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh, a->vd, 2 * i + 1);
+ set_vreg64(destlow, a->vd, 2 * i);
+ }

return true;
}

+TRANS(vbsll_v, LSX, do_vbsll_v, 16)
+TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
+TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
+TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
+
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
--
2.39.1
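
A recurring pattern in the patch above is the per-128-bit-lane loop (i += 16): xvrepl128vei broadcasts the selected element within each 16-byte lane independently, so the low and high halves of a 256-bit register keep their own data. A small illustrative model of that semantics (plain C, hypothetical names):

#include <stdint.h>
#include <stdio.h>

static void repl128vei_w(uint32_t *vd, const uint32_t *vj, int imm, int oprsz)
{
    for (int lane = 0; lane < oprsz / 16; lane++) {   /* 2 lanes for LASX */
        uint32_t e = vj[lane * 4 + (imm & 0x3)];      /* element within lane */
        for (int j = 0; j < 4; j++) {
            vd[lane * 4 + j] = e;
        }
    }
}

int main(void)
{
    uint32_t vj[8] = { 0, 1, 2, 3, 10, 11, 12, 13 }, vd[8];
    repl128vei_w(vd, vj, 2, 32);
    printf("%u %u\n", vd[0], vd[4]);    /* 2 and 12: lanes stay separate */
    return 0;
}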

This patch includes:
- XVPACK{EV/OD}.{B/H/W/D};
- XVPICK{EV/OD}.{B/H/W/D};
- XVILV{L/H}.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-54-gaosong@loongson.cn>
---
target/loongarch/insns.decode | 27 ++++
target/loongarch/disas.c | 27 ++++
target/loongarch/vec_helper.c | 138 +++++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 24 ++++
4 files changed, 156 insertions(+), 60 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2

xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5
xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5
+
+xvpackev_b 0111 01010001 01100 ..... ..... ..... @vvv
+xvpackev_h 0111 01010001 01101 ..... ..... ..... @vvv
+xvpackev_w 0111 01010001 01110 ..... ..... ..... @vvv
+xvpackev_d 0111 01010001 01111 ..... ..... ..... @vvv
+xvpackod_b 0111 01010001 10000 ..... ..... ..... @vvv
+xvpackod_h 0111 01010001 10001 ..... ..... ..... @vvv
+xvpackod_w 0111 01010001 10010 ..... ..... ..... @vvv
+xvpackod_d 0111 01010001 10011 ..... ..... ..... @vvv
+
+xvpickev_b 0111 01010001 11100 ..... ..... ..... @vvv
+xvpickev_h 0111 01010001 11101 ..... ..... ..... @vvv
+xvpickev_w 0111 01010001 11110 ..... ..... ..... @vvv
+xvpickev_d 0111 01010001 11111 ..... ..... ..... @vvv
+xvpickod_b 0111 01010010 00000 ..... ..... ..... @vvv
+xvpickod_h 0111 01010010 00001 ..... ..... ..... @vvv
+xvpickod_w 0111 01010010 00010 ..... ..... ..... @vvv
+xvpickod_d 0111 01010010 00011 ..... ..... ..... @vvv
+
+xvilvl_b 0111 01010001 10100 ..... ..... ..... @vvv
+xvilvl_h 0111 01010001 10101 ..... ..... ..... @vvv
+xvilvl_w 0111 01010001 10110 ..... ..... ..... @vvv
+xvilvl_d 0111 01010001 10111 ..... ..... ..... @vvv
+xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv
+xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv
+xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv
+xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvpickve_d, vv_i)

INSN_LASX(xvbsll_v, vv_i)
INSN_LASX(xvbsrl_v, vv_i)
+
+INSN_LASX(xvpackev_b, vvv)
+INSN_LASX(xvpackev_h, vvv)
+INSN_LASX(xvpackev_w, vvv)
+INSN_LASX(xvpackev_d, vvv)
+INSN_LASX(xvpackod_b, vvv)
+INSN_LASX(xvpackod_h, vvv)
+INSN_LASX(xvpackod_w, vvv)
+INSN_LASX(xvpackod_d, vvv)
+
+INSN_LASX(xvpickev_b, vvv)
+INSN_LASX(xvpickev_h, vvv)
+INSN_LASX(xvpickev_w, vvv)
+INSN_LASX(xvpickev_d, vvv)
+INSN_LASX(xvpickod_b, vvv)
+INSN_LASX(xvpickod_h, vvv)
+INSN_LASX(xvpickod_w, vvv)
+INSN_LASX(xvpickod_d, vvv)
+
+INSN_LASX(xvilvl_b, vvv)
+INSN_LASX(xvilvl_h, vvv)
+INSN_LASX(xvilvl_w, vvv)
+INSN_LASX(xvilvl_d, vvv)
+INSN_LASX(xvilvh_b, vvv)
+INSN_LASX(xvilvh_h, vvv)
+INSN_LASX(xvilvh_w, vvv)
+INSN_LASX(xvilvh_d, vvv)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ XVPICKVE(xvpickve_d, D, 64, 0x3)
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg temp; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
temp.E(2 * i + 1) = Vj->E(2 * i); \
temp.E(2 *i) = Vk->E(2 * i); \
} \
@@ -XXX,XX +XXX,XX @@ VPACKEV(vpackev_d, 128, D)
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg temp; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
temp.E(2 * i) = Vk->E(2 * i + 1); \
} \
@@ -XXX,XX +XXX,XX @@ VPACKOD(vpackod_h, 32, H)
VPACKOD(vpackod_w, 64, W)
VPACKOD(vpackod_d, 128, D)

-#define VPICKEV(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
- temp.E(i) = Vk->E(2 * i); \
- } \
- *Vd = temp; \
+#define VPICKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
}

VPICKEV(vpickev_b, 16, B)
@@ -XXX,XX +XXX,XX @@ VPICKEV(vpickev_h, 32, H)
VPICKEV(vpickev_w, 64, W)
VPICKEV(vpickev_d, 128, D)

-#define VPICKOD(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
- temp.E(i) = Vk->E(2 * i + 1); \
- } \
- *Vd = temp; \
+#define VPICKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \
+ } \
+ } \
+ *Vd = temp; \
}

VPICKOD(vpickod_b, 16, B)
@@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_h, 32, H)
VPICKOD(vpickod_w, 64, W)
VPICKOD(vpickod_d, 128, D)

-#define VILVL(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i); \
- temp.E(2 * i) = Vk->E(i); \
- } \
- *Vd = temp; \
+#define VILVL(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \
+ } \
+ } \
+ *Vd = temp; \
}

VILVL(vilvl_b, 16, B)
@@ -XXX,XX +XXX,XX @@ VILVL(vilvl_h, 32, H)
VILVL(vilvl_w, 64, W)
VILVL(vilvl_d, 128, D)

-#define VILVH(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
- temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
- } \
- *Vd = temp; \
+#define VILVH(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \
+ } \
+ } \
+ *Vd = temp; \
}

VILVH(vilvh_b, 16, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
+TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
+TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
+TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
+TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
+TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
+TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
+TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
+TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)

TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
@@ -XXX,XX +XXX,XX @@ TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
+TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
+TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
+TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
+TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
+TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
+TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
+TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
+TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)

TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
@@ -XXX,XX +XXX,XX @@ TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
+TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
+TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
+TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
+TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
+TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
+TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
+TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
+TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)

TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
--
2.39.1
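
The rewritten macros above index elements lane-relative: element j of 128-bit lane i reads source element 2 * (j + ofs * i), so even-element picks stay confined to their own lane on 256-bit registers. An illustrative model of the new VPICKEV indexing for 32-bit elements (hypothetical helper, not the QEMU macro):

#include <stdint.h>
#include <stdio.h>

static void pickev_w(uint32_t *vd, const uint32_t *vj, const uint32_t *vk,
                     int oprsz)
{
    uint32_t tmp[8] = { 0 };
    int ofs = 2;                     /* LSX_LEN / BIT with BIT = 64 */

    for (int i = 0; i < oprsz / 16; i++) {        /* per 128-bit lane */
        for (int j = 0; j < ofs; j++) {
            tmp[j + ofs * (2 * i + 1)] = vj[2 * (j + ofs * i)];
            tmp[j + ofs * 2 * i] = vk[2 * (j + ofs * i)];
        }
    }
    for (int k = 0; k < oprsz / 4; k++) {
        vd[k] = tmp[k];
    }
}

int main(void)
{
    uint32_t vj[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    uint32_t vk[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
    uint32_t vd[8];
    pickev_w(vd, vj, vk, 32);
    /* lane 0: vk even elems then vj even elems; lane 1 likewise */
    for (int k = 0; k < 8; k++) {
        printf("%u ", vd[k]);        /* 10 12 0 2 14 16 4 6 */
    }
    printf("\n");
    return 0;
}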
New patch

This patch includes:
- XVSHUF.{B/H/W/D};
- XVPERM.W;
- XVSHUF4I.{B/H/W/D};
- XVPERMI.{W/D/Q};
- XVEXTRINS.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-55-gaosong@loongson.cn>
---
 target/loongarch/helper.h                   |   3 +
 target/loongarch/insns.decode               |  21 +++
 target/loongarch/disas.c                    |  21 +++
 target/loongarch/vec_helper.c               | 146 ++++++++++++++------
 target/loongarch/insn_trans/trans_vec.c.inc |  30 +++-
 5 files changed, 175 insertions(+), 46 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
+DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
 DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv
 xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv
 xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv
 xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv
+
+xvshuf_b 0000 11010110 ..... ..... ..... ..... @vvvv
+xvshuf_h 0111 01010111 10101 ..... ..... ..... @vvv
+xvshuf_w 0111 01010111 10110 ..... ..... ..... @vvv
+xvshuf_d 0111 01010111 10111 ..... ..... ..... @vvv
+
+xvperm_w 0111 01010111 11010 ..... ..... ..... @vvv
+
+xvshuf4i_b 0111 01111001 00 ........ ..... ..... @vv_ui8
+xvshuf4i_h 0111 01111001 01 ........ ..... ..... @vv_ui8
+xvshuf4i_w 0111 01111001 10 ........ ..... ..... @vv_ui8
+xvshuf4i_d 0111 01111001 11 ........ ..... ..... @vv_ui8
+
+xvpermi_w 0111 01111110 01 ........ ..... ..... @vv_ui8
+xvpermi_d 0111 01111110 10 ........ ..... ..... @vv_ui8
+xvpermi_q 0111 01111110 11 ........ ..... ..... @vv_ui8
+
+xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8
+xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8
+xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8
+xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvilvh_b, vvv)
 INSN_LASX(xvilvh_h, vvv)
 INSN_LASX(xvilvh_w, vvv)
 INSN_LASX(xvilvh_d, vvv)
+
+INSN_LASX(xvshuf_b, vvvv)
+INSN_LASX(xvshuf_h, vvv)
+INSN_LASX(xvshuf_w, vvv)
+INSN_LASX(xvshuf_d, vvv)
+
+INSN_LASX(xvperm_w, vvv)
+
+INSN_LASX(xvshuf4i_b, vv_i)
+INSN_LASX(xvshuf4i_h, vv_i)
+INSN_LASX(xvshuf4i_w, vv_i)
+INSN_LASX(xvshuf4i_d, vv_i)
+
+INSN_LASX(xvpermi_w, vv_i)
+INSN_LASX(xvpermi_d, vv_i)
+INSN_LASX(xvpermi_q, vv_i)
+
+INSN_LASX(xvextrins_d, vv_i)
+INSN_LASX(xvextrins_w, vv_i)
+INSN_LASX(xvextrins_h, vv_i)
+INSN_LASX(xvextrins_b, vv_i)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H)
 VILVH(vilvh_w, 64, W)
 VILVH(vilvh_d, 128, D)
 
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
 void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
 {
-    int i, m;
-    VReg temp;
+    int i, j, m;
+    VReg temp = {};
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
     VReg *Vk = (VReg *)vk;
     VReg *Va = (VReg *)va;
+    int oprsz = simd_oprsz(desc);
 
-    m = LSX_LEN/8;
-    for (i = 0; i < m ; i++) {
+    m = LSX_LEN / 8;
+    for (i = 0; i < (oprsz / 16) * m; i++) {
+        j = i < m ? 0 : 1;
         uint64_t k = (uint8_t)Va->B(i) % (2 * m);
-        temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
+        temp.B(i) = k < m ? Vk->B(k + j * m) : Vj->B(k + (j - 1) * m);
     }
     *Vd = temp;
 }
 
-#define VSHUF(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
-    int i, m; \
-    VReg temp; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
-    VReg *Vk = (VReg *)vk; \
- \
-    m = LSX_LEN/BIT; \
-    for (i = 0; i < m; i++) { \
-        uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
-        temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
-    } \
-    *Vd = temp; \
+#define VSHUF(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+    int i, j, m; \
+    VReg temp = {}; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    VReg *Vk = (VReg *)vk; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    m = LSX_LEN / BIT; \
+    for (i = 0; i < (oprsz / 16) * m; i++) { \
+        j = i < m ? 0 : 1; \
+        uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \
+        temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \
+    } \
+    *Vd = temp; \
 }
 
 VSHUF(vshuf_h, 16, H)
 VSHUF(vshuf_w, 32, W)
 VSHUF(vshuf_d, 64, D)
 
-#define VSHUF4I(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
-    int i; \
-    VReg temp; \
-    VReg *Vd = (VReg *)vd; \
-    VReg *Vj = (VReg *)vj; \
- \
-    for (i = 0; i < LSX_LEN/BIT; i++) { \
-        temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
-                          (2 * ((i) & 0x03))) & 0x03)); \
-    } \
-    *Vd = temp; \
+#define VSHUF4I(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+    int i, j, max; \
+    VReg temp = {}; \
+    VReg *Vd = (VReg *)vd; \
+    VReg *Vj = (VReg *)vj; \
+    int oprsz = simd_oprsz(desc); \
+ \
+    max = LSX_LEN / BIT; \
+    for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        j = i < max ? 1 : 2; \
+        temp.E(i) = Vj->E(SHF_POS(i - ((j - 1) * max), imm) + (j - 1) * max); \
+    } \
+    *Vd = temp; \
 }
 
 VSHUF4I(vshuf4i_b, 8, B)
@@ -XXX,XX +XXX,XX @@ VSHUF4I(vshuf4i_w, 32, W)
 
 void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
 {
+    int i;
+    VReg temp = {};
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
 
-    VReg temp;
-    temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
-    temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
+    for (i = 0; i < oprsz / 16; i++) {
+        temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i);
+        temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i);
+    }
+    *Vd = temp;
+}
+
+void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+    int i, m;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+
+    m = LASX_LEN / 32;
+    for (i = 0; i < m; i++) {
+        uint64_t k = (uint8_t)Vk->W(i) % 8;
+        temp.W(i) = Vj->W(k);
+    }
     *Vd = temp;
 }
 
 void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
 {
+    int i;
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+        temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i);
+        temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i);
+        temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i);
+        temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i);
+    }
+    *Vd = temp;
+}
+
+void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    VReg temp = {};
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+
+    temp.D(0) = Vj->D(imm & 0x3);
+    temp.D(1) = Vj->D((imm >> 2) & 0x3);
+    temp.D(2) = Vj->D((imm >> 4) & 0x3);
+    temp.D(3) = Vj->D((imm >> 6) & 0x3);
+    *Vd = temp;
+}
+
+void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+    int i;
     VReg temp;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
 
-    temp.W(0) = Vj->W(imm & 0x3);
-    temp.W(1) = Vj->W((imm >> 2) & 0x3);
-    temp.W(2) = Vd->W((imm >> 4) & 0x3);
-    temp.W(3) = Vd->W((imm >> 6) & 0x3);
+    for (i = 0; i < 2; i++, imm >>= 4) {
+        temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1);
+    }
     *Vd = temp;
 }
 
 #define VEXTRINS(NAME, BIT, E, MASK) \
 void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
 { \
-    int ins, extr; \
+    int i, ins, extr, max; \
     VReg *Vd = (VReg *)vd; \
     VReg *Vj = (VReg *)vj; \
+    int oprsz = simd_oprsz(desc); \
 \
+    max = LSX_LEN / BIT; \
     ins = (imm >> 4) & MASK; \
     extr = imm & MASK; \
-    Vd->E(ins) = Vj->E(extr); \
+    for (i = 0; i < oprsz / 16; i++) { \
+        Vd->E(ins + i * max) = Vj->E(extr + i * max); \
+    } \
 }
 
 VEXTRINS(vextrins_b, 8, B, 0xf)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
 static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
                         gen_helper_gvec_4 *fn)
 {
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
     tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
                        vec_full_offset(a->vj),
                        vec_full_offset(a->vk),
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
 static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
                      gen_helper_gvec_4 *fn)
 {
-    if (!check_vec(ctx, 16)) {
-        return true;
-    }
-
     return gen_vvvv_vl(ctx, a, 16, fn);
 }
 
+static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
+                     gen_helper_gvec_4 *fn)
+{
+    return gen_vvvv_vl(ctx, a, 32, fn);
+}
+
 static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
                            gen_helper_gvec_3_ptr *fn)
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
 TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
 TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
 TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
+TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
+TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
+TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
+TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
 TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
 TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
 TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
 TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
+TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
+TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
+TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
+TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
 
+TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
 TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
+TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
+TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
+TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
 
 TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
 TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
 TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
 TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
+TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
+TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
+TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
+TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
 
 static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
 {
--
2.39.1
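
Of the helpers added here, vpermi_q is the most opaque at first read: each
loop iteration consumes four bits of the immediate, where bit 1 selects the
source register (Vd or Vj) and bit 0 selects which of its 128-bit halves
becomes Q(i) of the result. A standalone sketch of the same selection
(illustrative only; Q128 stands in for QEMU's Int128 vector element):

#include <stdint.h>

typedef struct { uint64_t lo, hi; } Q128;    /* one 128-bit half */

/* Mirrors the vpermi_q loop: imm[1:0] pick the low half of the
 * result, imm[5:4] pick the high half. */
static void xvpermi_q_model(Q128 xd[2], const Q128 xj[2], uint64_t imm)
{
    Q128 temp[2];

    for (int i = 0; i < 2; i++, imm >>= 4) {
        const Q128 *src = (imm & 2) ? xd : xj;   /* bit 1: Xd or Xj */
        temp[i] = src[imm & 1];                  /* bit 0: which half */
    }
    xd[0] = temp[0];
    xd[1] = temp[1];
}

For example, imm = 0x31 selects half 1 of Xj for the low half of the result
and half 1 of Xd for the high half.
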
New patch

This patch includes:
- XVLD[X], XVST[X];
- XVLDREPL.{B/H/W/D};
- XVSTELM.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-56-gaosong@loongson.cn>
---
 target/loongarch/insns.decode               |  18 ++
 target/loongarch/disas.c                    |  24 +++
 target/loongarch/insn_trans/trans_vec.c.inc | 212 ++++++++++++++------
 3 files changed, 194 insertions(+), 60 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
 @vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2
 @vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1
 @vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii
+@vr_i8i2x .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s3
+@vr_i8i3x .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s2
+@vr_i8i4x .... ...... imm2:4 ........ rj:5 vd:5 &vr_ii imm=%i8s1
+@vr_i8i5x .... ..... imm2:5 imm:s8 rj:5 vd:5 &vr_ii
 @vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr
 @v_i13 .... ........ .. imm:13 vd:5 &v_i
 
@@ -XXX,XX +XXX,XX @@ xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8
 xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8
 xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8
 xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8
+
+xvld 0010 110010 ............ ..... ..... @vr_i12
+xvst 0010 110011 ............ ..... ..... @vr_i12
+xvldx 0011 10000100 10000 ..... ..... ..... @vrr
+xvstx 0011 10000100 11000 ..... ..... ..... @vrr
+
+xvldrepl_d 0011 00100001 0 ......... ..... ..... @vr_i9
+xvldrepl_w 0011 00100010 .......... ..... ..... @vr_i10
+xvldrepl_h 0011 0010010 ........... ..... ..... @vr_i11
+xvldrepl_b 0011 001010 ............ ..... ..... @vr_i12
+xvstelm_d 0011 00110001 .. ........ ..... ..... @vr_i8i2x
+xvstelm_w 0011 0011001 ... ........ ..... ..... @vr_i8i3x
+xvstelm_h 0011 001101 .... ........ ..... ..... @vr_i8i4x
+xvstelm_b 0011 00111 ..... ........ ..... ..... @vr_i8i5x
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
     output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk);
 }
 
+static void output_vrr_x(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d, r%d", a->vd, a->rj, a->rk);
+}
+
+static void output_vr_ii_x(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "x%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2);
+}
+
 INSN_LASX(xvadd_b, vvv)
 INSN_LASX(xvadd_h, vvv)
 INSN_LASX(xvadd_w, vvv)
@@ -XXX,XX +XXX,XX @@ INSN_LASX(xvextrins_d, vv_i)
 INSN_LASX(xvextrins_w, vv_i)
 INSN_LASX(xvextrins_h, vv_i)
 INSN_LASX(xvextrins_b, vv_i)
+
+INSN_LASX(xvld, vr_i)
+INSN_LASX(xvst, vr_i)
+INSN_LASX(xvldx, vrr)
+INSN_LASX(xvstx, vrr)
+
+INSN_LASX(xvldrepl_d, vr_i)
+INSN_LASX(xvldrepl_w, vr_i)
+INSN_LASX(xvldrepl_h, vr_i)
+INSN_LASX(xvldrepl_b, vr_i)
+INSN_LASX(xvstelm_d, vr_ii)
+INSN_LASX(xvstelm_w, vr_ii)
+INSN_LASX(xvstelm_h, vr_ii)
+INSN_LASX(xvstelm_b, vr_ii)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
     return true;
 }
 
-#define VLDREPL(NAME, MO) \
-static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
-{ \
-    TCGv addr; \
-    TCGv_i64 val; \
- \
-    if (!avail_LSX(ctx)) { \
-        return false; \
-    } \
- \
-    if (!check_vec(ctx, 16)) { \
-        return true; \
-    } \
- \
-    addr = gpr_src(ctx, a->rj, EXT_NONE); \
-    val = tcg_temp_new_i64(); \
- \
-    addr = make_address_i(ctx, addr, a->imm); \
- \
-    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO); \
-    tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
- \
-    return true; \
-}
-
-VLDREPL(vldrepl_b, MO_8)
-VLDREPL(vldrepl_h, MO_16)
-VLDREPL(vldrepl_w, MO_32)
-VLDREPL(vldrepl_d, MO_64)
-
-#define VSTELM(NAME, MO, E) \
-static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
-{ \
-    TCGv addr; \
-    TCGv_i64 val; \
- \
-    if (!avail_LSX(ctx)) { \
-        return false; \
-    } \
- \
-    if (!check_vec(ctx, 16)) { \
-        return true; \
-    } \
- \
-    addr = gpr_src(ctx, a->rj, EXT_NONE); \
-    val = tcg_temp_new_i64(); \
- \
-    addr = make_address_i(ctx, addr, a->imm); \
- \
-    tcg_gen_ld_i64(val, cpu_env, \
-                   offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \
-    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO); \
- \
-    return true; \
-}
-
-VSTELM(vstelm_b, MO_8, B)
-VSTELM(vstelm_h, MO_16, H)
-VSTELM(vstelm_w, MO_32, W)
-VSTELM(vstelm_d, MO_64, D)
+static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
+                          uint32_t oprsz, MemOp mop)
+{
+    TCGv addr;
+    TCGv_i64 val;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    addr = gpr_src(ctx, a->rj, EXT_NONE);
+    val = tcg_temp_new_i64();
+
+    addr = make_address_i(ctx, addr, a->imm);
+
+    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
+    tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
+
+    return true;
+}
+
+static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+    return do_vldrepl_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+    return do_vldrepl_vl(ctx, a, 32, mop);
+}
+
+TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
+TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
+TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
+TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
+TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
+TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
+TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
+TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
+
+static bool do_vstelm_vl(DisasContext *ctx,
+                         arg_vr_ii *a, uint32_t oprsz, MemOp mop)
+{
+    TCGv addr;
+    TCGv_i64 val;
+
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    addr = gpr_src(ctx, a->rj, EXT_NONE);
+    val = tcg_temp_new_i64();
+
+    addr = make_address_i(ctx, addr, a->imm);
+    tcg_gen_ld_i64(val, cpu_env, vec_reg_offset(a->vd, a->imm2, mop));
+    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
+    return true;
+}
+
+static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+    return do_vstelm_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+    return do_vstelm_vl(ctx, a, 32, mop);
+}
+
+TRANS(vstelm_b, LSX, do_vstelm, MO_8)
+TRANS(vstelm_h, LSX, do_vstelm, MO_16)
+TRANS(vstelm_w, LSX, do_vstelm, MO_32)
+TRANS(vstelm_d, LSX, do_vstelm, MO_64)
+TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
+TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
+TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
+TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
+
+static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
+                            void (*func)(DisasContext *, int, TCGv))
+{
+    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv temp = NULL;
+
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    if (a->imm) {
+        temp = tcg_temp_new();
+        tcg_gen_addi_tl(temp, addr, a->imm);
+        addr = temp;
+    }
+
+    func(ctx, a->vd, addr);
+    return true;
+}
+
+static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
+{
+    int i;
+    TCGv temp = tcg_temp_new();
+    TCGv dest = tcg_temp_new();
+
+    tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+    set_vreg64(dest, vreg, 0);
+
+    for (i = 1; i < 4; i++) {
+        tcg_gen_addi_tl(temp, addr, 8 * i);
+        tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+        set_vreg64(dest, vreg, i);
+    }
+}
+
+static void gen_xvst(DisasContext *ctx, int vreg, TCGv addr)
+{
+    int i;
+    TCGv temp = tcg_temp_new();
+    TCGv dest = tcg_temp_new();
+
+    get_vreg64(dest, vreg, 0);
+    tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+
+    for (i = 1; i < 4; i++) {
+        tcg_gen_addi_tl(temp, addr, 8 * i);
+        get_vreg64(dest, vreg, i);
+        tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+    }
+}
+
+TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
+TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
+
+static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
+                             void (*func)(DisasContext *, int, TCGv))
+{
+    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+    TCGv addr = tcg_temp_new();
+
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    tcg_gen_add_tl(addr, src1, src2);
+    func(ctx, a->vd, addr);
+
+    return true;
+}
+
+TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
+TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
--
2.39.1
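
Note that gen_xvld/gen_xvst do not emit one 256-bit access: the vector is
moved as four consecutive 64-bit memory operations at addr + 0, 8, 16 and
24. A host-side model of the resulting layout (illustrative only,
little-endian host assumed):

#include <stdint.h>
#include <string.h>

/* xvld equivalent: fill the four 64-bit slots of a 256-bit register
 * from 32 contiguous bytes, lowest slot first, just as gen_xvld
 * chains its tcg_gen_qemu_ld_i64() calls. */
static void xvld_model(uint64_t xd[4], const uint8_t *addr)
{
    for (int i = 0; i < 4; i++) {
        memcpy(&xd[i], addr + 8 * i, 8);    /* one 64-bit slot per step */
    }
}

xvldrepl differs only in that a single element is loaded once and then
broadcast to every element of the 256-bit register via
tcg_gen_gvec_dup_i64().
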
New patch

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-57-gaosong@loongson.cn>
---
 target/loongarch/vec.h        | 42 ++++++++++++++++++++++++++++++
 target/loongarch/vec_helper.c | 48 -----------------------------------
 2 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec.h
+++ b/target/loongarch/vec.h
@@ -XXX,XX +XXX,XX @@
 #define Q(x) Q[x]
 #endif /* HOST_BIG_ENDIAN */
 
+#define DO_ADD(a, b) (a + b)
+#define DO_SUB(a, b) (a - b)
+#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
+#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
+#define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a))
+#define DO_VABS(a) ((a < 0) ? (-a) : (a))
+#define DO_MIN(a, b) (a < b ? a : b)
+#define DO_MAX(a, b) (a > b ? a : b)
+#define DO_MUL(a, b) (a * b)
+#define DO_MADD(a, b, c) (a + b * c)
+#define DO_MSUB(a, b, c) (a - b * c)
+
+#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
+#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
+#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\
+        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
+#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
+        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
+
+#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)
+
+#define R_SHIFT(a, b) (a >> b)
+
+#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
+#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16)
+#define DO_CLO_W(N) (clz32(~N))
+#define DO_CLO_D(N) (clz64(~N))
+#define DO_CLZ_B(N) (clz32(N) - 24)
+#define DO_CLZ_H(N) (clz32(N) - 16)
+#define DO_CLZ_W(N) (clz32(N))
+#define DO_CLZ_D(N) (clz64(N))
+
+#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
+#define DO_BITSET(a, bit) (a | 1ull << bit)
+#define DO_BITREV(a, bit) (a ^ (1ull << bit))
+
+#define VSEQ(a, b) (a == b ? -1 : 0)
+#define VSLE(a, b) (a <= b ? -1 : 0)
+#define VSLT(a, b) (a < b ? -1 : 0)
+
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
 #endif /* LOONGARCH_VEC_H */
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "vec.h"
 #include "tcg/tcg-gvec-desc.h"
 
-#define DO_ADD(a, b) (a + b)
-#define DO_SUB(a, b) (a - b)
-
 #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
 DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
 DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
 
-#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
-#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
-
 #define DO_3OP(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
 DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
 DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
 
-#define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a))
-
 DO_3OP(vabsd_b, 8, B, DO_VABSD)
 DO_3OP(vabsd_h, 16, H, DO_VABSD)
 DO_3OP(vabsd_w, 32, W, DO_VABSD)
@@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
 DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
 DO_3OP(vabsd_du, 64, UD, DO_VABSD)
 
-#define DO_VABS(a) ((a < 0) ? (-a) : (a))
-
 #define DO_VADDA(NAME, BIT, E) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_h, 16, H)
 DO_VADDA(vadda_w, 32, W)
 DO_VADDA(vadda_d, 64, D)
 
-#define DO_MIN(a, b) (a < b ? a : b)
-#define DO_MAX(a, b) (a > b ? a : b)
-
 #define VMINMAXI(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
 DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
 DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
 
-#define DO_MUL(a, b) (a * b)
-
 DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
 DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
 DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
@@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
 DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
 DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
 
-#define DO_MADD(a, b, c) (a + b * c)
-#define DO_MSUB(a, b, c) (a - b * c)
-
 #define VMADDSUB(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
 VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
 VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
 
-#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
-#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
-#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\
-        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
-#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
-        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
-
 #define VDIV(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
 VEXT2XV(vext2xv_du_hu, 64, UD, UH)
 VEXT2XV(vext2xv_du_wu, 64, UD, UW)
 
-#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)
-
 DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
 DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
 DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
@@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_h, 16, H)
 VSRARI(vsrari_w, 32, W)
 VSRARI(vsrari_d, 64, D)
 
-#define R_SHIFT(a, b) (a >> b)
-
 #define VSRLN(NAME, BIT, E1, E2) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
     } \
 }
 
-#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
-#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16)
-#define DO_CLO_W(N) (clz32(~N))
-#define DO_CLO_D(N) (clz64(~N))
-#define DO_CLZ_B(N) (clz32(N) - 24)
-#define DO_CLZ_H(N) (clz32(N) - 16)
-#define DO_CLZ_W(N) (clz32(N))
-#define DO_CLZ_D(N) (clz64(N))
-
 DO_2OP(vclo_b, 8, UB, DO_CLO_B)
 DO_2OP(vclo_h, 16, UH, DO_CLO_H)
 DO_2OP(vclo_w, 32, UW, DO_CLO_W)
@@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_h, 16, UH, ctpop16)
 VPCNT(vpcnt_w, 32, UW, ctpop32)
 VPCNT(vpcnt_d, 64, UD, ctpop64)
 
-#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
-#define DO_BITSET(a, bit) (a | 1ull << bit)
-#define DO_BITREV(a, bit) (a ^ (1ull << bit))
-
 #define DO_BIT(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
     *Vd = temp;
 }
 
-#define VSEQ(a, b) (a == b ? -1 : 0)
-#define VSLE(a, b) (a <= b ? -1 : 0)
-#define VSLT(a, b) (a < b ? -1 : 0)
-
 #define VCMPI(NAME, BIT, E, DO_OP) \
 void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
 { \
@@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H)
 VILVH(vilvh_w, 64, W)
 VILVH(vilvh_d, 128, D)
 
-#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
-
 void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
 {
     int i, j, m;
--
2.39.1
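
Most of the macros gathered into vec.h are self-explanatory, but the
averaging pair deserves a note: DO_VAVG/DO_VAVGR halve each operand before
adding, so the sum can never overflow the element type, and the final term
restores the bit lost to truncation (a & b & 1 for the truncating form,
(a | b) & 1 for the rounding form). A tiny self-contained check:

#include <assert.h>
#include <stdint.h>

#define DO_VAVG(a, b)  ((a >> 1) + (b >> 1) + (a & b & 1))
#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))

int main(void)
{
    /* a + b would wrap a plain uint64_t sum. */
    uint64_t a = UINT64_MAX, b = UINT64_MAX - 1;

    assert(DO_VAVG(a, b) == UINT64_MAX - 1);   /* floor((a + b) / 2) */
    assert(DO_VAVGR(a, b) == UINT64_MAX);      /* ceil((a + b) / 2)  */
    return 0;
}
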
New patch

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-58-gaosong@loongson.cn>
---
 target/loongarch/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_la464_initfn(Object *obj)
     data = FIELD_DP32(data, CPUCFG2, FP_DP, 1);
     data = FIELD_DP32(data, CPUCFG2, FP_VER, 1);
     data = FIELD_DP32(data, CPUCFG2, LSX, 1),
+    data = FIELD_DP32(data, CPUCFG2, LASX, 1),
     data = FIELD_DP32(data, CPUCFG2, LLFTP, 1);
     data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1);
     data = FIELD_DP32(data, CPUCFG2, LSPW, 1);
--
2.39.1
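
FIELD_DP32 here is QEMU's registerfields.h deposit helper: it returns the
32-bit word with the named field overwritten, so the added line simply
turns on the LASX capability bit in CPUCFG word 2, which guests read back
with the cpucfg instruction. A minimal model of the deposit (the
shift/width values below are illustrative, not copied from the QEMU
headers):

#include <stdint.h>

/* Hypothetical placement of the LASX field, for illustration only. */
#define CPUCFG2_LASX_SHIFT  7
#define CPUCFG2_LASX_WIDTH  1

/* Same idea as FIELD_DP32: mask out the field, then OR in the new
 * value shifted into place. */
static uint32_t field_dp32(uint32_t reg, unsigned shift,
                           unsigned width, uint32_t value)
{
    uint32_t mask = ((1u << width) - 1) << shift;
    return (reg & ~mask) | ((value << shift) & mask);
}

/* e.g. data = field_dp32(data, CPUCFG2_LASX_SHIFT,
 *                        CPUCFG2_LASX_WIDTH, 1); */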