The following changes since commit 4907644841e3200aea6475c0f72d3d987e9f3d93:

  Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu into staging (2023-09-19 13:22:19 -0400)

are available in the Git repository at:

  https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20230920

for you to fetch changes up to 2cd81e37512648a03d7dd37c39fa7fd50e2e4478:

  target/loongarch: CPUCFG support LASX (2023-09-20 14:33:43 +0800)

----------------------------------------------------------------
Add LASX instruction support.

----------------------------------------------------------------
Song Gao (57):
      target/loongarch: Renamed lsx*.c to vec*.c
      target/loongarch: Implement gvec_*_vl functions
      target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_4 for 4OP vector instructions
      target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_3 for 3OP vector instructions
      target/loongarch: Use gen_helper_gvec_2_ptr for 2OP + env vector instructions
      target/loongarch: Use gen_helper_gvec_2 for 2OP vector instructions
      target/loongarch: Use gen_helper_gvec_2i for 2OP + imm vector instructions
      target/loongarch: Replace CHECK_SXE with check_vec(ctx, 16)
      target/loongarch: Add LASX data support
      target/loongarch: Make check_vec support LASX instruction checks
      target/loongarch: Add avail_LASX to check LASX instructions
      target/loongarch: Implement xvadd/xvsub
      target/loongarch: Implement xvreplgr2vr
      target/loongarch: Implement xvaddi/xvsubi
      target/loongarch: Implement xvneg
      target/loongarch: Implement xvsadd/xvssub
      target/loongarch: Implement xvhaddw/xvhsubw
      target/loongarch: Implement xvaddw/xvsubw
      target/loongarch: Implement xvavg/xvavgr
      target/loongarch: Implement xvabsd
      target/loongarch: Implement xvadda
      target/loongarch: Implement xvmax/xvmin
      target/loongarch: Implement xvmul/xvmuh/xvmulw{ev/od}
      target/loongarch: Implement xvmadd/xvmsub/xvmaddw{ev/od}
      target/loongarch: Implement xvdiv/xvmod
      target/loongarch: Implement xvsat
      target/loongarch: Implement xvexth
      target/loongarch: Implement vext2xv
      target/loongarch: Implement xvsigncov
      target/loongarch: Implement xvmskltz/xvmskgez/xvmsknz
      target/loongarch: Implement xvldi
      target/loongarch: Implement LASX logic instructions
      target/loongarch: Implement xvsll xvsrl xvsra xvrotr
      target/loongarch: Implement xvsllwil xvextl
      target/loongarch: Implement xvsrlr xvsrar
      target/loongarch: Implement xvsrln xvsran
      target/loongarch: Implement xvsrlrn xvsrarn
      target/loongarch: Implement xvssrln xvssran
      target/loongarch: Implement xvssrlrn xvssrarn
      target/loongarch: Implement xvclo xvclz
      target/loongarch: Implement xvpcnt
      target/loongarch: Implement xvbitclr xvbitset xvbitrev
      target/loongarch: Implement xvfrstp
      target/loongarch: Implement LASX fpu arith instructions
      target/loongarch: Implement LASX fpu fcvt instructions
      target/loongarch: Implement xvseq xvsle xvslt
      target/loongarch: Implement xvfcmp
      target/loongarch: Implement xvbitsel xvset
      target/loongarch: Implement xvinsgr2vr xvpickve2gr
      target/loongarch: Implement xvreplve xvinsve0 xvpickve
      target/loongarch: Implement xvpack xvpick xvilv{l/h}
      target/loongarch: Implement xvshuf xvperm{i} xvshuf4i
      target/loongarch: Implement xvld xvst
      target/loongarch: Move simple DO_XX macros together
      target/loongarch: CPUCFG support LASX

 target/loongarch/cpu.h                              |   26 +-
 target/loongarch/helper.h                           |  689 ++--
 target/loongarch/internals.h                        |   22 -
 target/loongarch/translate.h                        |    1 +
 target/loongarch/vec.h                              |   75 +
 target/loongarch/insns.decode                       |  782 +++++
 linux-user/loongarch64/signal.c                     |    1 +
 target/loongarch/cpu.c                              |    4 +
 target/loongarch/disas.c                            |  924 ++++++
 target/loongarch/gdbstub.c                          |    1 +
 target/loongarch/lsx_helper.c                       | 3004 -----------------
 target/loongarch/machine.c                          |   36 +-
 target/loongarch/translate.c                        |   19 +-
 target/loongarch/vec_helper.c                       | 3494 ++++++++++++++++++++
 .../{trans_lsx.c.inc => trans_vec.c.inc}            | 2393 ++++++++++----
 target/loongarch/meson.build                        |    2 +-
 16 files changed, 7386 insertions(+), 4087 deletions(-)
 create mode 100644 target/loongarch/vec.h
 delete mode 100644 target/loongarch/lsx_helper.c
 rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (61%)
----------------------------------------------------------------
[PATCH 01/57] target/loongarch: Renamed lsx*.c to vec*.c

Rename lsx_helper.c to vec_helper.c and trans_lsx.c.inc to
trans_vec.c.inc, so that LASX can use them.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-2-gaosong@loongson.cn>
---
 target/loongarch/translate.c                                  | 2 +-
 target/loongarch/{lsx_helper.c => vec_helper.c}               | 2 +-
 .../loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} | 2 +-
 target/loongarch/meson.build                                  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename target/loongarch/{lsx_helper.c => vec_helper.c} (99%)
 rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (99%)

diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
 #include "insn_trans/trans_fmemory.c.inc"
 #include "insn_trans/trans_branch.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
-#include "insn_trans/trans_lsx.c.inc"
+#include "insn_trans/trans_vec.c.inc"
 
 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/vec_helper.c
similarity index 99%
rename from target/loongarch/lsx_helper.c
rename to target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * QEMU LoongArch LSX helper functions.
+ * QEMU LoongArch vector helper functions.
  *
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
similarity index 99%
rename from target/loongarch/insn_trans/trans_lsx.c.inc
rename to target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * LSX translate functions
+ * LoongArch vector translate functions
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */
 
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -XXX,XX +XXX,XX @@ loongarch_tcg_ss.add(files(
   'op_helper.c',
   'translate.c',
   'gdbstub.c',
-  'lsx_helper.c',
+  'vec_helper.c',
 ))
 loongarch_tcg_ss.add(zlib)

-- 
2.39.1
----------------------------------------------------------------
[PATCH 02/57] target/loongarch: Implement gvec_*_vl functions

Create gvec_*_vl functions in order to hide oprsz. They are used
by the gvec_v* functions with oprsz 16, and will be used by the
gvec_x* functions with oprsz 32.

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-3-gaosong@loongson.cn>
---
 target/loongarch/insn_trans/trans_vec.c.inc | 68 +++++++++++++--------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_cv(DisasContext *ctx, arg_cv *a,
     return true;
 }
 
+static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
+                        uint32_t oprsz, MemOp mop,
+                        void (*func)(unsigned, uint32_t, uint32_t,
+                                     uint32_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
+    uint32_t vk_ofs = vec_full_offset(a->vk);
+
+    func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
+    return true;
+}
+
 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
                      void (*func)(unsigned, uint32_t, uint32_t,
                                   uint32_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs, vk_ofs;
-
     CHECK_SXE;
+    return gvec_vvv_vl(ctx, a, 16, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-    vk_ofs = vec_full_offset(a->vk);
 
-    func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
+                       uint32_t oprsz, MemOp mop,
+                       void (*func)(unsigned, uint32_t, uint32_t,
+                                    uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
+
+    func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
     return true;
 }
 
+
 static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
                     void (*func)(unsigned, uint32_t, uint32_t,
                                  uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
+    return gvec_vv_vl(ctx, a, 16, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop,
+                         void (*func)(unsigned, uint32_t, uint32_t,
+                                      int64_t, uint32_t, uint32_t))
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
 
-    func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
+    func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
                       void (*func)(unsigned, uint32_t, uint32_t,
                                    int64_t, uint32_t, uint32_t))
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
+    return gvec_vv_i_vl(ctx, a, 16, mop, func);
+}
 
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
+static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
+                         uint32_t oprsz, MemOp mop)
+{
+    uint32_t vd_ofs = vec_full_offset(a->vd);
+    uint32_t vj_ofs = vec_full_offset(a->vj);
 
-    func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
+    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
     return true;
 }
 
 static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
 {
-    uint32_t vd_ofs, vj_ofs;
-
     CHECK_SXE;
-
-    vd_ofs = vec_full_offset(a->vd);
-    vj_ofs = vec_full_offset(a->vj);
-
-    tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
-    return true;
+    return gvec_subi_vl(ctx, a, 16, mop);
 }
 
 TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
-- 
2.39.1
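
The point of the _vl split above is that a 256-bit front end only has to
pass a different oprsz. As a rough illustration (not part of this patch),
a LASX wrapper built on gvec_vvv_vl could look like the sketch below;
CHECK_ASXE is a placeholder for the LASX availability check that the
later check_vec/avail_LASX patches actually introduce:

    /*
     * Illustrative sketch only: a 256-bit counterpart to gvec_vvv,
     * layered on the gvec_vvv_vl helper added by this patch.
     */
    static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
                         void (*func)(unsigned, uint32_t, uint32_t,
                                      uint32_t, uint32_t, uint32_t))
    {
        CHECK_ASXE;                                /* hypothetical gate */
        return gvec_vvv_vl(ctx, a, 32, mop, func); /* oprsz = 32 bytes */
    }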
----------------------------------------------------------------
[PATCH 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-4-gaosong@loongson.cn>
---
 target/loongarch/helper.h                   | 16 +++++-----
 target/loongarch/vec_helper.c               | 12 +++----
 target/loongarch/insn_trans/trans_vec.c.inc | 35 ++++++++++++++++-----
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
 DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
 
-DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
 DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
 DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
 
 #define DO_4OP_F(NAME, BIT, E, FN, flags)                             \
-void HELPER(NAME)(CPULoongArchState *env,                             \
-                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, void *va,             \
+                  CPULoongArchState *env, uint32_t desc)              \
 {                                                                     \
     int i;                                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                                  \
-    VReg *Vj = &(env->fpr[vj].vreg);                                  \
-    VReg *Vk = &(env->fpr[vk].vreg);                                  \
-    VReg *Va = &(env->fpr[va].vreg);                                  \
+    VReg *Vd = (VReg *)vd;                                            \
+    VReg *Vj = (VReg *)vj;                                            \
+    VReg *Vk = (VReg *)vk;                                            \
+    VReg *Va = (VReg *)va;                                            \
                                                                       \
     vec_clear_cause(env);                                             \
     for (i = 0; i < LSX_LEN/BIT; i++) {                               \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@
 #define CHECK_SXE
 #endif
 
+static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                            gen_helper_gvec_4_ptr *fn)
+{
+    tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
+                         gen_helper_gvec_4_ptr *fn)
+{
+    CHECK_SXE;
+    return gen_vvvv_ptr_vl(ctx, a, 16, fn);
+}
+
 static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
                      void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
                                   TCGv_i32, TCGv_i32))
@@ -XXX,XX +XXX,XX @@ TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
 TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
 TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
 
-TRANS(vfmadd_s, LSX, gen_vvvv, gen_helper_vfmadd_s)
-TRANS(vfmadd_d, LSX, gen_vvvv, gen_helper_vfmadd_d)
-TRANS(vfmsub_s, LSX, gen_vvvv, gen_helper_vfmsub_s)
-TRANS(vfmsub_d, LSX, gen_vvvv, gen_helper_vfmsub_d)
-TRANS(vfnmadd_s, LSX, gen_vvvv, gen_helper_vfnmadd_s)
-TRANS(vfnmadd_d, LSX, gen_vvvv, gen_helper_vfnmadd_d)
-TRANS(vfnmsub_s, LSX, gen_vvvv, gen_helper_vfnmsub_s)
-TRANS(vfnmsub_d, LSX, gen_vvvv, gen_helper_vfnmsub_d)
+TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
+TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
+TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
+TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
+TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
+TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
+TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
+TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
 
 TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
 TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-- 
2.39.1
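
After this conversion a helper no longer receives register numbers
through env; it gets host pointers into the register file plus a gvec
descriptor. A minimal sketch of the resulting shape, assuming QEMU's
simd_oprsz() from "tcg/tcg-gvec-desc.h" and ignoring the per-element
FCSR bookkeeping and host-endian element macros the real helpers use:

    /* Sketch, not the series' exact body: desc encodes the operation
     * size, so one helper body can serve 16-byte LSX now and 32-byte
     * LASX later. */
    void helper_vfmadd_s_sketch(void *vd, void *vj, void *vk, void *va,
                                CPULoongArchState *env, uint32_t desc)
    {
        float32 *Vd = vd, *Vj = vj, *Vk = vk, *Va = va;
        int i, n = simd_oprsz(desc) / 4;   /* number of 32-bit elements */

        for (i = 0; i < n; i++) {
            Vd[i] = float32_muladd(Vj[i], Vk[i], Va[i], 0, &env->fp_status);
        }
    }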
----------------------------------------------------------------
[PATCH 04/57] target/loongarch: Use gen_helper_gvec_4 for 4OP vector instructions

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-5-gaosong@loongson.cn>
---
 target/loongarch/helper.h                   |  2 +-
 target/loongarch/vec_helper.c               | 11 +++++------
 target/loongarch/insn_trans/trans_vec.c.inc | 22 ++++++++++++---------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
 
-DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H)
 VILVH(vilvh_w, 64, W)
 VILVH(vilvh_d, 128, D)
 
-void HELPER(vshuf_b)(CPULoongArchState *env,
-                     uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
+void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
 {
     int i, m;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
-    VReg *Va = &(env->fpr[va].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
+    VReg *Va = (VReg *)va;
 
     m = LSX_LEN/8;
     for (i = 0; i < m ; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
     return gen_vvvv_ptr_vl(ctx, a, 16, fn);
 }
 
-static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
-                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
-                                  TCGv_i32, TCGv_i32))
+static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+                        gen_helper_gvec_4 *fn)
 {
-    TCGv_i32 vd = tcg_constant_i32(a->vd);
-    TCGv_i32 vj = tcg_constant_i32(a->vj);
-    TCGv_i32 vk = tcg_constant_i32(a->vk);
-    TCGv_i32 va = tcg_constant_i32(a->va);
+    tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       vec_full_offset(a->va),
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
 
+static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
+                     gen_helper_gvec_4 *fn)
+{
     CHECK_SXE;
-    func(cpu_env, vd, vj, vk, va);
-    return true;
+    return gen_vvvv_vl(ctx, a, 16, fn);
 }
 
 static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
-- 
2.39.1
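
vshuf_b is the one 4-operand helper that can drop env entirely, because
it only permutes bytes and can neither raise FP exceptions nor touch
other CPU state; that is what permits the TCG_CALL_NO_RWG flag and the
tcg_gen_gvec_4_ool dispatch. A sketch of the env-free shape for the
128-bit case (plain byte arrays stand in for the VReg element macros;
the selection rule shown is illustrative, not the exact in-tree code):

    void helper_vshuf_b_sketch(void *vd, void *vj, void *vk, void *va,
                               uint32_t desc)
    {
        int i, m = simd_oprsz(desc);       /* 16 bytes for LSX */
        uint8_t tmp[32];
        uint8_t *Vd = vd, *Vj = vj, *Vk = vk, *Va = va;

        for (i = 0; i < m; i++) {
            int k = Va[i] % (2 * m);       /* index into the {Vk,Vj} pair */
            tmp[i] = (k < m) ? Vk[k] : Vj[k - m];
        }
        memcpy(Vd, tmp, m);    /* write result only after reading inputs */
    }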
----------------------------------------------------------------
[PATCH 05/57] target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env vector instructions

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-6-gaosong@loongson.cn>
---
 target/loongarch/helper.h                   | 48 +++++++--------
 target/loongarch/vec_helper.c               | 50 ++++++++--------
 target/loongarch/insn_trans/trans_vec.c.inc | 66 +++++++++++++--------
 3 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i3
 DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
-DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_3(vflogb_s, void, env, i32, i32)
 DEF_HELPER_3(vflogb_d, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
 DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
 DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
 DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
-DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
 DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
 DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
 DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
-DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
 DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
 DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
 DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
 DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
-DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -XXX,XX +XXX,XX @@ static inline void vec_clear_cause(CPULoongArchState *env)
 }
 
 #define DO_3OP_F(NAME, BIT, E, FN)                        \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t vk)  \
+void HELPER(NAME)(void *vd, void *vj, void *vk,           \
+                  CPULoongArchState *env, uint32_t desc)  \
 {                                                         \
     int i;                                                \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-    VReg *Vk = &(env->fpr[vk].vreg);                      \
+    VReg *Vd = (VReg *)vd;                                \
+    VReg *Vj = (VReg *)vj;                                \
+    VReg *Vk = (VReg *)vk;                                \
                                                           \
     vec_clear_cause(env);                                 \
     for (i = 0; i < LSX_LEN/BIT; i++) {                   \
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
     *Vd = temp;
 }
 
-void HELPER(vfcvt_h_s)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
 
     vec_clear_cause(env);
     for(i = 0; i < LSX_LEN/32; i++) {
@@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_h_s)(CPULoongArchState *env,
     *Vd = temp;
 }
 
-void HELPER(vfcvt_s_d)(CPULoongArchState *env,
-                       uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
+                       CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
 
     vec_clear_cause(env);
     for(i = 0; i < LSX_LEN/64; i++) {
@@ -XXX,XX +XXX,XX @@ FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
 FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
 
 #define FTINT_W_D(NAME, FN)                               \
-void HELPER(NAME)(CPULoongArchState *env,                 \
-                  uint32_t vd, uint32_t vj, uint32_t vk)  \
+void HELPER(NAME)(void *vd, void *vj, void *vk,           \
+                  CPULoongArchState *env, uint32_t desc)  \
 {                                                         \
     int i;                                                \
     VReg temp;                                            \
-    VReg *Vd = &(env->fpr[vd].vreg);                      \
-    VReg *Vj = &(env->fpr[vj].vreg);                      \
-    VReg *Vk = &(env->fpr[vk].vreg);                      \
+    VReg *Vd = (VReg *)vd;                                \
+    VReg *Vj = (VReg *)vj;                                \
+    VReg *Vk = (VReg *)vk;                                \
                                                           \
     vec_clear_cause(env);                                 \
     for (i = 0; i < 2; i++) {                             \
@@ -XXX,XX +XXX,XX @@ void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
     *Vd = temp;
 }
 
-void HELPER(vffint_s_l)(CPULoongArchState *env,
-                        uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
+                        CPULoongArchState *env, uint32_t desc)
 {
     int i;
     VReg temp;
-    VReg *Vd = &(env->fpr[vd].vreg);
-    VReg *Vj = &(env->fpr[vj].vreg);
-    VReg *Vk = &(env->fpr[vk].vreg);
+    VReg *Vd = (VReg *)vd;
+    VReg *Vj = (VReg *)vj;
+    VReg *Vk = (VReg *)vk;
 
     vec_clear_cause(env);
     for (i = 0; i < 2; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -XXX,XX +XXX,XX @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
     return gen_vvvv_vl(ctx, a, 16, fn);
 }
 
+static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+                           gen_helper_gvec_3_ptr *fn)
+{
+    tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
+                       vec_full_offset(a->vj),
+                       vec_full_offset(a->vk),
+                       cpu_env,
+                       oprsz, ctx->vl / 8, 0, fn);
+    return true;
+}
+
+static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
+                        gen_helper_gvec_3_ptr *fn)
+{
+    CHECK_SXE;
+    return gen_vvv_ptr_vl(ctx, a, 16, fn);
+}
+
 static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
 {
@@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
 TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
 TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
 
-TRANS(vfadd_s, LSX, gen_vvv, gen_helper_vfadd_s)
-TRANS(vfadd_d, LSX, gen_vvv, gen_helper_vfadd_d)
-TRANS(vfsub_s, LSX, gen_vvv, gen_helper_vfsub_s)
-TRANS(vfsub_d, LSX, gen_vvv, gen_helper_vfsub_d)
-TRANS(vfmul_s, LSX, gen_vvv, gen_helper_vfmul_s)
-TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
-TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
-TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
+TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
+TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
+TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
+TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
+TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
+TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
+TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
+TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
 
 TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
 TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
 TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
 TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
 
-TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
-TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-TRANS(vfmin_s, LSX, gen_vvv, gen_helper_vfmin_s)
-TRANS(vfmin_d, LSX, gen_vvv, gen_helper_vfmin_d)
+TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
+TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
+TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
+TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
 
-TRANS(vfmaxa_s, LSX, gen_vvv, gen_helper_vfmaxa_s)
-TRANS(vfmaxa_d, LSX, gen_vvv, gen_helper_vfmaxa_d)
-TRANS(vfmina_s, LSX, gen_vvv, gen_helper_vfmina_s)
-TRANS(vfmina_d, LSX, gen_vvv, gen_helper_vfmina_d)
+TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
+TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
+TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
+TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
 
 TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
 TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
 TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
 TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
 TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
-TRANS(vfcvt_h_s, LSX, gen_vvv, gen_helper_vfcvt_h_s)
-TRANS(vfcvt_s_d, LSX, gen_vvv, gen_helper_vfcvt_s_d)
+TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
+TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
 
 TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
 TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
@@ -XXX,XX +XXX,XX @@ TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
 TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
 TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
 TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
-TRANS(vftintrne_w_d, LSX, gen_vvv, gen_helper_vftintrne_w_d)
-TRANS(vftintrz_w_d, LSX, gen_vvv, gen_helper_vftintrz_w_d)
-TRANS(vftintrp_w_d, LSX, gen_vvv, gen_helper_vftintrp_w_d)
-TRANS(vftintrm_w_d, LSX, gen_vvv, gen_helper_vftintrm_w_d)
-TRANS(vftint_w_d, LSX, gen_vvv, gen_helper_vftint_w_d)
+TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
+TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
+TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
+TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
+TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
 TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
 TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
 TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
@@ -XXX,XX +XXX,XX @@ TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
 TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
 TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
 TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
-TRANS(vffint_s_l, LSX, gen_vvv, gen_helper_vffint_s_l)
+TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
 
 static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
 {
-- 
2.39.1
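
These 3OP FP helpers keep the env argument, unlike the pure
bit-manipulation helpers of the next patch, because they must consult
env->fp_status and manage FCSR cause bits via vec_clear_cause(). As a
sketch of what a converted body amounts to for vfadd_s, with the loop
bound taken from desc rather than the fixed LSX_LEN/BIT the in-tree
macro still uses at this point, and the per-element exception
bookkeeping elided:

    void helper_vfadd_s_sketch(void *vd, void *vj, void *vk,
                               CPULoongArchState *env, uint32_t desc)
    {
        float32 *Vd = vd, *Vj = vj, *Vk = vk;
        int i, n = simd_oprsz(desc) / 4;   /* 4 elements at 16, 8 at 32 */

        vec_clear_cause(env);              /* reset accumulated FP cause */
        for (i = 0; i < n; i++) {
            Vd[i] = float32_add(Vj[i], Vk[i], &env->fp_status);
        }
    }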
----------------------------------------------------------------
[PATCH 06/57] target/loongarch: Use gen_helper_gvec_3 for 3OP vector instructions

Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230914022645.1151356-7-gaosong@loongson.cn>
---
 target/loongarch/helper.h                   | 214 +++++-----
 target/loongarch/vec_helper.c               | 444 +++++++++-----------
 target/loongarch/insn_trans/trans_vec.c.inc |  19 +-
 3 files changed, 326 insertions(+), 351 deletions(-)

diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(idle, void, env)
 #endif
 
 /* LoongArch LSX */
-DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
-DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
 
-DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
 
-DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
-DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
 
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
 
-DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
289 | -DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32) | ||
290 | +DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
291 | +DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
292 | +DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
293 | DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32) | ||
294 | DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32) | ||
295 | DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32) | ||
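A note on the conversion above: the declarations drop the env argument and the register indices in favour of raw element pointers, which is what allows the calls to carry TCG_CALL_NO_RWG (the helper neither reads nor writes tracked TCG globals). A minimal standalone sketch of the new shape, with VReg and helper_vop as illustrative stand-ins rather than QEMU's real definitions:

    #include <stdint.h>

    /* Sketch of the converted helper shape, outside QEMU.  The
     * translator now passes the addresses of the fpr elements plus a
     * size descriptor, so the helper never dereferences env. */
    typedef union {
        int8_t  B[16];
        int64_t D[2];
    } VReg;

    static void helper_vop(void *vd, void *vj, void *vk, uint32_t desc)
    {
        VReg *Vd = vd, *Vj = vj, *Vk = vk;

        (void)desc;                        /* encodes oprsz/maxsz */
        for (int i = 0; i < 16; i++) {
            Vd->B[i] = Vj->B[i] + Vk->B[i];
        }
    }

    int main(void)
    {
        VReg a = { .B = {1, 2} }, b = { .B = {10, 20} }, d;

        helper_vop(&d, &a, &b, 0);
        return d.B[1] == 22 ? 0 : 1;       /* 2 + 20 */
    }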
296 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
297 | index XXXXXXX..XXXXXXX 100644 | ||
298 | --- a/target/loongarch/vec_helper.c | ||
299 | +++ b/target/loongarch/vec_helper.c | ||
300 | @@ -XXX,XX +XXX,XX @@ | ||
301 | #define DO_SUB(a, b) (a - b) | ||
302 | |||
303 | #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
304 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
305 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
306 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
307 | { \ | ||
308 | int i; \ | ||
309 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
310 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
311 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
312 | + VReg *Vd = (VReg *)vd; \ | ||
313 | + VReg *Vj = (VReg *)vj; \ | ||
314 | + VReg *Vk = (VReg *)vk; \ | ||
315 | typedef __typeof(Vd->E1(0)) TD; \ | ||
316 | \ | ||
317 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
318 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) | ||
319 | DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) | ||
320 | DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) | ||
321 | |||
322 | -void HELPER(vhaddw_q_d)(CPULoongArchState *env, | ||
323 | - uint32_t vd, uint32_t vj, uint32_t vk) | ||
324 | +void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
325 | { | ||
326 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
327 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
328 | - VReg *Vk = &(env->fpr[vk].vreg); | ||
329 | + VReg *Vd = (VReg *)vd; | ||
330 | + VReg *Vj = (VReg *)vj; | ||
331 | + VReg *Vk = (VReg *)vk; | ||
332 | |||
333 | Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
334 | } | ||
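For reference, vhaddw.q.d adds the odd 64-bit lane of vj to the even lane of vk at 128-bit precision. A standalone model, using the compiler's __int128 where QEMU uses its Int128 API (assumes GCC or Clang on a 64-bit host):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of vhaddw.q.d: Vd.Q(0) = sext128(Vj.D(1)) + sext128(Vk.D(0)). */
    static __int128 vhaddw_q_d(int64_t vj_d1, int64_t vk_d0)
    {
        return (__int128)vj_d1 + (__int128)vk_d0;
    }

    int main(void)
    {
        __int128 r = vhaddw_q_d(INT64_MAX, 1);   /* would wrap in 64 bits */
        printf("high=%llx low=%llx\n",
               (unsigned long long)(r >> 64),
               (unsigned long long)r);
        return 0;
    }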
335 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) | ||
336 | DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) | ||
337 | DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) | ||
338 | |||
339 | -void HELPER(vhsubw_q_d)(CPULoongArchState *env, | ||
340 | - uint32_t vd, uint32_t vj, uint32_t vk) | ||
341 | +void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
342 | { | ||
343 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
344 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
345 | - VReg *Vk = &(env->fpr[vk].vreg); | ||
346 | + VReg *Vd = (VReg *)vd; | ||
347 | + VReg *Vj = (VReg *)vj; | ||
348 | + VReg *Vk = (VReg *)vk; | ||
349 | |||
350 | Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
351 | } | ||
352 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) | ||
353 | DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) | ||
354 | DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) | ||
355 | |||
356 | -void HELPER(vhaddw_qu_du)(CPULoongArchState *env, | ||
357 | - uint32_t vd, uint32_t vj, uint32_t vk) | ||
358 | +void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
359 | { | ||
360 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
361 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
362 | - VReg *Vk = &(env->fpr[vk].vreg); | ||
363 | + VReg *Vd = (VReg *)vd; | ||
364 | + VReg *Vj = (VReg *)vj; | ||
365 | + VReg *Vk = (VReg *)vk; | ||
366 | |||
367 | Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
368 | int128_make64((uint64_t)Vk->D(0))); | ||
369 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) | ||
370 | DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) | ||
371 | DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) | ||
372 | |||
373 | -void HELPER(vhsubw_qu_du)(CPULoongArchState *env, | ||
374 | - uint32_t vd, uint32_t vj, uint32_t vk) | ||
375 | +void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
376 | { | ||
377 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
378 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
379 | - VReg *Vk = &(env->fpr[vk].vreg); | ||
380 | + VReg *Vd = (VReg *)vd; | ||
381 | + VReg *Vj = (VReg *)vj; | ||
382 | + VReg *Vk = (VReg *)vk; | ||
383 | |||
384 | Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
385 | int128_make64((uint64_t)Vk->D(0))); | ||
386 | @@ -XXX,XX +XXX,XX @@ VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
387 | #define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ | ||
388 | unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) | ||
389 | |||
390 | -#define VDIV(NAME, BIT, E, DO_OP) \ | ||
391 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
392 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
393 | -{ \ | ||
394 | - int i; \ | ||
395 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
396 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
397 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
398 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
399 | - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
400 | - } \ | ||
401 | +#define VDIV(NAME, BIT, E, DO_OP) \ | ||
402 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
403 | +{ \ | ||
404 | + int i; \ | ||
405 | + VReg *Vd = (VReg *)vd; \ | ||
406 | + VReg *Vj = (VReg *)vj; \ | ||
407 | + VReg *Vk = (VReg *)vk; \ | ||
408 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
409 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
410 | + } \ | ||
411 | } | ||
412 | |||
413 | VDIV(vdiv_b, 8, B, DO_DIV) | ||
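The DO_REM guard above covers both undefined cases of C division: a zero divisor, and the minimum value divided by -1. The `N == -N` test is the type-generic way to spot the minimum, since negating it wraps back to itself. A standalone check, pinned to int so the demo avoids the signed-overflow corner the generic idiom relies on:

    #include <limits.h>
    #include <stdio.h>

    /* Same guard as DO_REM above, pinned to int: (N == -N) is true
     * only for 0 and the minimum value, and 0 % -1 is already well
     * defined, so this catches exactly the INT_MIN % -1 trap. */
    #define DO_REM_INT(N, M) ((M) == 0 ? 0 : \
        ((N) == INT_MIN && (M) == -1) ? 0 : (N) % (M))

    int main(void)
    {
        printf("%d\n", DO_REM_INT(INT_MIN, -1)); /* 0, no trap */
        printf("%d\n", DO_REM_INT(7, 0));        /* 0, no trap */
        printf("%d\n", DO_REM_INT(7, 3));        /* 1 */
        return 0;
    }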
414 | @@ -XXX,XX +XXX,XX @@ do_vsrlr(W, uint32_t) | ||
415 | do_vsrlr(D, uint64_t) | ||
416 | |||
417 | #define VSRLR(NAME, BIT, T, E) \ | ||
418 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
419 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
420 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
421 | { \ | ||
422 | int i; \ | ||
423 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
424 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
425 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
426 | + VReg *Vd = (VReg *)vd; \ | ||
427 | + VReg *Vj = (VReg *)vj; \ | ||
428 | + VReg *Vk = (VReg *)vk; \ | ||
429 | \ | ||
430 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
431 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
432 | @@ -XXX,XX +XXX,XX @@ do_vsrar(W, int32_t) | ||
433 | do_vsrar(D, int64_t) | ||
434 | |||
435 | #define VSRAR(NAME, BIT, T, E) \ | ||
436 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
437 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
438 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
439 | { \ | ||
440 | int i; \ | ||
441 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
442 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
443 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
444 | + VReg *Vd = (VReg *)vd; \ | ||
445 | + VReg *Vj = (VReg *)vj; \ | ||
446 | + VReg *Vk = (VReg *)vk; \ | ||
447 | \ | ||
448 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
449 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
450 | @@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_d, 64, D) | ||
451 | #define R_SHIFT(a, b) (a >> b) | ||
452 | |||
453 | #define VSRLN(NAME, BIT, T, E1, E2) \ | ||
454 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
455 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
456 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
457 | { \ | ||
458 | int i; \ | ||
459 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
460 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
461 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
462 | + VReg *Vd = (VReg *)vd; \ | ||
463 | + VReg *Vj = (VReg *)vj; \ | ||
464 | + VReg *Vk = (VReg *)vk; \ | ||
465 | \ | ||
466 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
467 | Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \ | ||
468 | @@ -XXX,XX +XXX,XX @@ VSRLN(vsrln_b_h, 16, uint16_t, B, H) | ||
469 | VSRLN(vsrln_h_w, 32, uint32_t, H, W) | ||
470 | VSRLN(vsrln_w_d, 64, uint64_t, W, D) | ||
471 | |||
472 | -#define VSRAN(NAME, BIT, T, E1, E2) \ | ||
473 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
474 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
475 | -{ \ | ||
476 | - int i; \ | ||
477 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
478 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
479 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
480 | - \ | ||
481 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
482 | - Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ | ||
483 | - } \ | ||
484 | - Vd->D(1) = 0; \ | ||
485 | +#define VSRAN(NAME, BIT, T, E1, E2) \ | ||
486 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
487 | +{ \ | ||
488 | + int i; \ | ||
489 | + VReg *Vd = (VReg *)vd; \ | ||
490 | + VReg *Vj = (VReg *)vj; \ | ||
491 | + VReg *Vk = (VReg *)vk; \ | ||
492 | + \ | ||
493 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
494 | + Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ | ||
495 | + } \ | ||
496 | + Vd->D(1) = 0; \ | ||
497 | } | ||
498 | |||
499 | VSRAN(vsran_b_h, 16, uint16_t, B, H) | ||
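As a reminder of the semantics being converted: vsran.b.h arithmetically shifts each signed 16-bit lane by the low bits of the other operand and keeps the low byte, with the upper half of vd cleared by the `Vd->D(1) = 0` above. One lane, modelled standalone:

    #include <stdint.h>
    #include <stdio.h>

    /* One lane of vsran.b.h: arithmetic shift of a signed 16-bit
     * element by (shift % 16), then truncation to the low byte. */
    static int8_t vsran_lane(int16_t vj, int16_t vk)
    {
        return (int8_t)(vj >> ((uint16_t)vk % 16));
    }

    int main(void)
    {
        printf("%d\n", vsran_lane(-256, 4));   /* -256 >> 4 = -16 */
        return 0;
    }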
500 | @@ -XXX,XX +XXX,XX @@ VSRANI(vsrani_h_w, 32, H, W) | ||
501 | VSRANI(vsrani_w_d, 64, W, D) | ||
502 | |||
503 | #define VSRLRN(NAME, BIT, T, E1, E2) \ | ||
504 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
505 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
506 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
507 | { \ | ||
508 | int i; \ | ||
509 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
510 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
511 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
512 | + VReg *Vd = (VReg *)vd; \ | ||
513 | + VReg *Vj = (VReg *)vj; \ | ||
514 | + VReg *Vk = (VReg *)vk; \ | ||
515 | \ | ||
516 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
517 | Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
518 | @@ -XXX,XX +XXX,XX @@ VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W) | ||
519 | VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D) | ||
520 | |||
521 | #define VSRARN(NAME, BIT, T, E1, E2) \ | ||
522 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
523 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
524 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
525 | { \ | ||
526 | int i; \ | ||
527 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
528 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
529 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
530 | + VReg *Vd = (VReg *)vd; \ | ||
531 | + VReg *Vj = (VReg *)vj; \ | ||
532 | + VReg *Vk = (VReg *)vk; \ | ||
533 | \ | ||
534 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
535 | Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
536 | @@ -XXX,XX +XXX,XX @@ SSRLNS(H, uint32_t, int32_t, uint16_t) | ||
537 | SSRLNS(W, uint64_t, int64_t, uint32_t) | ||
538 | |||
539 | #define VSSRLN(NAME, BIT, T, E1, E2) \ | ||
540 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
541 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
542 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
543 | { \ | ||
544 | int i; \ | ||
545 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
546 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
547 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
548 | + VReg *Vd = (VReg *)vd; \ | ||
549 | + VReg *Vj = (VReg *)vj; \ | ||
550 | + VReg *Vk = (VReg *)vk; \ | ||
551 | \ | ||
552 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
553 | Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \ | ||
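The per-element do_ssrlns_* functions are defined earlier in the file and elided from this hunk; an illustrative scalar take on one b_h lane, assuming the clamp is to the signed maximum of the narrow type (the logically shifted value can never be negative, so only the upper bound matters):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed equivalent of one vssrln.b.h lane, not a copy of the
     * elided do_ssrlns_B: logical shift right, then saturate to the
     * signed-byte maximum. */
    static int8_t ssrlns_b_h(uint16_t vj, uint16_t vk)
    {
        uint16_t v = vj >> (vk % 16);
        return v > INT8_MAX ? INT8_MAX : (int8_t)v;
    }

    int main(void)
    {
        printf("%d %d\n", ssrlns_b_h(0x4000, 4),  /* 127: 0x400 saturates */
                          ssrlns_b_h(0x0400, 4)); /* 64 */
        return 0;
    }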
554 | @@ -XXX,XX +XXX,XX @@ SSRANS(H, int32_t, int16_t) | ||
555 | SSRANS(W, int64_t, int32_t) | ||
556 | |||
557 | #define VSSRAN(NAME, BIT, T, E1, E2) \ | ||
558 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
559 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
560 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
561 | { \ | ||
562 | int i; \ | ||
563 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
564 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
565 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
566 | + VReg *Vd = (VReg *)vd; \ | ||
567 | + VReg *Vj = (VReg *)vj; \ | ||
568 | + VReg *Vk = (VReg *)vk; \ | ||
569 | \ | ||
570 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
571 | Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
572 | @@ -XXX,XX +XXX,XX @@ SSRLNU(H, uint32_t, uint16_t, int32_t) | ||
573 | SSRLNU(W, uint64_t, uint32_t, int64_t) | ||
574 | |||
575 | #define VSSRLNU(NAME, BIT, T, E1, E2) \ | ||
576 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
577 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
578 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
579 | { \ | ||
580 | int i; \ | ||
581 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
582 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
583 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
584 | + VReg *Vd = (VReg *)vd; \ | ||
585 | + VReg *Vj = (VReg *)vj; \ | ||
586 | + VReg *Vk = (VReg *)vk; \ | ||
587 | \ | ||
588 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
589 | Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
590 | @@ -XXX,XX +XXX,XX @@ SSRANU(H, uint32_t, uint16_t, int32_t) | ||
591 | SSRANU(W, uint64_t, uint32_t, int64_t) | ||
592 | |||
593 | #define VSSRANU(NAME, BIT, T, E1, E2) \ | ||
594 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
595 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
596 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
597 | { \ | ||
598 | int i; \ | ||
599 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
600 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
601 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
602 | + VReg *Vd = (VReg *)vd; \ | ||
603 | + VReg *Vj = (VReg *)vj; \ | ||
604 | + VReg *Vk = (VReg *)vk; \ | ||
605 | \ | ||
606 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
607 | Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
608 | @@ -XXX,XX +XXX,XX @@ SSRLRNS(H, W, uint32_t, int32_t, uint16_t) | ||
609 | SSRLRNS(W, D, uint64_t, int64_t, uint32_t) | ||
610 | |||
611 | #define VSSRLRN(NAME, BIT, T, E1, E2) \ | ||
612 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
613 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
614 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
615 | { \ | ||
616 | int i; \ | ||
617 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
618 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
619 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
620 | + VReg *Vd = (VReg *)vd; \ | ||
621 | + VReg *Vj = (VReg *)vj; \ | ||
622 | + VReg *Vk = (VReg *)vk; \ | ||
623 | \ | ||
624 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
625 | Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
626 | @@ -XXX,XX +XXX,XX @@ SSRARNS(H, W, int32_t, int16_t) | ||
627 | SSRARNS(W, D, int64_t, int32_t) | ||
628 | |||
629 | #define VSSRARN(NAME, BIT, T, E1, E2) \ | ||
630 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
631 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
632 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
633 | { \ | ||
634 | int i; \ | ||
635 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
636 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
637 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
638 | + VReg *Vd = (VReg *)vd; \ | ||
639 | + VReg *Vj = (VReg *)vj; \ | ||
640 | + VReg *Vk = (VReg *)vk; \ | ||
641 | \ | ||
642 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
643 | Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
644 | @@ -XXX,XX +XXX,XX @@ SSRLRNU(H, W, uint32_t, uint16_t, int32_t) | ||
645 | SSRLRNU(W, D, uint64_t, uint32_t, int64_t) | ||
646 | |||
647 | #define VSSRLRNU(NAME, BIT, T, E1, E2) \ | ||
648 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
649 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
650 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
651 | { \ | ||
652 | int i; \ | ||
653 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
654 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
655 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
656 | + VReg *Vd = (VReg *)vd; \ | ||
657 | + VReg *Vj = (VReg *)vj; \ | ||
658 | + VReg *Vk = (VReg *)vk; \ | ||
659 | \ | ||
660 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
661 | Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
662 | @@ -XXX,XX +XXX,XX @@ SSRARNU(H, W, uint32_t, uint16_t, int32_t) | ||
663 | SSRARNU(W, D, uint64_t, uint32_t, int64_t) | ||
664 | |||
665 | #define VSSRARNU(NAME, BIT, T, E1, E2) \ | ||
666 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
667 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
668 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
669 | { \ | ||
670 | int i; \ | ||
671 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
672 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
673 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
674 | + VReg *Vd = (VReg *)vd; \ | ||
675 | + VReg *Vj = (VReg *)vj; \ | ||
676 | + VReg *Vk = (VReg *)vk; \ | ||
677 | \ | ||
678 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
679 | Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
680 | @@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) | ||
681 | DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) | ||
682 | DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) | ||
683 | |||
684 | -#define VFRSTP(NAME, BIT, MASK, E) \ | ||
685 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
686 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
687 | -{ \ | ||
688 | - int i, m; \ | ||
689 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
690 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
691 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
692 | - \ | ||
693 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
694 | - if (Vj->E(i) < 0) { \ | ||
695 | - break; \ | ||
696 | - } \ | ||
697 | - } \ | ||
698 | - m = Vk->E(0) & MASK; \ | ||
699 | - Vd->E(m) = i; \ | ||
700 | +#define VFRSTP(NAME, BIT, MASK, E) \ | ||
701 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
702 | +{ \ | ||
703 | + int i, m; \ | ||
704 | + VReg *Vd = (VReg *)vd; \ | ||
705 | + VReg *Vj = (VReg *)vj; \ | ||
706 | + VReg *Vk = (VReg *)vk; \ | ||
707 | + \ | ||
708 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
709 | + if (Vj->E(i) < 0) { \ | ||
710 | + break; \ | ||
711 | + } \ | ||
712 | + } \ | ||
713 | + m = Vk->E(0) & MASK; \ | ||
714 | + Vd->E(m) = i; \ | ||
715 | } | ||
716 | |||
717 | VFRSTP(vfrstp_b, 8, 0xf, B) | ||
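vfrstp scans vj for the first negative element and writes its index (or the lane count when none is found) into the single vd lane selected by the low bits of vk's element 0. A standalone model of the byte variant:

    #include <stdint.h>
    #include <stdio.h>

    /* Model of vfrstp.b: find the index of the first negative byte of
     * vj (16 when none), then store it into vd[vk[0] & 0xf]; all the
     * other vd lanes are left untouched. */
    static void vfrstp_b(int8_t vd[16], const int8_t vj[16], const int8_t vk[16])
    {
        int i;

        for (i = 0; i < 16; i++) {
            if (vj[i] < 0) {
                break;
            }
        }
        vd[vk[0] & 0xf] = i;
    }

    int main(void)
    {
        int8_t vd[16] = {0}, vj[16] = {1, 2, -3}, vk[16] = {5};

        vfrstp_b(vd, vj, vk);
        printf("vd[5] = %d\n", vd[5]);   /* 2: first negative lane */
        return 0;
    }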
718 | @@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_h, MO_16) | ||
719 | SETALLNEZ(vsetallnez_w, MO_32) | ||
720 | SETALLNEZ(vsetallnez_d, MO_64) | ||
721 | |||
722 | -#define VPACKEV(NAME, BIT, E) \ | ||
723 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
724 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
725 | -{ \ | ||
726 | - int i; \ | ||
727 | - VReg temp; \ | ||
728 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
729 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
730 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
731 | - \ | ||
732 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
733 | - temp.E(2 * i + 1) = Vj->E(2 * i); \ | ||
734 | - temp.E(2 *i) = Vk->E(2 * i); \ | ||
735 | - } \ | ||
736 | - *Vd = temp; \ | ||
737 | +#define VPACKEV(NAME, BIT, E) \ | ||
738 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
739 | +{ \ | ||
740 | + int i; \ | ||
741 | + VReg temp; \ | ||
742 | + VReg *Vd = (VReg *)vd; \ | ||
743 | + VReg *Vj = (VReg *)vj; \ | ||
744 | + VReg *Vk = (VReg *)vk; \ | ||
745 | + \ | ||
746 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
747 | + temp.E(2 * i + 1) = Vj->E(2 * i); \ | ||
748 | + temp.E(2 *i) = Vk->E(2 * i); \ | ||
749 | + } \ | ||
750 | + *Vd = temp; \ | ||
751 | } | ||
752 | |||
753 | VPACKEV(vpackev_b, 16, B) | ||
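The whole pack/pick/interleave family builds its result in a temporary before writing vd, because vd may alias vj or vk. A standalone model of vpackev.b, the even-lane interleave (vpackod, vpickev/od and vilvl/h below vary only in the index arithmetic):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of vpackev.b: result lane 2i takes vk[2i], lane 2i+1
     * takes vj[2i].  The temporary mirrors the macro above, since
     * the result cannot be written in place. */
    static void vpackev_b(int8_t vd[16], const int8_t vj[16], const int8_t vk[16])
    {
        int8_t tmp[16];

        for (int i = 0; i < 8; i++) {
            tmp[2 * i]     = vk[2 * i];
            tmp[2 * i + 1] = vj[2 * i];
        }
        for (int i = 0; i < 16; i++) {
            vd[i] = tmp[i];
        }
    }

    int main(void)
    {
        int8_t vd[16], vj[16] = {1, 2, 3, 4}, vk[16] = {10, 20, 30, 40};

        vpackev_b(vd, vj, vk);
        printf("%d %d %d %d\n", vd[0], vd[1], vd[2], vd[3]);  /* 10 1 30 3 */
        return 0;
    }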
754 | @@ -XXX,XX +XXX,XX @@ VPACKEV(vpackev_h, 32, H) | ||
755 | VPACKEV(vpackev_w, 64, W) | ||
756 | VPACKEV(vpackev_d, 128, D) | ||
757 | |||
758 | -#define VPACKOD(NAME, BIT, E) \ | ||
759 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
760 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
761 | -{ \ | ||
762 | - int i; \ | ||
763 | - VReg temp; \ | ||
764 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
765 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
766 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
767 | - \ | ||
768 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
769 | - temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ | ||
770 | - temp.E(2 * i) = Vk->E(2 * i + 1); \ | ||
771 | - } \ | ||
772 | - *Vd = temp; \ | ||
773 | +#define VPACKOD(NAME, BIT, E) \ | ||
774 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
775 | +{ \ | ||
776 | + int i; \ | ||
777 | + VReg temp; \ | ||
778 | + VReg *Vd = (VReg *)vd; \ | ||
779 | + VReg *Vj = (VReg *)vj; \ | ||
780 | + VReg *Vk = (VReg *)vk; \ | ||
781 | + \ | ||
782 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
783 | + temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ | ||
784 | + temp.E(2 * i) = Vk->E(2 * i + 1); \ | ||
785 | + } \ | ||
786 | + *Vd = temp; \ | ||
787 | } | ||
788 | |||
789 | VPACKOD(vpackod_b, 16, B) | ||
790 | @@ -XXX,XX +XXX,XX @@ VPACKOD(vpackod_h, 32, H) | ||
791 | VPACKOD(vpackod_w, 64, W) | ||
792 | VPACKOD(vpackod_d, 128, D) | ||
793 | |||
794 | -#define VPICKEV(NAME, BIT, E) \ | ||
795 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
796 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
797 | -{ \ | ||
798 | - int i; \ | ||
799 | - VReg temp; \ | ||
800 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
801 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
802 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
803 | - \ | ||
804 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
805 | - temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ | ||
806 | - temp.E(i) = Vk->E(2 * i); \ | ||
807 | - } \ | ||
808 | - *Vd = temp; \ | ||
809 | +#define VPICKEV(NAME, BIT, E) \ | ||
810 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
811 | +{ \ | ||
812 | + int i; \ | ||
813 | + VReg temp; \ | ||
814 | + VReg *Vd = (VReg *)vd; \ | ||
815 | + VReg *Vj = (VReg *)vj; \ | ||
816 | + VReg *Vk = (VReg *)vk; \ | ||
817 | + \ | ||
818 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
819 | + temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ | ||
820 | + temp.E(i) = Vk->E(2 * i); \ | ||
821 | + } \ | ||
822 | + *Vd = temp; \ | ||
823 | } | ||
824 | |||
825 | VPICKEV(vpickev_b, 16, B) | ||
826 | @@ -XXX,XX +XXX,XX @@ VPICKEV(vpickev_h, 32, H) | ||
827 | VPICKEV(vpickev_w, 64, W) | ||
828 | VPICKEV(vpickev_d, 128, D) | ||
829 | |||
830 | -#define VPICKOD(NAME, BIT, E) \ | ||
831 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
832 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
833 | -{ \ | ||
834 | - int i; \ | ||
835 | - VReg temp; \ | ||
836 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
837 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
838 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
839 | - \ | ||
840 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
841 | - temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ | ||
842 | - temp.E(i) = Vk->E(2 * i + 1); \ | ||
843 | - } \ | ||
844 | - *Vd = temp; \ | ||
845 | +#define VPICKOD(NAME, BIT, E) \ | ||
846 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
847 | +{ \ | ||
848 | + int i; \ | ||
849 | + VReg temp; \ | ||
850 | + VReg *Vd = (VReg *)vd; \ | ||
851 | + VReg *Vj = (VReg *)vj; \ | ||
852 | + VReg *Vk = (VReg *)vk; \ | ||
853 | + \ | ||
854 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
855 | + temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ | ||
856 | + temp.E(i) = Vk->E(2 * i + 1); \ | ||
857 | + } \ | ||
858 | + *Vd = temp; \ | ||
859 | } | ||
860 | |||
861 | VPICKOD(vpickod_b, 16, B) | ||
862 | @@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_h, 32, H) | ||
863 | VPICKOD(vpickod_w, 64, W) | ||
864 | VPICKOD(vpickod_d, 128, D) | ||
865 | |||
866 | -#define VILVL(NAME, BIT, E) \ | ||
867 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
868 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
869 | -{ \ | ||
870 | - int i; \ | ||
871 | - VReg temp; \ | ||
872 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
873 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
874 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
875 | - \ | ||
876 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
877 | - temp.E(2 * i + 1) = Vj->E(i); \ | ||
878 | - temp.E(2 * i) = Vk->E(i); \ | ||
879 | - } \ | ||
880 | - *Vd = temp; \ | ||
881 | +#define VILVL(NAME, BIT, E) \ | ||
882 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
883 | +{ \ | ||
884 | + int i; \ | ||
885 | + VReg temp; \ | ||
886 | + VReg *Vd = (VReg *)vd; \ | ||
887 | + VReg *Vj = (VReg *)vj; \ | ||
888 | + VReg *Vk = (VReg *)vk; \ | ||
889 | + \ | ||
890 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
891 | + temp.E(2 * i + 1) = Vj->E(i); \ | ||
892 | + temp.E(2 * i) = Vk->E(i); \ | ||
893 | + } \ | ||
894 | + *Vd = temp; \ | ||
895 | } | ||
896 | |||
897 | VILVL(vilvl_b, 16, B) | ||
898 | @@ -XXX,XX +XXX,XX @@ VILVL(vilvl_h, 32, H) | ||
899 | VILVL(vilvl_w, 64, W) | ||
900 | VILVL(vilvl_d, 128, D) | ||
901 | |||
902 | -#define VILVH(NAME, BIT, E) \ | ||
903 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
904 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
905 | -{ \ | ||
906 | - int i; \ | ||
907 | - VReg temp; \ | ||
908 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
909 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
910 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
911 | - \ | ||
912 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
913 | - temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ | ||
914 | - temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ | ||
915 | - } \ | ||
916 | - *Vd = temp; \ | ||
917 | +#define VILVH(NAME, BIT, E) \ | ||
918 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
919 | +{ \ | ||
920 | + int i; \ | ||
921 | + VReg temp; \ | ||
922 | + VReg *Vd = (VReg *)vd; \ | ||
923 | + VReg *Vj = (VReg *)vj; \ | ||
924 | + VReg *Vk = (VReg *)vk; \ | ||
925 | + \ | ||
926 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
927 | + temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ | ||
928 | + temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ | ||
929 | + } \ | ||
930 | + *Vd = temp; \ | ||
931 | } | ||
932 | |||
933 | VILVH(vilvh_b, 16, B) | ||
934 | @@ -XXX,XX +XXX,XX @@ void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) | ||
935 | *Vd = temp; | ||
936 | } | ||
937 | |||
938 | -#define VSHUF(NAME, BIT, E) \ | ||
939 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
940 | - uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
941 | -{ \ | ||
942 | - int i, m; \ | ||
943 | - VReg temp; \ | ||
944 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
945 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
946 | - VReg *Vk = &(env->fpr[vk].vreg); \ | ||
947 | - \ | ||
948 | - m = LSX_LEN/BIT; \ | ||
949 | - for (i = 0; i < m; i++) { \ | ||
950 | - uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ | ||
951 | - temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ | ||
952 | - } \ | ||
953 | - *Vd = temp; \ | ||
954 | +#define VSHUF(NAME, BIT, E) \ | ||
955 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
956 | +{ \ | ||
957 | + int i, m; \ | ||
958 | + VReg temp; \ | ||
959 | + VReg *Vd = (VReg *)vd; \ | ||
960 | + VReg *Vj = (VReg *)vj; \ | ||
961 | + VReg *Vk = (VReg *)vk; \ | ||
962 | + \ | ||
963 | + m = LSX_LEN/BIT; \ | ||
964 | + for (i = 0; i < m; i++) { \ | ||
965 | + uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ | ||
966 | + temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ | ||
967 | + } \ | ||
968 | + *Vd = temp; \ | ||
969 | } | ||
970 | |||
971 | VSHUF(vshuf_h, 16, H) | ||
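vshuf is unusual in that vd itself supplies the indices: each one selects from the 2m-lane concatenation of vk (low half) and vj (high half), taken modulo 2m. A standalone model of vshuf.h (m = 8):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of vshuf.h: k picks vk[k] when k < m and vj[k - m]
     * otherwise, with indices taken modulo 2m from the low byte. */
    static void vshuf_h(int16_t vd[8], const int16_t vj[8], const int16_t vk[8])
    {
        int16_t tmp[8];

        for (int i = 0; i < 8; i++) {
            unsigned k = (uint8_t)vd[i] % 16;
            tmp[i] = k < 8 ? vk[k] : vj[k - 8];
        }
        for (int i = 0; i < 8; i++) {
            vd[i] = tmp[i];
        }
    }

    int main(void)
    {
        int16_t vd[8] = {0, 8, 1, 9, 2, 10, 3, 11};
        int16_t vj[8] = {100, 101, 102, 103, 104, 105, 106, 107};
        int16_t vk[8] = {200, 201, 202, 203, 204, 205, 206, 207};

        vshuf_h(vd, vj, vk);
        printf("%d %d\n", vd[0], vd[1]);   /* 200 100 */
        return 0;
    }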
972 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
973 | index XXXXXXX..XXXXXXX 100644 | ||
974 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
975 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
976 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, | ||
977 | return gen_vvv_ptr_vl(ctx, a, 16, fn); | ||
978 | } | ||
979 | |||
980 | -static bool gen_vvv(DisasContext *ctx, arg_vvv *a, | ||
981 | - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
982 | +static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
983 | + gen_helper_gvec_3 *fn) | ||
984 | { | ||
985 | - TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
986 | - TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
987 | - TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
988 | + tcg_gen_gvec_3_ool(vec_full_offset(a->vd), | ||
989 | + vec_full_offset(a->vj), | ||
990 | + vec_full_offset(a->vk), | ||
991 | + oprsz, ctx->vl / 8, 0, fn); | ||
992 | + return true; | ||
993 | +} | ||
994 | |||
995 | +static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
996 | +{ | ||
997 | CHECK_SXE; | ||
998 | - | ||
999 | - func(cpu_env, vd, vj, vk); | ||
1000 | - return true; | ||
1001 | + return gen_vvv_vl(ctx, a, 16, fn); | ||
1002 | } | ||
1003 | |||
1004 | static bool gen_vv(DisasContext *ctx, arg_vv *a, | ||
1005 | -- | ||
1006 | 2.39.1
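A note on the oprsz/maxsz pair passed by gen_vvv_vl in this patch: the gvec layer runs the out-of-line helper over oprsz bytes and clears the destination tail up to maxsz (ctx->vl / 8), so once vl reaches 256 an LSX op automatically zeroes the high half of the register. A hand-rolled model of that contract:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Model of the oprsz/maxsz contract: compute oprsz bytes, clear
     * the rest of the register up to maxsz (here done by hand). */
    static void gvec_3_model(uint8_t *d, const uint8_t *a, const uint8_t *b,
                             unsigned oprsz, unsigned maxsz)
    {
        for (unsigned i = 0; i < oprsz; i++) {
            d[i] = a[i] + b[i];               /* stand-in for the helper */
        }
        memset(d + oprsz, 0, maxsz - oprsz);  /* tail clearing */
    }

    int main(void)
    {
        uint8_t d[32], a[32], b[32];

        memset(a, 1, 32);
        memset(b, 2, 32);
        memset(d, 0xff, 32);
        gvec_3_model(d, a, b, 16, 32);
        printf("%d %d\n", d[0], d[16]);       /* 3 0 */
        return 0;
    }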
New patch | |||
---|---|---|---|
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-8-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/helper.h | 118 +++++++------- | ||
6 | target/loongarch/vec_helper.c | 161 +++++++++++--------- | ||
7 | target/loongarch/insn_trans/trans_vec.c.inc | 129 +++++++++------- | ||
8 | 3 files changed, 219 insertions(+), 189 deletions(-) | ||
1 | 9 | ||
10 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/helper.h | ||
13 | +++ b/target/loongarch/helper.h | ||
14 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
15 | DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
16 | DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
17 | |||
18 | -DEF_HELPER_3(vflogb_s, void, env, i32, i32) | ||
19 | -DEF_HELPER_3(vflogb_d, void, env, i32, i32) | ||
20 | - | ||
21 | -DEF_HELPER_3(vfclass_s, void, env, i32, i32) | ||
22 | -DEF_HELPER_3(vfclass_d, void, env, i32, i32) | ||
23 | - | ||
24 | -DEF_HELPER_3(vfsqrt_s, void, env, i32, i32) | ||
25 | -DEF_HELPER_3(vfsqrt_d, void, env, i32, i32) | ||
26 | -DEF_HELPER_3(vfrecip_s, void, env, i32, i32) | ||
27 | -DEF_HELPER_3(vfrecip_d, void, env, i32, i32) | ||
28 | -DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32) | ||
29 | -DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32) | ||
30 | - | ||
31 | -DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32) | ||
32 | -DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32) | ||
33 | -DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32) | ||
34 | -DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32) | ||
35 | +DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
37 | + | ||
38 | +DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
42 | +DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
43 | +DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
44 | +DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
45 | +DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
46 | +DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
47 | + | ||
48 | +DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
49 | +DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
50 | +DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
51 | +DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
52 | DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
53 | DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
54 | |||
55 | -DEF_HELPER_3(vfrintrne_s, void, env, i32, i32) | ||
56 | -DEF_HELPER_3(vfrintrne_d, void, env, i32, i32) | ||
57 | -DEF_HELPER_3(vfrintrz_s, void, env, i32, i32) | ||
58 | -DEF_HELPER_3(vfrintrz_d, void, env, i32, i32) | ||
59 | -DEF_HELPER_3(vfrintrp_s, void, env, i32, i32) | ||
60 | -DEF_HELPER_3(vfrintrp_d, void, env, i32, i32) | ||
61 | -DEF_HELPER_3(vfrintrm_s, void, env, i32, i32) | ||
62 | -DEF_HELPER_3(vfrintrm_d, void, env, i32, i32) | ||
63 | -DEF_HELPER_3(vfrint_s, void, env, i32, i32) | ||
64 | -DEF_HELPER_3(vfrint_d, void, env, i32, i32) | ||
65 | - | ||
66 | -DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32) | ||
67 | -DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32) | ||
68 | -DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32) | ||
69 | -DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32) | ||
70 | -DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32) | ||
71 | -DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32) | ||
72 | -DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32) | ||
73 | -DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32) | ||
74 | -DEF_HELPER_3(vftint_w_s, void, env, i32, i32) | ||
75 | -DEF_HELPER_3(vftint_l_d, void, env, i32, i32) | ||
76 | -DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32) | ||
77 | -DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32) | ||
78 | -DEF_HELPER_3(vftint_wu_s, void, env, i32, i32) | ||
79 | -DEF_HELPER_3(vftint_lu_d, void, env, i32, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
81 | +DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
82 | +DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
84 | +DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
85 | +DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
86 | +DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
87 | +DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
88 | +DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
89 | +DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
90 | + | ||
91 | +DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
92 | +DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
93 | +DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
94 | +DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
95 | +DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
96 | +DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
98 | +DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
99 | +DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
100 | +DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
101 | +DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
102 | +DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
103 | +DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
104 | +DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
105 | DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
106 | DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
107 | DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
108 | DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
109 | DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
110 | -DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32) | ||
111 | -DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32) | ||
112 | -DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32) | ||
113 | -DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32) | ||
114 | -DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32) | ||
115 | -DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32) | ||
116 | -DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32) | ||
117 | -DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32) | ||
118 | -DEF_HELPER_3(vftintl_l_s, void, env, i32, i32) | ||
119 | -DEF_HELPER_3(vftinth_l_s, void, env, i32, i32) | ||
120 | - | ||
121 | -DEF_HELPER_3(vffint_s_w, void, env, i32, i32) | ||
122 | -DEF_HELPER_3(vffint_d_l, void, env, i32, i32) | ||
123 | -DEF_HELPER_3(vffint_s_wu, void, env, i32, i32) | ||
124 | -DEF_HELPER_3(vffint_d_lu, void, env, i32, i32) | ||
125 | -DEF_HELPER_3(vffintl_d_w, void, env, i32, i32) | ||
126 | -DEF_HELPER_3(vffinth_d_w, void, env, i32, i32) | ||
127 | +DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
128 | +DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
129 | +DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
130 | +DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
131 | +DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
132 | +DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
133 | +DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
134 | +DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
135 | +DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
136 | +DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
137 | + | ||
138 | +DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
139 | +DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
140 | +DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
141 | +DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
142 | +DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
143 | +DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) | ||
144 | DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
145 | |||
146 | DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
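Unlike the pure integer helpers in the previous patch, these float helpers keep env, since the rounding mode and the accumulated exception state live in env's float_status; hence the ptr, ptr, env, i32 shape and the tcg_gen_gvec_2_ptr path rather than plain gvec_2. A standalone illustration of why that state must travel with the call, using host fenv.h purely as a stand-in for softfloat's status (compile with -lm):

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    /* Each vfrint / vftint variant rounds in a fixed mode; in QEMU
     * that mode lives in env.  Host rounding control models the idea. */
    static double frint_mode(double x, int mode)
    {
        int old = fegetround();
        fesetround(mode);
        double r = nearbyint(x);
        fesetround(old);
        return r;
    }

    int main(void)
    {
        printf("%.1f %.1f\n",
               frint_mode(2.5, FE_TONEAREST), /* 2.0: ties go to even */
               frint_mode(2.5, FE_UPWARD));   /* 3.0 */
        return 0;
    }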
147 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
148 | index XXXXXXX..XXXXXXX 100644 | ||
149 | --- a/target/loongarch/vec_helper.c | ||
150 | +++ b/target/loongarch/vec_helper.c | ||
151 | @@ -XXX,XX +XXX,XX @@ DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, | ||
152 | DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, | ||
153 | float_muladd_negate_c | float_muladd_negate_result) | ||
154 | |||
155 | -#define DO_2OP_F(NAME, BIT, E, FN) \ | ||
156 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
157 | -{ \ | ||
158 | - int i; \ | ||
159 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
160 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
161 | - \ | ||
162 | - vec_clear_cause(env); \ | ||
163 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
164 | - Vd->E(i) = FN(env, Vj->E(i)); \ | ||
165 | - } \ | ||
166 | +#define DO_2OP_F(NAME, BIT, E, FN) \ | ||
167 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
168 | + CPULoongArchState *env, uint32_t desc) \ | ||
169 | +{ \ | ||
170 | + int i; \ | ||
171 | + VReg *Vd = (VReg *)vd; \ | ||
172 | + VReg *Vj = (VReg *)vj; \ | ||
173 | + \ | ||
174 | + vec_clear_cause(env); \ | ||
175 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
176 | + Vd->E(i) = FN(env, Vj->E(i)); \ | ||
177 | + } \ | ||
178 | } | ||
179 | |||
180 | #define FLOGB(BIT, T) \ | ||
181 | @@ -XXX,XX +XXX,XX @@ static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ | ||
182 | FLOGB(32, uint32_t) | ||
183 | FLOGB(64, uint64_t) | ||
184 | |||
185 | -#define FCLASS(NAME, BIT, E, FN) \ | ||
186 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
187 | -{ \ | ||
188 | - int i; \ | ||
189 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
190 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
191 | - \ | ||
192 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
193 | - Vd->E(i) = FN(env, Vj->E(i)); \ | ||
194 | - } \ | ||
195 | +#define FCLASS(NAME, BIT, E, FN) \ | ||
196 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
197 | + CPULoongArchState *env, uint32_t desc) \ | ||
198 | +{ \ | ||
199 | + int i; \ | ||
200 | + VReg *Vd = (VReg *)vd; \ | ||
201 | + VReg *Vj = (VReg *)vj; \ | ||
202 | + \ | ||
203 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
204 | + Vd->E(i) = FN(env, Vj->E(i)); \ | ||
205 | + } \ | ||
206 | } | ||
207 | |||
208 | FCLASS(vfclass_s, 32, UW, helper_fclass_s) | ||
209 | @@ -XXX,XX +XXX,XX @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status) | ||
210 | return float64_to_float32(d, status); | ||
211 | } | ||
212 | |||
213 | -void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
214 | +void HELPER(vfcvtl_s_h)(void *vd, void *vj, | ||
215 | + CPULoongArchState *env, uint32_t desc) | ||
216 | { | ||
217 | int i; | ||
218 | VReg temp; | ||
219 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
220 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
221 | + VReg *Vd = (VReg *)vd; | ||
222 | + VReg *Vj = (VReg *)vj; | ||
223 | |||
224 | vec_clear_cause(env); | ||
225 | for (i = 0; i < LSX_LEN/32; i++) { | ||
226 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
227 | *Vd = temp; | ||
228 | } | ||
229 | |||
230 | -void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
231 | +void HELPER(vfcvtl_d_s)(void *vd, void *vj, | ||
232 | + CPULoongArchState *env, uint32_t desc) | ||
233 | { | ||
234 | int i; | ||
235 | VReg temp; | ||
236 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
237 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
238 | + VReg *Vd = (VReg *)vd; | ||
239 | + VReg *Vj = (VReg *)vj; | ||
240 | |||
241 | vec_clear_cause(env); | ||
242 | for (i = 0; i < LSX_LEN/64; i++) { | ||
243 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
244 | *Vd = temp; | ||
245 | } | ||
246 | |||
247 | -void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
248 | +void HELPER(vfcvth_s_h)(void *vd, void *vj, | ||
249 | + CPULoongArchState *env, uint32_t desc) | ||
250 | { | ||
251 | int i; | ||
252 | VReg temp; | ||
253 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
254 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
255 | + VReg *Vd = (VReg *)vd; | ||
256 | + VReg *Vj = (VReg *)vj; | ||
257 | |||
258 | vec_clear_cause(env); | ||
259 | for (i = 0; i < LSX_LEN/32; i++) { | ||
260 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
261 | *Vd = temp; | ||
262 | } | ||
263 | |||
264 | -void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
265 | +void HELPER(vfcvth_d_s)(void *vd, void *vj, | ||
266 | + CPULoongArchState *env, uint32_t desc) | ||
267 | { | ||
268 | int i; | ||
269 | VReg temp; | ||
270 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
271 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
272 | + VReg *Vd = (VReg *)vd; | ||
273 | + VReg *Vj = (VReg *)vj; | ||
274 | |||
275 | vec_clear_cause(env); | ||
276 | for (i = 0; i < LSX_LEN/64; i++) { | ||
277 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, | ||
278 | *Vd = temp; | ||
279 | } | ||
280 | |||
281 | -void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
282 | +void HELPER(vfrint_s)(void *vd, void *vj, | ||
283 | + CPULoongArchState *env, uint32_t desc) | ||
284 | { | ||
285 | int i; | ||
286 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
287 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
288 | + VReg *Vd = (VReg *)vd; | ||
289 | + VReg *Vj = (VReg *)vj; | ||
290 | |||
291 | vec_clear_cause(env); | ||
292 | for (i = 0; i < 4; i++) { | ||
293 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
294 | } | ||
295 | } | ||
296 | |||
297 | -void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
298 | +void HELPER(vfrint_d)(void *vd, void *vj, | ||
299 | + CPULoongArchState *env, uint32_t desc) | ||
300 | { | ||
301 | int i; | ||
302 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
303 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
304 | + VReg *Vd = (VReg *)vd; | ||
305 | + VReg *Vj = (VReg *)vj; | ||
306 | |||
307 | vec_clear_cause(env); | ||
308 | for (i = 0; i < 2; i++) { | ||
309 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
310 | } | ||
311 | |||
312 | #define FCVT_2OP(NAME, BIT, E, MODE) \ | ||
313 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
314 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
315 | + CPULoongArchState *env, uint32_t desc) \ | ||
316 | { \ | ||
317 | int i; \ | ||
318 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
319 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
320 | + VReg *Vd = (VReg *)vd; \ | ||
321 | + VReg *Vj = (VReg *)vj; \ | ||
322 | \ | ||
323 | vec_clear_cause(env); \ | ||
324 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
325 | @@ -XXX,XX +XXX,XX @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) | ||
326 | FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) | ||
327 | FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) | ||
328 | |||
329 | -#define FTINTL_L_S(NAME, FN) \ | ||
330 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
331 | -{ \ | ||
332 | - int i; \ | ||
333 | - VReg temp; \ | ||
334 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
335 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
336 | - \ | ||
337 | - vec_clear_cause(env); \ | ||
338 | - for (i = 0; i < 2; i++) { \ | ||
339 | - temp.D(i) = FN(env, Vj->UW(i)); \ | ||
340 | - } \ | ||
341 | - *Vd = temp; \ | ||
342 | +#define FTINTL_L_S(NAME, FN) \ | ||
343 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
344 | + CPULoongArchState *env, uint32_t desc) \ | ||
345 | +{ \ | ||
346 | + int i; \ | ||
347 | + VReg temp; \ | ||
348 | + VReg *Vd = (VReg *)vd; \ | ||
349 | + VReg *Vj = (VReg *)vj; \ | ||
350 | + \ | ||
351 | + vec_clear_cause(env); \ | ||
352 | + for (i = 0; i < 2; i++) { \ | ||
353 | + temp.D(i) = FN(env, Vj->UW(i)); \ | ||
354 | + } \ | ||
355 | + *Vd = temp; \ | ||
356 | } | ||
357 | |||
358 | FTINTL_L_S(vftintl_l_s, do_float32_to_int64) | ||
359 | @@ -XXX,XX +XXX,XX @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) | ||
360 | FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) | ||
361 | FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) | ||
362 | |||
363 | -#define FTINTH_L_S(NAME, FN) \ | ||
364 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
365 | -{ \ | ||
366 | - int i; \ | ||
367 | - VReg temp; \ | ||
368 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
369 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
370 | - \ | ||
371 | - vec_clear_cause(env); \ | ||
372 | - for (i = 0; i < 2; i++) { \ | ||
373 | - temp.D(i) = FN(env, Vj->UW(i + 2)); \ | ||
374 | - } \ | ||
375 | - *Vd = temp; \ | ||
376 | +#define FTINTH_L_S(NAME, FN) \ | ||
377 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
378 | + CPULoongArchState *env, uint32_t desc) \ | ||
379 | +{ \ | ||
380 | + int i; \ | ||
381 | + VReg temp; \ | ||
382 | + VReg *Vd = (VReg *)vd; \ | ||
383 | + VReg *Vj = (VReg *)vj; \ | ||
384 | + \ | ||
385 | + vec_clear_cause(env); \ | ||
386 | + for (i = 0; i < 2; i++) { \ | ||
387 | + temp.D(i) = FN(env, Vj->UW(i + 2)); \ | ||
388 | + } \ | ||
389 | + *Vd = temp; \ | ||
390 | } | ||
391 | |||
392 | FTINTH_L_S(vftinth_l_s, do_float32_to_int64) | ||
393 | @@ -XXX,XX +XXX,XX @@ DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) | ||
394 | DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) | ||
395 | DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) | ||
396 | |||
397 | -void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
398 | +void HELPER(vffintl_d_w)(void *vd, void *vj, | ||
399 | + CPULoongArchState *env, uint32_t desc) | ||
400 | { | ||
401 | int i; | ||
402 | VReg temp; | ||
403 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
404 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
405 | + VReg *Vd = (VReg *)vd; | ||
406 | + VReg *Vj = (VReg *)vj; | ||
407 | |||
408 | vec_clear_cause(env); | ||
409 | for (i = 0; i < 2; i++) { | ||
410 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
411 | *Vd = temp; | ||
412 | } | ||
413 | |||
414 | -void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
415 | +void HELPER(vffinth_d_w)(void *vd, void *vj, | ||
416 | + CPULoongArchState *env, uint32_t desc) | ||
417 | { | ||
418 | int i; | ||
419 | VReg temp; | ||
420 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
421 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
422 | + VReg *Vd = (VReg *)vd; | ||
423 | + VReg *Vj = (VReg *)vj; | ||
424 | |||
425 | vec_clear_cause(env); | ||
426 | for (i = 0; i < 2; i++) { | ||
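For reference, the vfcvtl/vfcvth pairs converted above widen one half of the source vector. A standalone model of vfcvtl.d.s and vfcvth.d.s, with a host float cast standing in for float32_to_float64 and its env-carried status:

    #include <stdio.h>

    /* Model of vfcvtl.d.s / vfcvth.d.s: widen two float32 lanes of vj
     * to float64, taking the low or the high half of the source. */
    static void vfcvt_d_s(double vd[2], const float vj[4], int high)
    {
        for (int i = 0; i < 2; i++) {
            vd[i] = (double)vj[i + (high ? 2 : 0)];
        }
    }

    int main(void)
    {
        const float vj[4] = {1.5f, 2.5f, 3.5f, 4.5f};
        double lo[2], hi[2];

        vfcvt_d_s(lo, vj, 0);
        vfcvt_d_s(hi, vj, 1);
        printf("%g %g | %g %g\n", lo[0], lo[1], hi[0], hi[1]); /* 1.5 2.5 | 3.5 4.5 */
        return 0;
    }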
427 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
428 | index XXXXXXX..XXXXXXX 100644 | ||
429 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
430 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
431 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
432 | return gen_vvv_vl(ctx, a, 16, fn); | ||
433 | } | ||
434 | |||
435 | +static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
436 | + gen_helper_gvec_2_ptr *fn) | ||
437 | +{ | ||
438 | + tcg_gen_gvec_2_ptr(vec_full_offset(a->vd), | ||
439 | + vec_full_offset(a->vj), | ||
440 | + cpu_env, | ||
441 | + oprsz, ctx->vl / 8, 0, fn); | ||
442 | + return true; | ||
443 | +} | ||
444 | + | ||
445 | +static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, | ||
446 | + gen_helper_gvec_2_ptr *fn) | ||
447 | +{ | ||
448 | + CHECK_SXE; | ||
449 | + return gen_vv_ptr_vl(ctx, a, 16, fn); | ||
450 | +} | ||
451 | + | ||
452 | static bool gen_vv(DisasContext *ctx, arg_vv *a, | ||
453 | void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
454 | { | ||
455 | @@ -XXX,XX +XXX,XX @@ TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d) | ||
456 | TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s) | ||
457 | TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d) | ||
458 | |||
459 | -TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s) | ||
460 | -TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d) | ||
461 | +TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s) | ||
462 | +TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d) | ||
463 | |||
464 | -TRANS(vfclass_s, LSX, gen_vv, gen_helper_vfclass_s) | ||
465 | -TRANS(vfclass_d, LSX, gen_vv, gen_helper_vfclass_d) | ||
466 | +TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s) | ||
467 | +TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d) | ||
468 | |||
469 | -TRANS(vfsqrt_s, LSX, gen_vv, gen_helper_vfsqrt_s) | ||
470 | -TRANS(vfsqrt_d, LSX, gen_vv, gen_helper_vfsqrt_d) | ||
471 | -TRANS(vfrecip_s, LSX, gen_vv, gen_helper_vfrecip_s) | ||
472 | -TRANS(vfrecip_d, LSX, gen_vv, gen_helper_vfrecip_d) | ||
473 | -TRANS(vfrsqrt_s, LSX, gen_vv, gen_helper_vfrsqrt_s) | ||
474 | -TRANS(vfrsqrt_d, LSX, gen_vv, gen_helper_vfrsqrt_d) | ||
475 | +TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s) | ||
476 | +TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d) | ||
477 | +TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s) | ||
478 | +TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d) | ||
479 | +TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s) | ||
480 | +TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d) | ||
481 | |||
482 | -TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h) | ||
483 | -TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h) | ||
484 | -TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s) | ||
485 | -TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s) | ||
486 | +TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h) | ||
487 | +TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h) | ||
488 | +TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s) | ||
489 | +TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s) | ||
490 | TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s) | ||
491 | TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d) | ||
492 | |||
493 | -TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s) | ||
494 | -TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d) | ||
495 | -TRANS(vfrintrz_s, LSX, gen_vv, gen_helper_vfrintrz_s) | ||
496 | -TRANS(vfrintrz_d, LSX, gen_vv, gen_helper_vfrintrz_d) | ||
497 | -TRANS(vfrintrp_s, LSX, gen_vv, gen_helper_vfrintrp_s) | ||
498 | -TRANS(vfrintrp_d, LSX, gen_vv, gen_helper_vfrintrp_d) | ||
499 | -TRANS(vfrintrm_s, LSX, gen_vv, gen_helper_vfrintrm_s) | ||
500 | -TRANS(vfrintrm_d, LSX, gen_vv, gen_helper_vfrintrm_d) | ||
501 | -TRANS(vfrint_s, LSX, gen_vv, gen_helper_vfrint_s) | ||
502 | -TRANS(vfrint_d, LSX, gen_vv, gen_helper_vfrint_d) | ||
503 | - | ||
504 | -TRANS(vftintrne_w_s, LSX, gen_vv, gen_helper_vftintrne_w_s) | ||
505 | -TRANS(vftintrne_l_d, LSX, gen_vv, gen_helper_vftintrne_l_d) | ||
506 | -TRANS(vftintrz_w_s, LSX, gen_vv, gen_helper_vftintrz_w_s) | ||
507 | -TRANS(vftintrz_l_d, LSX, gen_vv, gen_helper_vftintrz_l_d) | ||
508 | -TRANS(vftintrp_w_s, LSX, gen_vv, gen_helper_vftintrp_w_s) | ||
509 | -TRANS(vftintrp_l_d, LSX, gen_vv, gen_helper_vftintrp_l_d) | ||
510 | -TRANS(vftintrm_w_s, LSX, gen_vv, gen_helper_vftintrm_w_s) | ||
511 | -TRANS(vftintrm_l_d, LSX, gen_vv, gen_helper_vftintrm_l_d) | ||
512 | -TRANS(vftint_w_s, LSX, gen_vv, gen_helper_vftint_w_s) | ||
513 | -TRANS(vftint_l_d, LSX, gen_vv, gen_helper_vftint_l_d) | ||
514 | -TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s) | ||
515 | -TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d) | ||
516 | -TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s) | ||
517 | -TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d) | ||
518 | +TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s) | ||
519 | +TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d) | ||
520 | +TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s) | ||
521 | +TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d) | ||
522 | +TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s) | ||
523 | +TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d) | ||
524 | +TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s) | ||
525 | +TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d) | ||
526 | +TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s) | ||
527 | +TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d) | ||
528 | + | ||
529 | +TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s) | ||
530 | +TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d) | ||
531 | +TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s) | ||
532 | +TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d) | ||
533 | +TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s) | ||
534 | +TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d) | ||
535 | +TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s) | ||
536 | +TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d) | ||
537 | +TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s) | ||
538 | +TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d) | ||
539 | +TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s) | ||
540 | +TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d) | ||
541 | +TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s) | ||
542 | +TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d) | ||
543 | TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d) | ||
544 | TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d) | ||
545 | TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d) | ||
546 | TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d) | ||
547 | TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d) | ||
548 | -TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s) | ||
549 | -TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s) | ||
550 | -TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s) | ||
551 | -TRANS(vftintrzh_l_s, LSX, gen_vv, gen_helper_vftintrzh_l_s) | ||
552 | -TRANS(vftintrpl_l_s, LSX, gen_vv, gen_helper_vftintrpl_l_s) | ||
553 | -TRANS(vftintrph_l_s, LSX, gen_vv, gen_helper_vftintrph_l_s) | ||
554 | -TRANS(vftintrml_l_s, LSX, gen_vv, gen_helper_vftintrml_l_s) | ||
555 | -TRANS(vftintrmh_l_s, LSX, gen_vv, gen_helper_vftintrmh_l_s) | ||
556 | -TRANS(vftintl_l_s, LSX, gen_vv, gen_helper_vftintl_l_s) | ||
557 | -TRANS(vftinth_l_s, LSX, gen_vv, gen_helper_vftinth_l_s) | ||
558 | - | ||
559 | -TRANS(vffint_s_w, LSX, gen_vv, gen_helper_vffint_s_w) | ||
560 | -TRANS(vffint_d_l, LSX, gen_vv, gen_helper_vffint_d_l) | ||
561 | -TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu) | ||
562 | -TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu) | ||
563 | -TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w) | ||
564 | -TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w) | ||
565 | +TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s) | ||
566 | +TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s) | ||
567 | +TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s) | ||
568 | +TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s) | ||
569 | +TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s) | ||
570 | +TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s) | ||
571 | +TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s) | ||
572 | +TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s) | ||
573 | +TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s) | ||
574 | +TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s) | ||
575 | + | ||
576 | +TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w) | ||
577 | +TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l) | ||
578 | +TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu) | ||
579 | +TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu) | ||
580 | +TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w) | ||
581 | +TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w) | ||
582 | TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l) | ||
583 | |||
584 | static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
585 | -- | ||
586 | 2.39.1 ||
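The hunk above moves the env-dependent 2OP float helpers from hand-rolled gen_vv calls to tcg_gen_gvec_2_ptr, which emits one out-of-line call per instruction carrying pointers to the destination and source register slices, cpu_env, and a 32-bit descriptor packing oprsz/maxsz (here oprsz = 16 and maxsz = ctx->vl / 8, with data = 0). A minimal sketch of the callee shape this expects, assuming the tree's VReg type, UW lane accessor, and simd_oprsz(); the helper name and the per-lane do_flogb_32 routine are hypothetical stand-ins, not code from this series:

/* Sketch only: the shape of a gen_helper_gvec_2_ptr callee. */
void helper_sketch_flogb_s(void *vd, void *vj,
                           CPULoongArchState *env, uint32_t desc)
{
    int i;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    /* simd_oprsz() recovers the operation size the translator encoded
     * in the descriptor: 16 bytes for LSX, i.e. four 32-bit lanes. */
    for (i = 0; i < simd_oprsz(desc) / 4; i++) {
        Vd->UW(i) = do_flogb_32(env, Vj->UW(i)); /* hypothetical lane op */
    }
}

Keeping the size in the descriptor rather than baking it into the helper is what lets the same callee serve the 32-byte LASX forms added later in this series.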
New patch | |||
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-9-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/helper.h | 58 ++++----- | ||
6 | target/loongarch/vec_helper.c | 124 ++++++++++---------- | ||
7 | target/loongarch/insn_trans/trans_vec.c.inc | 16 ++- | ||
8 | 3 files changed, 101 insertions(+), 97 deletions(-) | ||
10 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/helper.h | ||
13 | +++ b/target/loongarch/helper.h | ||
14 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
15 | DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
16 | DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
17 | |||
18 | -DEF_HELPER_3(vexth_h_b, void, env, i32, i32) | ||
19 | -DEF_HELPER_3(vexth_w_h, void, env, i32, i32) | ||
20 | -DEF_HELPER_3(vexth_d_w, void, env, i32, i32) | ||
21 | -DEF_HELPER_3(vexth_q_d, void, env, i32, i32) | ||
22 | -DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32) | ||
23 | -DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32) | ||
24 | -DEF_HELPER_3(vexth_du_wu, void, env, i32, i32) | ||
25 | -DEF_HELPER_3(vexth_qu_du, void, env, i32, i32) | ||
26 | +DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
27 | +DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
28 | +DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
29 | +DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
30 | +DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
31 | +DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
32 | +DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
33 | +DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
34 | |||
35 | DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
36 | DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
37 | DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
38 | DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | |||
40 | -DEF_HELPER_3(vmskltz_b, void, env, i32, i32) | ||
41 | -DEF_HELPER_3(vmskltz_h, void, env, i32, i32) | ||
42 | -DEF_HELPER_3(vmskltz_w, void, env, i32, i32) | ||
43 | -DEF_HELPER_3(vmskltz_d, void, env, i32, i32) | ||
44 | -DEF_HELPER_3(vmskgez_b, void, env, i32, i32) | ||
45 | -DEF_HELPER_3(vmsknz_b, void, env, i32,i32) | ||
46 | +DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
47 | +DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
48 | +DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
49 | +DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
50 | +DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
51 | +DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
52 | |||
53 | DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
54 | |||
55 | DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32) | ||
56 | DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32) | ||
57 | DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32) | ||
58 | -DEF_HELPER_3(vextl_q_d, void, env, i32, i32) | ||
59 | +DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
60 | DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32) | ||
61 | DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32) | ||
62 | DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32) | ||
63 | -DEF_HELPER_3(vextl_qu_du, void, env, i32, i32) | ||
64 | +DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
65 | |||
66 | DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
67 | DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
68 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32) | ||
69 | DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32) | ||
70 | DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32) | ||
71 | |||
72 | -DEF_HELPER_3(vclo_b, void, env, i32, i32) | ||
73 | -DEF_HELPER_3(vclo_h, void, env, i32, i32) | ||
74 | -DEF_HELPER_3(vclo_w, void, env, i32, i32) | ||
75 | -DEF_HELPER_3(vclo_d, void, env, i32, i32) | ||
76 | -DEF_HELPER_3(vclz_b, void, env, i32, i32) | ||
77 | -DEF_HELPER_3(vclz_h, void, env, i32, i32) | ||
78 | -DEF_HELPER_3(vclz_w, void, env, i32, i32) | ||
79 | -DEF_HELPER_3(vclz_d, void, env, i32, i32) | ||
80 | - | ||
81 | -DEF_HELPER_3(vpcnt_b, void, env, i32, i32) | ||
82 | -DEF_HELPER_3(vpcnt_h, void, env, i32, i32) | ||
83 | -DEF_HELPER_3(vpcnt_w, void, env, i32, i32) | ||
84 | -DEF_HELPER_3(vpcnt_d, void, env, i32, i32) | ||
85 | +DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
86 | +DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
87 | +DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
88 | +DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
89 | +DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
90 | +DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
91 | +DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
92 | +DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
93 | + | ||
94 | +DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
95 | +DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
96 | +DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
97 | +DEF_HELPER_FLAGS_3(vpcnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
98 | |||
99 | DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
100 | DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
101 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/loongarch/vec_helper.c | ||
104 | +++ b/target/loongarch/vec_helper.c | ||
105 | @@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_hu, 16, UH) | ||
106 | VSAT_U(vsat_wu, 32, UW) | ||
107 | VSAT_U(vsat_du, 64, UD) | ||
108 | |||
109 | -#define VEXTH(NAME, BIT, E1, E2) \ | ||
110 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
111 | -{ \ | ||
112 | - int i; \ | ||
113 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
114 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
115 | - \ | ||
116 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
117 | - Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ | ||
118 | - } \ | ||
119 | +#define VEXTH(NAME, BIT, E1, E2) \ | ||
120 | +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
121 | +{ \ | ||
122 | + int i; \ | ||
123 | + VReg *Vd = (VReg *)vd; \ | ||
124 | + VReg *Vj = (VReg *)vj; \ | ||
125 | + \ | ||
126 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
127 | + Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ | ||
128 | + } \ | ||
129 | } | ||
130 | |||
131 | -void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
132 | +void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) | ||
133 | { | ||
134 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
135 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
136 | + VReg *Vd = (VReg *)vd; | ||
137 | + VReg *Vj = (VReg *)vj; | ||
138 | |||
139 | Vd->Q(0) = int128_makes64(Vj->D(1)); | ||
140 | } | ||
141 | |||
142 | -void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
143 | +void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) | ||
144 | { | ||
145 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
146 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
147 | + VReg *Vd = (VReg *)vd; | ||
148 | + VReg *Vj = (VReg *)vj; | ||
149 | |||
150 | Vd->Q(0) = int128_make64((uint64_t)Vj->D(1)); | ||
151 | } | ||
152 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_b(int64_t val) | ||
153 | return c >> 56; | ||
154 | } | ||
155 | |||
156 | -void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
157 | +void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) | ||
158 | { | ||
159 | uint16_t temp = 0; | ||
160 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
161 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
162 | + VReg *Vd = (VReg *)vd; | ||
163 | + VReg *Vj = (VReg *)vj; | ||
164 | |||
165 | temp = do_vmskltz_b(Vj->D(0)); | ||
166 | temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
167 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_h(int64_t val) | ||
168 | return c >> 60; | ||
169 | } | ||
170 | |||
171 | -void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
172 | +void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) | ||
173 | { | ||
174 | uint16_t temp = 0; | ||
175 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
176 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
177 | + VReg *Vd = (VReg *)vd; | ||
178 | + VReg *Vj = (VReg *)vj; | ||
179 | |||
180 | temp = do_vmskltz_h(Vj->D(0)); | ||
181 | temp |= (do_vmskltz_h(Vj->D(1)) << 4); | ||
182 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_w(int64_t val) | ||
183 | return c >> 62; | ||
184 | } | ||
185 | |||
186 | -void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
187 | +void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) | ||
188 | { | ||
189 | uint16_t temp = 0; | ||
190 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
191 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
192 | + VReg *Vd = (VReg *)vd; | ||
193 | + VReg *Vj = (VReg *)vj; | ||
194 | |||
195 | temp = do_vmskltz_w(Vj->D(0)); | ||
196 | temp |= (do_vmskltz_w(Vj->D(1)) << 2); | ||
197 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_d(int64_t val) | ||
198 | { | ||
199 | return (uint64_t)val >> 63; | ||
200 | } | ||
201 | -void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
202 | +void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) | ||
203 | { | ||
204 | uint16_t temp = 0; | ||
205 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
206 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
207 | + VReg *Vd = (VReg *)vd; | ||
208 | + VReg *Vj = (VReg *)vj; | ||
209 | |||
210 | temp = do_vmskltz_d(Vj->D(0)); | ||
211 | temp |= (do_vmskltz_d(Vj->D(1)) << 1); | ||
212 | @@ -XXX,XX +XXX,XX @@ void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
213 | Vd->D(1) = 0; | ||
214 | } | ||
215 | |||
216 | -void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
217 | +void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) | ||
218 | { | ||
219 | uint16_t temp = 0; | ||
220 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
221 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
222 | + VReg *Vd = (VReg *)vd; | ||
223 | + VReg *Vj = (VReg *)vj; | ||
224 | |||
225 | temp = do_vmskltz_b(Vj->D(0)); | ||
226 | temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
227 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskez_b(uint64_t a) | ||
228 | return c >> 56; | ||
229 | } | ||
230 | |||
231 | -void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
232 | +void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) | ||
233 | { | ||
234 | uint16_t temp = 0; | ||
235 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
236 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
237 | + VReg *Vd = (VReg *)vd; | ||
238 | + VReg *Vj = (VReg *)vj; | ||
239 | |||
240 | temp = do_vmskez_b(Vj->D(0)); | ||
241 | temp |= (do_vmskez_b(Vj->D(1)) << 8); | ||
242 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
243 | *Vd = temp; \ | ||
244 | } | ||
245 | |||
246 | -void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
247 | +void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) | ||
248 | { | ||
249 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
250 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
251 | + VReg *Vd = (VReg *)vd; | ||
252 | + VReg *Vj = (VReg *)vj; | ||
253 | |||
254 | Vd->Q(0) = int128_makes64(Vj->D(0)); | ||
255 | } | ||
256 | |||
257 | -void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
258 | +void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) | ||
259 | { | ||
260 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
261 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
262 | + VReg *Vd = (VReg *)vd; | ||
263 | + VReg *Vj = (VReg *)vj; | ||
264 | |||
265 | Vd->Q(0) = int128_make64(Vj->D(0)); | ||
266 | } | ||
267 | @@ -XXX,XX +XXX,XX @@ VSSRARNUI(vssrarni_bu_h, 16, B, H) | ||
268 | VSSRARNUI(vssrarni_hu_w, 32, H, W) | ||
269 | VSSRARNUI(vssrarni_wu_d, 64, W, D) | ||
270 | |||
271 | -#define DO_2OP(NAME, BIT, E, DO_OP) \ | ||
272 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
273 | -{ \ | ||
274 | - int i; \ | ||
275 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
276 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
277 | - \ | ||
278 | - for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
279 | - { \ | ||
280 | - Vd->E(i) = DO_OP(Vj->E(i)); \ | ||
281 | - } \ | ||
282 | +#define DO_2OP(NAME, BIT, E, DO_OP) \ | ||
283 | +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
284 | +{ \ | ||
285 | + int i; \ | ||
286 | + VReg *Vd = (VReg *)vd; \ | ||
287 | + VReg *Vj = (VReg *)vj; \ | ||
288 | + \ | ||
289 | + for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
290 | + { \ | ||
291 | + Vd->E(i) = DO_OP(Vj->E(i)); \ | ||
292 | + } \ | ||
293 | } | ||
294 | |||
295 | #define DO_CLO_B(N) (clz32(~N & 0xff) - 24) | ||
296 | @@ -XXX,XX +XXX,XX @@ DO_2OP(vclz_h, 16, UH, DO_CLZ_H) | ||
297 | DO_2OP(vclz_w, 32, UW, DO_CLZ_W) | ||
298 | DO_2OP(vclz_d, 64, UD, DO_CLZ_D) | ||
299 | |||
300 | -#define VPCNT(NAME, BIT, E, FN) \ | ||
301 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
302 | -{ \ | ||
303 | - int i; \ | ||
304 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
305 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
306 | - \ | ||
307 | - for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
308 | - { \ | ||
309 | - Vd->E(i) = FN(Vj->E(i)); \ | ||
310 | - } \ | ||
311 | +#define VPCNT(NAME, BIT, E, FN) \ | ||
312 | +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
313 | +{ \ | ||
314 | + int i; \ | ||
315 | + VReg *Vd = (VReg *)vd; \ | ||
316 | + VReg *Vj = (VReg *)vj; \ | ||
317 | + \ | ||
318 | + for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
319 | + { \ | ||
320 | + Vd->E(i) = FN(Vj->E(i)); \ | ||
321 | + } \ | ||
322 | } | ||
323 | |||
324 | VPCNT(vpcnt_b, 8, UB, ctpop8) | ||
325 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
326 | index XXXXXXX..XXXXXXX 100644 | ||
327 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
328 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
329 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, | ||
330 | return gen_vv_ptr_vl(ctx, a, 16, fn); | ||
331 | } | ||
332 | |||
333 | -static bool gen_vv(DisasContext *ctx, arg_vv *a, | ||
334 | - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
335 | +static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
336 | + gen_helper_gvec_2 *fn) | ||
337 | { | ||
338 | - TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
339 | - TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
340 | + tcg_gen_gvec_2_ool(vec_full_offset(a->vd), | ||
341 | + vec_full_offset(a->vj), | ||
342 | + oprsz, ctx->vl / 8, 0, fn); | ||
343 | + return true; | ||
344 | +} | ||
345 | |||
346 | +static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
347 | +{ | ||
348 | CHECK_SXE; | ||
349 | - func(cpu_env, vd, vj); | ||
350 | - return true; | ||
351 | + return gen_vv_vl(ctx, a, 16, fn); | ||
352 | } | ||
353 | |||
354 | static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, | ||
355 | -- | ||
356 | 2.39.1 ||
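This patch gives the env-free 2OP integer helpers the gen_helper_gvec_2 shape: tcg_gen_gvec_2_ool passes only the two slice pointers plus the descriptor, and TCG_CALL_NO_RWG tells the optimizer that the call neither reads nor writes TCG globals, so values cached in host registers can survive across it. Written out by hand, the VPCNT(vpcnt_b, 8, UB, ctpop8) instantiation above expands to the following; note the descriptor is accepted but not yet consulted, since these bodies still iterate the fixed 128-bit LSX width:

void helper_vpcnt_b(void *vd, void *vj, uint32_t desc)
{
    int i;
    VReg *Vd = (VReg *)vd;
    VReg *Vj = (VReg *)vj;

    for (i = 0; i < LSX_LEN / 8; i++) {     /* 16 byte lanes */
        Vd->UB(i) = ctpop8(Vj->UB(i));      /* population count per byte */
    }
}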
New patch | |||
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-10-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/helper.h | 146 +++---- | ||
6 | target/loongarch/vec_helper.c | 445 +++++++++----------- | ||
7 | target/loongarch/insn_trans/trans_vec.c.inc | 18 +- | ||
8 | 3 files changed, 291 insertions(+), 318 deletions(-) | ||
10 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/helper.h | ||
13 | +++ b/target/loongarch/helper.h | ||
14 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
15 | |||
16 | DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
17 | |||
18 | -DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32) | ||
19 | -DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32) | ||
20 | -DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32) | ||
21 | +DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
22 | +DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
23 | +DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
24 | DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
25 | -DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32) | ||
26 | -DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32) | ||
27 | -DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32) | ||
28 | +DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
29 | +DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
30 | +DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
31 | DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
32 | |||
33 | DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
34 | DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
35 | DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
36 | DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
37 | -DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32) | ||
38 | -DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32) | ||
39 | -DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32) | ||
40 | -DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32) | ||
41 | +DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
42 | +DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
43 | +DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
44 | +DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
45 | |||
46 | DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
47 | DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
48 | DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
49 | DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
50 | -DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32) | ||
51 | -DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32) | ||
52 | -DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32) | ||
53 | -DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
55 | +DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
56 | +DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
57 | +DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
58 | |||
59 | DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
60 | DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
61 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
62 | DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
63 | DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
64 | |||
65 | -DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32) | ||
66 | -DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32) | ||
67 | -DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32) | ||
68 | -DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32) | ||
69 | -DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32) | ||
70 | -DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32) | ||
71 | -DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32) | ||
72 | -DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32) | ||
73 | +DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
74 | +DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
75 | +DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
76 | +DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
77 | +DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
81 | |||
82 | DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
83 | DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
84 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
85 | DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
86 | DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
87 | |||
88 | -DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32) | ||
89 | -DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32) | ||
90 | -DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32) | ||
91 | -DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32) | ||
92 | -DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32) | ||
93 | -DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32) | ||
94 | -DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32) | ||
95 | -DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32) | ||
96 | +DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
98 | +DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
99 | +DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
100 | +DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
101 | +DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
102 | +DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
103 | +DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
104 | |||
105 | DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
106 | DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
107 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
108 | DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
109 | DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
110 | |||
111 | -DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32) | ||
112 | -DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32) | ||
113 | -DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32) | ||
114 | -DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32) | ||
115 | -DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32) | ||
116 | -DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32) | ||
117 | -DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32) | ||
118 | -DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32) | ||
119 | -DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32) | ||
120 | -DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32) | ||
121 | -DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32) | ||
122 | -DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32) | ||
123 | -DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32) | ||
124 | -DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32) | ||
125 | -DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32) | ||
126 | -DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32) | ||
127 | +DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
128 | +DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
129 | +DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
130 | +DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
131 | +DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
132 | +DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
133 | +DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
134 | +DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
135 | +DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
136 | +DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
137 | +DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
138 | +DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
139 | +DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
140 | +DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
141 | +DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
142 | +DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
143 | |||
144 | DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
145 | DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
146 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
147 | DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
148 | DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
149 | |||
150 | -DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32) | ||
151 | -DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32) | ||
152 | -DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32) | ||
153 | -DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32) | ||
154 | -DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32) | ||
155 | -DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32) | ||
156 | -DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32) | ||
157 | -DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32) | ||
158 | -DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32) | ||
159 | -DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32) | ||
160 | -DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32) | ||
161 | -DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32) | ||
162 | -DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32) | ||
163 | -DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32) | ||
164 | -DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32) | ||
165 | -DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32) | ||
166 | +DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
167 | +DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
168 | +DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
169 | +DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
170 | +DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
171 | +DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
172 | +DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
173 | +DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
174 | +DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
175 | +DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
176 | +DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
177 | +DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
178 | +DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
179 | +DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
180 | +DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
181 | +DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
182 | |||
183 | DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
184 | DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
185 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
186 | |||
187 | DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
188 | DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
189 | -DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32) | ||
190 | -DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32) | ||
191 | +DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
192 | +DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
193 | |||
194 | DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
195 | DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) | ||
196 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | ||
197 | DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
198 | DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
199 | DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
200 | -DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32) | ||
201 | -DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32) | ||
202 | -DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32) | ||
203 | -DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32) | ||
204 | +DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
205 | +DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
206 | +DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
207 | +DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
208 | |||
209 | -DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32) | ||
210 | +DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
211 | |||
212 | -DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32) | ||
213 | -DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32) | ||
214 | -DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32) | ||
215 | -DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32) | ||
216 | +DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
217 | +DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
218 | +DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
219 | +DEF_HELPER_FLAGS_4(vextrins_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
220 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
221 | index XXXXXXX..XXXXXXX 100644 | ||
222 | --- a/target/loongarch/vec_helper.c | ||
223 | +++ b/target/loongarch/vec_helper.c | ||
224 | @@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
225 | } | ||
226 | } | ||
227 | |||
228 | -#define VSLLWIL(NAME, BIT, E1, E2) \ | ||
229 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
230 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
231 | -{ \ | ||
232 | - int i; \ | ||
233 | - VReg temp; \ | ||
234 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
235 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
236 | - typedef __typeof(temp.E1(0)) TD; \ | ||
237 | - \ | ||
238 | - temp.D(0) = 0; \ | ||
239 | - temp.D(1) = 0; \ | ||
240 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
241 | - temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ | ||
242 | - } \ | ||
243 | - *Vd = temp; \ | ||
244 | +#define VSLLWIL(NAME, BIT, E1, E2) \ | ||
245 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
246 | +{ \ | ||
247 | + int i; \ | ||
248 | + VReg temp; \ | ||
249 | + VReg *Vd = (VReg *)vd; \ | ||
250 | + VReg *Vj = (VReg *)vj; \ | ||
251 | + typedef __typeof(temp.E1(0)) TD; \ | ||
252 | + \ | ||
253 | + temp.D(0) = 0; \ | ||
254 | + temp.D(1) = 0; \ | ||
255 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
256 | + temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ | ||
257 | + } \ | ||
258 | + *Vd = temp; \ | ||
259 | } | ||
260 | |||
261 | void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) | ||
262 | @@ -XXX,XX +XXX,XX @@ VSRLR(vsrlr_h, 16, uint16_t, H) | ||
263 | VSRLR(vsrlr_w, 32, uint32_t, W) | ||
264 | VSRLR(vsrlr_d, 64, uint64_t, D) | ||
265 | |||
266 | -#define VSRLRI(NAME, BIT, E) \ | ||
267 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
268 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
269 | -{ \ | ||
270 | - int i; \ | ||
271 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
272 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
273 | - \ | ||
274 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
275 | - Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ | ||
276 | - } \ | ||
277 | +#define VSRLRI(NAME, BIT, E) \ | ||
278 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
279 | +{ \ | ||
280 | + int i; \ | ||
281 | + VReg *Vd = (VReg *)vd; \ | ||
282 | + VReg *Vj = (VReg *)vj; \ | ||
283 | + \ | ||
284 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
285 | + Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ | ||
286 | + } \ | ||
287 | } | ||
288 | |||
289 | VSRLRI(vsrlri_b, 8, B) | ||
290 | @@ -XXX,XX +XXX,XX @@ VSRAR(vsrar_h, 16, uint16_t, H) | ||
291 | VSRAR(vsrar_w, 32, uint32_t, W) | ||
292 | VSRAR(vsrar_d, 64, uint64_t, D) | ||
293 | |||
294 | -#define VSRARI(NAME, BIT, E) \ | ||
295 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
296 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
297 | -{ \ | ||
298 | - int i; \ | ||
299 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
300 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
301 | - \ | ||
302 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
303 | - Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ | ||
304 | - } \ | ||
305 | +#define VSRARI(NAME, BIT, E) \ | ||
306 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
307 | +{ \ | ||
308 | + int i; \ | ||
309 | + VReg *Vd = (VReg *)vd; \ | ||
310 | + VReg *Vj = (VReg *)vj; \ | ||
311 | + \ | ||
312 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
313 | + Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ | ||
314 | + } \ | ||
315 | } | ||
316 | |||
317 | VSRARI(vsrari_b, 8, B) | ||
318 | @@ -XXX,XX +XXX,XX @@ VSRAN(vsran_b_h, 16, uint16_t, B, H) | ||
319 | VSRAN(vsran_h_w, 32, uint32_t, H, W) | ||
320 | VSRAN(vsran_w_d, 64, uint64_t, W, D) | ||
321 | |||
322 | -#define VSRLNI(NAME, BIT, T, E1, E2) \ | ||
323 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
324 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
325 | -{ \ | ||
326 | - int i, max; \ | ||
327 | - VReg temp; \ | ||
328 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
329 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
330 | - \ | ||
331 | - temp.D(0) = 0; \ | ||
332 | - temp.D(1) = 0; \ | ||
333 | - max = LSX_LEN/BIT; \ | ||
334 | - for (i = 0; i < max; i++) { \ | ||
335 | - temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ | ||
336 | - temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ | ||
337 | - } \ | ||
338 | - *Vd = temp; \ | ||
339 | -} | ||
340 | - | ||
341 | -void HELPER(vsrlni_d_q)(CPULoongArchState *env, | ||
342 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
343 | +#define VSRLNI(NAME, BIT, T, E1, E2) \ | ||
344 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
345 | +{ \ | ||
346 | + int i, max; \ | ||
347 | + VReg temp; \ | ||
348 | + VReg *Vd = (VReg *)vd; \ | ||
349 | + VReg *Vj = (VReg *)vj; \ | ||
350 | + \ | ||
351 | + temp.D(0) = 0; \ | ||
352 | + temp.D(1) = 0; \ | ||
353 | + max = LSX_LEN/BIT; \ | ||
354 | + for (i = 0; i < max; i++) { \ | ||
355 | + temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ | ||
356 | + temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ | ||
357 | + } \ | ||
358 | + *Vd = temp; \ | ||
359 | +} | ||
360 | + | ||
361 | +void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
362 | { | ||
363 | VReg temp; | ||
364 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
365 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
366 | + VReg *Vd = (VReg *)vd; | ||
367 | + VReg *Vj = (VReg *)vj; | ||
368 | |||
369 | temp.D(0) = 0; | ||
370 | temp.D(1) = 0; | ||
371 | @@ -XXX,XX +XXX,XX @@ VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) | ||
372 | VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) | ||
373 | VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) | ||
374 | |||
375 | -#define VSRANI(NAME, BIT, E1, E2) \ | ||
376 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
377 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
378 | -{ \ | ||
379 | - int i, max; \ | ||
380 | - VReg temp; \ | ||
381 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
382 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
383 | - \ | ||
384 | - temp.D(0) = 0; \ | ||
385 | - temp.D(1) = 0; \ | ||
386 | - max = LSX_LEN/BIT; \ | ||
387 | - for (i = 0; i < max; i++) { \ | ||
388 | - temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ | ||
389 | - temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ | ||
390 | - } \ | ||
391 | - *Vd = temp; \ | ||
392 | -} | ||
393 | - | ||
394 | -void HELPER(vsrani_d_q)(CPULoongArchState *env, | ||
395 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
396 | +#define VSRANI(NAME, BIT, E1, E2) \ | ||
397 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
398 | +{ \ | ||
399 | + int i, max; \ | ||
400 | + VReg temp; \ | ||
401 | + VReg *Vd = (VReg *)vd; \ | ||
402 | + VReg *Vj = (VReg *)vj; \ | ||
403 | + \ | ||
404 | + temp.D(0) = 0; \ | ||
405 | + temp.D(1) = 0; \ | ||
406 | + max = LSX_LEN/BIT; \ | ||
407 | + for (i = 0; i < max; i++) { \ | ||
408 | + temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ | ||
409 | + temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ | ||
410 | + } \ | ||
411 | + *Vd = temp; \ | ||
412 | +} | ||
413 | + | ||
414 | +void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
415 | { | ||
416 | VReg temp; | ||
417 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
418 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
419 | + VReg *Vd = (VReg *)vd; | ||
420 | + VReg *Vj = (VReg *)vj; | ||
421 | |||
422 | temp.D(0) = 0; | ||
423 | temp.D(1) = 0; | ||
424 | @@ -XXX,XX +XXX,XX @@ VSRARN(vsrarn_b_h, 16, uint8_t, B, H) | ||
425 | VSRARN(vsrarn_h_w, 32, uint16_t, H, W) | ||
426 | VSRARN(vsrarn_w_d, 64, uint32_t, W, D) | ||
427 | |||
428 | -#define VSRLRNI(NAME, BIT, E1, E2) \ | ||
429 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
430 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
431 | -{ \ | ||
432 | - int i, max; \ | ||
433 | - VReg temp; \ | ||
434 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
435 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
436 | - \ | ||
437 | - temp.D(0) = 0; \ | ||
438 | - temp.D(1) = 0; \ | ||
439 | - max = LSX_LEN/BIT; \ | ||
440 | - for (i = 0; i < max; i++) { \ | ||
441 | - temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ | ||
442 | - temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ | ||
443 | - } \ | ||
444 | - *Vd = temp; \ | ||
445 | -} | ||
446 | - | ||
447 | -void HELPER(vsrlrni_d_q)(CPULoongArchState *env, | ||
448 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
449 | +#define VSRLRNI(NAME, BIT, E1, E2) \ | ||
450 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
451 | +{ \ | ||
452 | + int i, max; \ | ||
453 | + VReg temp; \ | ||
454 | + VReg *Vd = (VReg *)vd; \ | ||
455 | + VReg *Vj = (VReg *)vj; \ | ||
456 | + \ | ||
457 | + temp.D(0) = 0; \ | ||
458 | + temp.D(1) = 0; \ | ||
459 | + max = LSX_LEN/BIT; \ | ||
460 | + for (i = 0; i < max; i++) { \ | ||
461 | + temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ | ||
462 | + temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ | ||
463 | + } \ | ||
464 | + *Vd = temp; \ | ||
465 | +} | ||
466 | + | ||
467 | +void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
468 | { | ||
469 | VReg temp; | ||
470 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
471 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
472 | + VReg *Vd = (VReg *)vd; | ||
473 | + VReg *Vj = (VReg *)vj; | ||
474 | Int128 r1, r2; | ||
475 | |||
476 | if (imm == 0) { | ||
477 | @@ -XXX,XX +XXX,XX @@ VSRLRNI(vsrlrni_b_h, 16, B, H) | ||
478 | VSRLRNI(vsrlrni_h_w, 32, H, W) | ||
479 | VSRLRNI(vsrlrni_w_d, 64, W, D) | ||
480 | |||
481 | -#define VSRARNI(NAME, BIT, E1, E2) \ | ||
482 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
483 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
484 | -{ \ | ||
485 | - int i, max; \ | ||
486 | - VReg temp; \ | ||
487 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
488 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
489 | - \ | ||
490 | - temp.D(0) = 0; \ | ||
491 | - temp.D(1) = 0; \ | ||
492 | - max = LSX_LEN/BIT; \ | ||
493 | - for (i = 0; i < max; i++) { \ | ||
494 | - temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ | ||
495 | - temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ | ||
496 | - } \ | ||
497 | - *Vd = temp; \ | ||
498 | -} | ||
499 | - | ||
500 | -void HELPER(vsrarni_d_q)(CPULoongArchState *env, | ||
501 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
502 | +#define VSRARNI(NAME, BIT, E1, E2) \ | ||
503 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
504 | +{ \ | ||
505 | + int i, max; \ | ||
506 | + VReg temp; \ | ||
507 | + VReg *Vd = (VReg *)vd; \ | ||
508 | + VReg *Vj = (VReg *)vj; \ | ||
509 | + \ | ||
510 | + temp.D(0) = 0; \ | ||
511 | + temp.D(1) = 0; \ | ||
512 | + max = LSX_LEN/BIT; \ | ||
513 | + for (i = 0; i < max; i++) { \ | ||
514 | + temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ | ||
515 | + temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ | ||
516 | + } \ | ||
517 | + *Vd = temp; \ | ||
518 | +} | ||
519 | + | ||
520 | +void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
521 | { | ||
522 | VReg temp; | ||
523 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
524 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
525 | + VReg *Vd = (VReg *)vd; | ||
526 | + VReg *Vj = (VReg *)vj; | ||
527 | Int128 r1, r2; | ||
528 | |||
529 | if (imm == 0) { | ||
530 | @@ -XXX,XX +XXX,XX @@ VSSRANU(vssran_hu_w, 32, uint32_t, H, W) | ||
531 | VSSRANU(vssran_wu_d, 64, uint64_t, W, D) | ||
532 | |||
533 | #define VSSRLNI(NAME, BIT, E1, E2) \ | ||
534 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
535 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
536 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
537 | { \ | ||
538 | int i; \ | ||
539 | VReg temp; \ | ||
540 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
541 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
542 | + VReg *Vd = (VReg *)vd; \ | ||
543 | + VReg *Vj = (VReg *)vj; \ | ||
544 | \ | ||
545 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
546 | temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
547 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
548 | *Vd = temp; \ | ||
549 | } | ||
550 | |||
551 | -void HELPER(vssrlni_d_q)(CPULoongArchState *env, | ||
552 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
553 | +void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
554 | { | ||
555 | Int128 shft_res1, shft_res2, mask; | ||
556 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
557 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
558 | + VReg *Vd = (VReg *)vd; | ||
559 | + VReg *Vj = (VReg *)vj; | ||
560 | |||
561 | if (imm == 0) { | ||
562 | shft_res1 = Vj->Q(0); | ||
563 | @@ -XXX,XX +XXX,XX @@ VSSRLNI(vssrlni_h_w, 32, H, W) | ||
564 | VSSRLNI(vssrlni_w_d, 64, W, D) | ||
565 | |||
566 | #define VSSRANI(NAME, BIT, E1, E2) \ | ||
567 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
568 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
569 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
570 | { \ | ||
571 | int i; \ | ||
572 | VReg temp; \ | ||
573 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
574 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
575 | + VReg *Vd = (VReg *)vd; \ | ||
576 | + VReg *Vj = (VReg *)vj; \ | ||
577 | \ | ||
578 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
579 | temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
580 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
581 | *Vd = temp; \ | ||
582 | } | ||
583 | |||
584 | -void HELPER(vssrani_d_q)(CPULoongArchState *env, | ||
585 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
586 | +void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
587 | { | ||
588 | Int128 shft_res1, shft_res2, mask, min; | ||
589 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
590 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
591 | + VReg *Vd = (VReg *)vd; | ||
592 | + VReg *Vj = (VReg *)vj; | ||
593 | |||
594 | if (imm == 0) { | ||
595 | shft_res1 = Vj->Q(0); | ||
596 | @@ -XXX,XX +XXX,XX @@ VSSRANI(vssrani_h_w, 32, H, W) | ||
597 | VSSRANI(vssrani_w_d, 64, W, D) | ||
598 | |||
599 | #define VSSRLNUI(NAME, BIT, E1, E2) \ | ||
600 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
601 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
602 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
603 | { \ | ||
604 | int i; \ | ||
605 | VReg temp; \ | ||
606 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
607 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
608 | + VReg *Vd = (VReg *)vd; \ | ||
609 | + VReg *Vj = (VReg *)vj; \ | ||
610 | \ | ||
611 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
612 | temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
613 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
614 | *Vd = temp; \ | ||
615 | } | ||
616 | |||
617 | -void HELPER(vssrlni_du_q)(CPULoongArchState *env, | ||
618 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
619 | +void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
620 | { | ||
621 | Int128 shft_res1, shft_res2, mask; | ||
622 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
623 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
624 | + VReg *Vd = (VReg *)vd; | ||
625 | + VReg *Vj = (VReg *)vj; | ||
626 | |||
627 | if (imm == 0) { | ||
628 | shft_res1 = Vj->Q(0); | ||
629 | @@ -XXX,XX +XXX,XX @@ VSSRLNUI(vssrlni_hu_w, 32, H, W) | ||
630 | VSSRLNUI(vssrlni_wu_d, 64, W, D) | ||
631 | |||
632 | #define VSSRANUI(NAME, BIT, E1, E2) \ | ||
633 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
634 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
635 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
636 | { \ | ||
637 | int i; \ | ||
638 | VReg temp; \ | ||
639 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
640 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
641 | + VReg *Vd = (VReg *)vd; \ | ||
642 | + VReg *Vj = (VReg *)vj; \ | ||
643 | \ | ||
644 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
645 | temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
646 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
647 | *Vd = temp; \ | ||
648 | } | ||
649 | |||
650 | -void HELPER(vssrani_du_q)(CPULoongArchState *env, | ||
651 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
652 | +void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
653 | { | ||
654 | Int128 shft_res1, shft_res2, mask; | ||
655 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
656 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
657 | + VReg *Vd = (VReg *)vd; | ||
658 | + VReg *Vj = (VReg *)vj; | ||
659 | |||
660 | if (imm == 0) { | ||
661 | shft_res1 = Vj->Q(0); | ||
662 | @@ -XXX,XX +XXX,XX @@ VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) | ||
663 | VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) | ||
664 | |||
665 | #define VSSRLRNI(NAME, BIT, E1, E2) \ | ||
666 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
667 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
668 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
669 | { \ | ||
670 | int i; \ | ||
671 | VReg temp; \ | ||
672 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
673 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
674 | + VReg *Vd = (VReg *)vd; \ | ||
675 | + VReg *Vj = (VReg *)vj; \ | ||
676 | \ | ||
677 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
678 | temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
679 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
680 | } | ||
681 | |||
682 | #define VSSRLRNI_Q(NAME, sh) \ | ||
683 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
684 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
685 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
686 | { \ | ||
687 | Int128 shft_res1, shft_res2, mask, r1, r2; \ | ||
688 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
689 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
690 | + VReg *Vd = (VReg *)vd; \ | ||
691 | + VReg *Vj = (VReg *)vj; \ | ||
692 | \ | ||
693 | if (imm == 0) { \ | ||
694 | shft_res1 = Vj->Q(0); \ | ||
695 | @@ -XXX,XX +XXX,XX @@ VSSRLRNI(vssrlrni_w_d, 64, W, D) | ||
696 | VSSRLRNI_Q(vssrlrni_d_q, 63) | ||
697 | |||
698 | #define VSSRARNI(NAME, BIT, E1, E2) \ | ||
699 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
700 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
701 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
702 | { \ | ||
703 | int i; \ | ||
704 | VReg temp; \ | ||
705 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
706 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
707 | + VReg *Vd = (VReg *)vd; \ | ||
708 | + VReg *Vj = (VReg *)vj; \ | ||
709 | \ | ||
710 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
711 | temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
712 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, | ||
713 | *Vd = temp; \ | ||
714 | } | ||
715 | |||
716 | -void HELPER(vssrarni_d_q)(CPULoongArchState *env, | ||
717 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
718 | +void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
719 | { | ||
720 | Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
721 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
722 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
723 | + VReg *Vd = (VReg *)vd; | ||
724 | + VReg *Vj = (VReg *)vj; | ||
725 | |||
726 | if (imm == 0) { | ||
727 | shft_res1 = Vj->Q(0); | ||
728 | @@ -XXX,XX +XXX,XX @@ VSSRARNI(vssrarni_h_w, 32, H, W) | ||
729 | VSSRARNI(vssrarni_w_d, 64, W, D) | ||
730 | |||
731 | #define VSSRLRNUI(NAME, BIT, E1, E2) \ | ||
732 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
733 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
734 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
735 | { \ | ||
736 | int i; \ | ||
737 | VReg temp; \ | ||
738 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
739 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
740 | + VReg *Vd = (VReg *)vd; \ | ||
741 | + VReg *Vj = (VReg *)vj; \ | ||
742 | \ | ||
743 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
744 | temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
745 | @@ -XXX,XX +XXX,XX @@ VSSRLRNUI(vssrlrni_wu_d, 64, W, D) | ||
746 | VSSRLRNI_Q(vssrlrni_du_q, 64) | ||
747 | |||
748 | #define VSSRARNUI(NAME, BIT, E1, E2) \ | ||
749 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
750 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
751 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
752 | { \ | ||
753 | int i; \ | ||
754 | VReg temp; \ | ||
755 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
756 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
757 | + VReg *Vd = (VReg *)vd; \ | ||
758 | + VReg *Vj = (VReg *)vj; \ | ||
759 | \ | ||
760 | for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
761 | temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
762 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
763 | *Vd = temp; \ | ||
764 | } | ||
765 | |||
766 | -void HELPER(vssrarni_du_q)(CPULoongArchState *env, | ||
767 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
768 | +void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
769 | { | ||
770 | Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
771 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
772 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
773 | + VReg *Vd = (VReg *)vd; | ||
774 | + VReg *Vj = (VReg *)vj; | ||
775 | |||
776 | if (imm == 0) { | ||
777 | shft_res1 = Vj->Q(0); | ||
778 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
779 | VFRSTP(vfrstp_b, 8, 0xf, B) | ||
780 | VFRSTP(vfrstp_h, 16, 0x7, H) | ||
781 | |||
782 | -#define VFRSTPI(NAME, BIT, E) \ | ||
783 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
784 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
785 | -{ \ | ||
786 | - int i, m; \ | ||
787 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
788 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
789 | - \ | ||
790 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
791 | - if (Vj->E(i) < 0) { \ | ||
792 | - break; \ | ||
793 | - } \ | ||
794 | - } \ | ||
795 | - m = imm % (LSX_LEN/BIT); \ | ||
796 | - Vd->E(m) = i; \ | ||
797 | +#define VFRSTPI(NAME, BIT, E) \ | ||
798 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
799 | +{ \ | ||
800 | + int i, m; \ | ||
801 | + VReg *Vd = (VReg *)vd; \ | ||
802 | + VReg *Vj = (VReg *)vj; \ | ||
803 | + \ | ||
804 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
805 | + if (Vj->E(i) < 0) { \ | ||
806 | + break; \ | ||
807 | + } \ | ||
808 | + } \ | ||
809 | + m = imm % (LSX_LEN/BIT); \ | ||
810 | + Vd->E(m) = i; \ | ||
811 | } | ||
812 | |||
813 | VFRSTPI(vfrstpi_b, 8, B) | ||
814 | @@ -XXX,XX +XXX,XX @@ VSHUF(vshuf_h, 16, H) | ||
815 | VSHUF(vshuf_w, 32, W) | ||
816 | VSHUF(vshuf_d, 64, D) | ||
817 | |||
818 | -#define VSHUF4I(NAME, BIT, E) \ | ||
819 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
820 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
821 | -{ \ | ||
822 | - int i; \ | ||
823 | - VReg temp; \ | ||
824 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
825 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
826 | - \ | ||
827 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
828 | - temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ | ||
829 | - (2 * ((i) & 0x03))) & 0x03)); \ | ||
830 | - } \ | ||
831 | - *Vd = temp; \ | ||
832 | +#define VSHUF4I(NAME, BIT, E) \ | ||
833 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
834 | +{ \ | ||
835 | + int i; \ | ||
836 | + VReg temp; \ | ||
837 | + VReg *Vd = (VReg *)vd; \ | ||
838 | + VReg *Vj = (VReg *)vj; \ | ||
839 | + \ | ||
840 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
841 | + temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ | ||
842 | + (2 * ((i) & 0x03))) & 0x03)); \ | ||
843 | + } \ | ||
844 | + *Vd = temp; \ | ||
845 | } | ||
846 | |||
847 | VSHUF4I(vshuf4i_b, 8, B) | ||
848 | VSHUF4I(vshuf4i_h, 16, H) | ||
849 | VSHUF4I(vshuf4i_w, 32, W) | ||
850 | |||
851 | -void HELPER(vshuf4i_d)(CPULoongArchState *env, | ||
852 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
853 | +void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
854 | { | ||
855 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
856 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
857 | + VReg *Vd = (VReg *)vd; | ||
858 | + VReg *Vj = (VReg *)vj; | ||
859 | |||
860 | VReg temp; | ||
861 | temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); | ||
862 | @@ -XXX,XX +XXX,XX @@ void HELPER(vshuf4i_d)(CPULoongArchState *env, | ||
863 | *Vd = temp; | ||
864 | } | ||
865 | |||
866 | -void HELPER(vpermi_w)(CPULoongArchState *env, | ||
867 | - uint32_t vd, uint32_t vj, uint32_t imm) | ||
868 | +void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
869 | { | ||
870 | VReg temp; | ||
871 | - VReg *Vd = &(env->fpr[vd].vreg); | ||
872 | - VReg *Vj = &(env->fpr[vj].vreg); | ||
873 | + VReg *Vd = (VReg *)vd; | ||
874 | + VReg *Vj = (VReg *)vj; | ||
875 | |||
876 | temp.W(0) = Vj->W(imm & 0x3); | ||
877 | temp.W(1) = Vj->W((imm >> 2) & 0x3); | ||
878 | @@ -XXX,XX +XXX,XX @@ void HELPER(vpermi_w)(CPULoongArchState *env, | ||
879 | *Vd = temp; | ||
880 | } | ||
881 | |||
882 | -#define VEXTRINS(NAME, BIT, E, MASK) \ | ||
883 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
884 | - uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
885 | -{ \ | ||
886 | - int ins, extr; \ | ||
887 | - VReg *Vd = &(env->fpr[vd].vreg); \ | ||
888 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
889 | - \ | ||
890 | - ins = (imm >> 4) & MASK; \ | ||
891 | - extr = imm & MASK; \ | ||
892 | - Vd->E(ins) = Vj->E(extr); \ | ||
893 | +#define VEXTRINS(NAME, BIT, E, MASK) \ | ||
894 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
895 | +{ \ | ||
896 | + int ins, extr; \ | ||
897 | + VReg *Vd = (VReg *)vd; \ | ||
898 | + VReg *Vj = (VReg *)vj; \ | ||
899 | + \ | ||
900 | + ins = (imm >> 4) & MASK; \ | ||
901 | + extr = imm & MASK; \ | ||
902 | + Vd->E(ins) = Vj->E(extr); \ | ||
903 | } | ||
904 | |||
905 | VEXTRINS(vextrins_b, 8, B, 0xf) | ||
906 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
907 | index XXXXXXX..XXXXXXX 100644 | ||
908 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
909 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
910 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
911 | return gen_vv_vl(ctx, a, 16, fn); | ||
912 | } | ||
913 | |||
914 | -static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, | ||
915 | - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
916 | +static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, | ||
917 | + gen_helper_gvec_2i *fn) | ||
918 | { | ||
919 | - TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
920 | - TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
921 | - TCGv_i32 imm = tcg_constant_i32(a->imm); | ||
922 | + tcg_gen_gvec_2i_ool(vec_full_offset(a->vd), | ||
923 | + vec_full_offset(a->vj), | ||
924 | + tcg_constant_i64(a->imm), | ||
925 | + oprsz, ctx->vl / 8, 0, fn); | ||
926 | + return true; | ||
927 | +} | ||
928 | |||
929 | +static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) | ||
930 | +{ | ||
931 | CHECK_SXE; | ||
932 | - func(cpu_env, vd, vj, imm); | ||
933 | - return true; | ||
934 | + return gen_vv_i_vl(ctx, a, 16, fn); | ||
935 | } | ||
936 | |||
937 | static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
938 | -- | ||
939 | 2.39.1
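For reference, moving these immediate-form helpers to the (void *vd, void *vj, uint64_t imm, uint32_t desc) signature is what allows them to be called out of line via tcg_gen_gvec_2i_ool(). A minimal sketch of the matching declaration, following QEMU's DEF_HELPER conventions (the exact call flags here are an assumption):

    /* helper.h: gvec_2i-style declaration for an immediate-form helper */
    DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

As the gen_vv_i_vl() hunk above shows, tcg_gen_gvec_2i_ool() is then handed the two register offsets, the immediate as a TCGv_i64, the operand and register sizes, a data word, and the helper.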
New patch | |||
1 | Introduce a new function, check_vec(), to replace the CHECK_SXE macro | ||
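In outline, translators move from a macro that returns from the caller to an explicit test; a sketch of the before/after pattern (matching the diff below):

    /* Before: the macro itself returned from the translator. */
    CHECK_SXE;

    /* After: check_vec() raises the exception and reports failure; the
     * translator still returns true because the instruction was handled
     * (by generating the exception). */
    if (!check_vec(ctx, 16)) {
        return true;
    }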
1 | 2 | ||
3 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20230914022645.1151356-11-gaosong@loongson.cn> | ||
6 | --- | ||
7 | target/loongarch/insn_trans/trans_vec.c.inc | 248 +++++++++++++++----- | ||
8 | 1 file changed, 192 insertions(+), 56 deletions(-) | ||
9 | |||
10 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
13 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | */ | ||
16 | |||
17 | #ifndef CONFIG_USER_ONLY | ||
18 | -#define CHECK_SXE do { \ | ||
19 | - if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \ | ||
20 | - generate_exception(ctx, EXCCODE_SXD); \ | ||
21 | - return true; \ | ||
22 | - } \ | ||
23 | -} while (0) | ||
24 | + | ||
25 | +static bool check_vec(DisasContext *ctx, uint32_t oprsz) | ||
26 | +{ | ||
27 | + if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) { | ||
28 | + generate_exception(ctx, EXCCODE_SXD); | ||
29 | + return false; | ||
30 | + } | ||
31 | + return true; | ||
32 | +} | ||
33 | + | ||
34 | #else | ||
35 | -#define CHECK_SXE | ||
36 | + | ||
37 | +static bool check_vec(DisasContext *ctx, uint32_t oprsz) | ||
38 | +{ | ||
39 | + return true; | ||
40 | +} | ||
41 | + | ||
42 | #endif | ||
43 | |||
44 | static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
45 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
46 | static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a, | ||
47 | gen_helper_gvec_4_ptr *fn) | ||
48 | { | ||
49 | - CHECK_SXE; | ||
50 | + if (!check_vec(ctx, 16)) { | ||
51 | + return true; | ||
52 | + } | ||
53 | + | ||
54 | return gen_vvvv_ptr_vl(ctx, a, 16, fn); | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
58 | static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, | ||
59 | gen_helper_gvec_4 *fn) | ||
60 | { | ||
61 | - CHECK_SXE; | ||
62 | + if (!check_vec(ctx, 16)) { | ||
63 | + return true; | ||
64 | + } | ||
65 | + | ||
66 | return gen_vvvv_vl(ctx, a, 16, fn); | ||
67 | } | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
70 | static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, | ||
71 | gen_helper_gvec_3_ptr *fn) | ||
72 | { | ||
73 | - CHECK_SXE; | ||
74 | + if (!check_vec(ctx, 16)) { | ||
75 | + return true; | ||
76 | + } | ||
77 | + | ||
78 | return gen_vvv_ptr_vl(ctx, a, 16, fn); | ||
79 | } | ||
80 | |||
81 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
82 | |||
83 | static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
84 | { | ||
85 | - CHECK_SXE; | ||
86 | + if (!check_vec(ctx, 16)) { | ||
87 | + return true; | ||
88 | + } | ||
89 | + | ||
90 | return gen_vvv_vl(ctx, a, 16, fn); | ||
91 | } | ||
92 | |||
93 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
94 | static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, | ||
95 | gen_helper_gvec_2_ptr *fn) | ||
96 | { | ||
97 | - CHECK_SXE; | ||
98 | + if (!check_vec(ctx, 16)) { | ||
99 | + return true; | ||
100 | + } | ||
101 | + | ||
102 | return gen_vv_ptr_vl(ctx, a, 16, fn); | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
106 | |||
107 | static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
108 | { | ||
109 | - CHECK_SXE; | ||
110 | + if (!check_vec(ctx, 16)) { | ||
111 | + return true; | ||
112 | + } | ||
113 | + | ||
114 | return gen_vv_vl(ctx, a, 16, fn); | ||
115 | } | ||
116 | |||
117 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, | ||
118 | |||
119 | static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) | ||
120 | { | ||
121 | - CHECK_SXE; | ||
122 | + if (!check_vec(ctx, 16)) { | ||
123 | + return true; | ||
124 | + } | ||
125 | + | ||
126 | return gen_vv_i_vl(ctx, a, 16, fn); | ||
127 | } | ||
128 | |||
129 | @@ -XXX,XX +XXX,XX @@ static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
130 | TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
131 | TCGv_i32 cd = tcg_constant_i32(a->cd); | ||
132 | |||
133 | - CHECK_SXE; | ||
134 | + if (!check_vec(ctx, 16)) { | ||
135 | + return true; | ||
136 | + } | ||
137 | + | ||
138 | func(cpu_env, cd, vj); | ||
139 | return true; | ||
140 | } | ||
141 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
142 | void (*func)(unsigned, uint32_t, uint32_t, | ||
143 | uint32_t, uint32_t, uint32_t)) | ||
144 | { | ||
145 | - CHECK_SXE; | ||
146 | + if (!check_vec(ctx, 16)) { | ||
147 | + return true; | ||
148 | + } | ||
149 | + | ||
150 | return gvec_vvv_vl(ctx, a, 16, mop, func); | ||
151 | } | ||
152 | |||
153 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop, | ||
154 | void (*func)(unsigned, uint32_t, uint32_t, | ||
155 | uint32_t, uint32_t)) | ||
156 | { | ||
157 | - CHECK_SXE; | ||
158 | + if (!check_vec(ctx, 16)) { | ||
159 | + return true; | ||
160 | + } | ||
161 | + | ||
162 | return gvec_vv_vl(ctx, a, 16, mop, func); | ||
163 | } | ||
164 | |||
165 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, | ||
166 | void (*func)(unsigned, uint32_t, uint32_t, | ||
167 | int64_t, uint32_t, uint32_t)) | ||
168 | { | ||
169 | - CHECK_SXE; | ||
170 | + if (!check_vec(ctx, 16)) { | ||
171 | + return true; | ||
172 | + } | ||
173 | + | ||
174 | return gvec_vv_i_vl(ctx, a, 16, mop, func); | ||
175 | } | ||
176 | |||
177 | @@ -XXX,XX +XXX,XX @@ static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a, | ||
178 | |||
179 | static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop) | ||
180 | { | ||
181 | - CHECK_SXE; | ||
182 | + if (!check_vec(ctx, 16)) { | ||
183 | + return true; | ||
184 | + } | ||
185 | + | ||
186 | return gvec_subi_vl(ctx, a, 16, mop); | ||
187 | } | ||
188 | |||
189 | @@ -XXX,XX +XXX,XX @@ static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \ | ||
190 | return false; \ | ||
191 | } \ | ||
192 | \ | ||
193 | - CHECK_SXE; \ | ||
194 | + if (!check_vec(ctx, 16)) { \ | ||
195 | + return true; \ | ||
196 | + } \ | ||
197 | \ | ||
198 | rh = tcg_temp_new_i64(); \ | ||
199 | rl = tcg_temp_new_i64(); \ | ||
200 | @@ -XXX,XX +XXX,XX @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a) | ||
201 | return false; | ||
202 | } | ||
203 | |||
204 | - CHECK_SXE; | ||
205 | + if (!check_vec(ctx, 16)) { | ||
206 | + return true; | ||
207 | + } | ||
208 | |||
209 | sel = (a->imm >> 12) & 0x1; | ||
210 | |||
211 | @@ -XXX,XX +XXX,XX @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a) | ||
212 | return false; | ||
213 | } | ||
214 | |||
215 | - CHECK_SXE; | ||
216 | + if (!check_vec(ctx, 16)) { | ||
217 | + return true; | ||
218 | + } | ||
219 | |||
220 | vd_ofs = vec_full_offset(a->vd); | ||
221 | vj_ofs = vec_full_offset(a->vj); | ||
222 | @@ -XXX,XX +XXX,XX @@ static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
223 | { | ||
224 | uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
225 | |||
226 | - CHECK_SXE; | ||
227 | + if (!check_vec(ctx, 16)) { | ||
228 | + return true; | ||
229 | + } | ||
230 | |||
231 | vd_ofs = vec_full_offset(a->vd); | ||
232 | vj_ofs = vec_full_offset(a->vj); | ||
233 | @@ -XXX,XX +XXX,XX @@ static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
234 | { \ | ||
235 | uint32_t vd_ofs, vj_ofs; \ | ||
236 | \ | ||
237 | - CHECK_SXE; \ | ||
238 | + if (!check_vec(ctx, 16)) { \ | ||
239 | + return true; \ | ||
240 | + } \ | ||
241 | \ | ||
242 | static const TCGOpcode vecop_list[] = { \ | ||
243 | INDEX_op_cmp_vec, 0 \ | ||
244 | @@ -XXX,XX +XXX,XX @@ static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
245 | { \ | ||
246 | uint32_t vd_ofs, vj_ofs; \ | ||
247 | \ | ||
248 | - CHECK_SXE; \ | ||
249 | + if (!check_vec(ctx, 16)) { \ | ||
250 | + return true; \ | ||
251 | + } \ | ||
252 | \ | ||
253 | static const TCGOpcode vecop_list[] = { \ | ||
254 | INDEX_op_cmp_vec, 0 \ | ||
255 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a) | ||
256 | return false; | ||
257 | } | ||
258 | |||
259 | - CHECK_SXE; | ||
260 | + if (!check_vec(ctx, 16)) { | ||
261 | + return true; | ||
262 | + } | ||
263 | |||
264 | fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); | ||
265 | flags = get_fcmp_flags(a->fcond >> 1); | ||
266 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a) | ||
267 | return false; | ||
268 | } | ||
269 | |||
270 | - CHECK_SXE; | ||
271 | + if (!check_vec(ctx, 16)) { | ||
272 | + return true; | ||
273 | + } | ||
274 | |||
275 | fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); | ||
276 | flags = get_fcmp_flags(a->fcond >> 1); | ||
277 | @@ -XXX,XX +XXX,XX @@ static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a) | ||
278 | return false; | ||
279 | } | ||
280 | |||
281 | - CHECK_SXE; | ||
282 | + if (!check_vec(ctx, 16)) { | ||
283 | + return true; | ||
284 | + } | ||
285 | |||
286 | tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va), | ||
287 | vec_full_offset(a->vk), vec_full_offset(a->vj), | ||
288 | @@ -XXX,XX +XXX,XX @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a) | ||
289 | return false; | ||
290 | } | ||
291 | |||
292 | - CHECK_SXE; | ||
293 | + if (!check_vec(ctx, 16)) { | ||
294 | + return true; | ||
295 | + } | ||
296 | |||
297 | tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj), | ||
298 | 16, ctx->vl/8, a->imm, &op); | ||
299 | @@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \ | ||
300 | return false; \ | ||
301 | } \ | ||
302 | \ | ||
303 | - CHECK_SXE; \ | ||
304 | + if (!check_vec(ctx, 16)) { \ | ||
305 | + return true; \ | ||
306 | + } \ | ||
307 | + \ | ||
308 | tcg_gen_or_i64(t1, al, ah); \ | ||
309 | tcg_gen_setcondi_i64(COND, t1, t1, 0); \ | ||
310 | tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ | ||
311 | @@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a) | ||
312 | return false; | ||
313 | } | ||
314 | |||
315 | - CHECK_SXE; | ||
316 | + if (!check_vec(ctx, 16)) { | ||
317 | + return true; | ||
318 | + } | ||
319 | + | ||
320 | tcg_gen_st8_i64(src, cpu_env, | ||
321 | offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm))); | ||
322 | return true; | ||
323 | @@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a) | ||
324 | return false; | ||
325 | } | ||
326 | |||
327 | - CHECK_SXE; | ||
328 | + if (!check_vec(ctx, 16)) { | ||
329 | + return true; | ||
330 | + } | ||
331 | + | ||
332 | tcg_gen_st16_i64(src, cpu_env, | ||
333 | offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm))); | ||
334 | return true; | ||
335 | @@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a) | ||
336 | return false; | ||
337 | } | ||
338 | |||
339 | - CHECK_SXE; | ||
340 | + if (!check_vec(ctx, 16)) { | ||
341 | + return true; | ||
342 | + } | ||
343 | + | ||
344 | tcg_gen_st32_i64(src, cpu_env, | ||
345 | offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm))); | ||
346 | return true; | ||
347 | @@ -XXX,XX +XXX,XX @@ static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a) | ||
348 | return false; | ||
349 | } | ||
350 | |||
351 | - CHECK_SXE; | ||
352 | + if (!check_vec(ctx, 16)) { | ||
353 | + return true; | ||
354 | + } | ||
355 | + | ||
356 | tcg_gen_st_i64(src, cpu_env, | ||
357 | offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm))); | ||
358 | return true; | ||
359 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a) | ||
360 | return false; | ||
361 | } | ||
362 | |||
363 | - CHECK_SXE; | ||
364 | + if (!check_vec(ctx, 16)) { | ||
365 | + return true; | ||
366 | + } | ||
367 | + | ||
368 | tcg_gen_ld8s_i64(dst, cpu_env, | ||
369 | offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
370 | return true; | ||
371 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a) | ||
372 | return false; | ||
373 | } | ||
374 | |||
375 | - CHECK_SXE; | ||
376 | + if (!check_vec(ctx, 16)) { | ||
377 | + return true; | ||
378 | + } | ||
379 | + | ||
380 | tcg_gen_ld16s_i64(dst, cpu_env, | ||
381 | offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
382 | return true; | ||
383 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a) | ||
384 | return false; | ||
385 | } | ||
386 | |||
387 | - CHECK_SXE; | ||
388 | + if (!check_vec(ctx, 16)) { | ||
389 | + return true; | ||
390 | + } | ||
391 | + | ||
392 | tcg_gen_ld32s_i64(dst, cpu_env, | ||
393 | offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
394 | return true; | ||
395 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a) | ||
396 | return false; | ||
397 | } | ||
398 | |||
399 | - CHECK_SXE; | ||
400 | + if (!check_vec(ctx, 16)) { | ||
401 | + return true; | ||
402 | + } | ||
403 | + | ||
404 | tcg_gen_ld_i64(dst, cpu_env, | ||
405 | offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
406 | return true; | ||
407 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a) | ||
408 | return false; | ||
409 | } | ||
410 | |||
411 | - CHECK_SXE; | ||
412 | + if (!check_vec(ctx, 16)) { | ||
413 | + return true; | ||
414 | + } | ||
415 | + | ||
416 | tcg_gen_ld8u_i64(dst, cpu_env, | ||
417 | offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
418 | return true; | ||
419 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a) | ||
420 | return false; | ||
421 | } | ||
422 | |||
423 | - CHECK_SXE; | ||
424 | + if (!check_vec(ctx, 16)) { | ||
425 | + return true; | ||
426 | + } | ||
427 | + | ||
428 | tcg_gen_ld16u_i64(dst, cpu_env, | ||
429 | offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
430 | return true; | ||
431 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a) | ||
432 | return false; | ||
433 | } | ||
434 | |||
435 | - CHECK_SXE; | ||
436 | + if (!check_vec(ctx, 16)) { | ||
437 | + return true; | ||
438 | + } | ||
439 | + | ||
440 | tcg_gen_ld32u_i64(dst, cpu_env, | ||
441 | offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
442 | return true; | ||
443 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a) | ||
444 | return false; | ||
445 | } | ||
446 | |||
447 | - CHECK_SXE; | ||
448 | + if (!check_vec(ctx, 16)) { | ||
449 | + return true; | ||
450 | + } | ||
451 | + | ||
452 | tcg_gen_ld_i64(dst, cpu_env, | ||
453 | offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
454 | return true; | ||
455 | @@ -XXX,XX +XXX,XX @@ static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) | ||
456 | return false; | ||
457 | } | ||
458 | |||
459 | - CHECK_SXE; | ||
460 | + if (!check_vec(ctx, 16)) { | ||
461 | + return true; | ||
462 | + } | ||
463 | |||
464 | tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), | ||
465 | 16, ctx->vl/8, src); | ||
466 | @@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a) | ||
467 | return false; | ||
468 | } | ||
469 | |||
470 | - CHECK_SXE; | ||
471 | + if (!check_vec(ctx, 16)) { | ||
472 | + return true; | ||
473 | + } | ||
474 | + | ||
475 | tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd), | ||
476 | offsetof(CPULoongArchState, | ||
477 | fpr[a->vj].vreg.B((a->imm))), | ||
478 | @@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a) | ||
479 | return false; | ||
480 | } | ||
481 | |||
482 | - CHECK_SXE; | ||
483 | + if (!check_vec(ctx, 16)) { | ||
484 | + return true; | ||
485 | + } | ||
486 | + | ||
487 | tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd), | ||
488 | offsetof(CPULoongArchState, | ||
489 | fpr[a->vj].vreg.H((a->imm))), | ||
490 | @@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a) | ||
491 | return false; | ||
492 | } | ||
493 | |||
494 | - CHECK_SXE; | ||
495 | + if (!check_vec(ctx, 16)) { | ||
496 | + return true; | ||
497 | + } | ||
498 | + | ||
499 | tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd), | ||
500 | offsetof(CPULoongArchState, | ||
501 | fpr[a->vj].vreg.W((a->imm))), | ||
502 | @@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a) | ||
503 | return false; | ||
504 | } | ||
505 | |||
506 | - CHECK_SXE; | ||
507 | + if (!check_vec(ctx, 16)) { | ||
508 | + return true; | ||
509 | + } | ||
510 | + | ||
511 | tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd), | ||
512 | offsetof(CPULoongArchState, | ||
513 | fpr[a->vj].vreg.D((a->imm))), | ||
514 | @@ -XXX,XX +XXX,XX @@ static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, | ||
515 | return false; | ||
516 | } | ||
517 | |||
518 | - CHECK_SXE; | ||
519 | + if (!check_vec(ctx, 16)) { | ||
520 | + return true; | ||
521 | + } | ||
522 | |||
523 | tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1); | ||
524 | tcg_gen_shli_i64(t0, t0, vece); | ||
525 | @@ -XXX,XX +XXX,XX @@ static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a) | ||
526 | return false; | ||
527 | } | ||
528 | |||
529 | - CHECK_SXE; | ||
530 | + if (!check_vec(ctx, 16)) { | ||
531 | + return true; | ||
532 | + } | ||
533 | |||
534 | desthigh = tcg_temp_new_i64(); | ||
535 | destlow = tcg_temp_new_i64(); | ||
536 | @@ -XXX,XX +XXX,XX @@ static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a) | ||
537 | return false; | ||
538 | } | ||
539 | |||
540 | - CHECK_SXE; | ||
541 | + if (!check_vec(ctx, 16)) { | ||
542 | + return true; | ||
543 | + } | ||
544 | |||
545 | desthigh = tcg_temp_new_i64(); | ||
546 | destlow = tcg_temp_new_i64(); | ||
547 | @@ -XXX,XX +XXX,XX @@ static bool trans_vld(DisasContext *ctx, arg_vr_i *a) | ||
548 | return false; | ||
549 | } | ||
550 | |||
551 | - CHECK_SXE; | ||
552 | + if (!check_vec(ctx, 16)) { | ||
553 | + return true; | ||
554 | + } | ||
555 | |||
556 | addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
557 | val = tcg_temp_new_i128(); | ||
558 | @@ -XXX,XX +XXX,XX @@ static bool trans_vst(DisasContext *ctx, arg_vr_i *a) | ||
559 | return false; | ||
560 | } | ||
561 | |||
562 | - CHECK_SXE; | ||
563 | + if (!check_vec(ctx, 16)) { | ||
564 | + return true; | ||
565 | + } | ||
566 | |||
567 | addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
568 | val = tcg_temp_new_i128(); | ||
569 | @@ -XXX,XX +XXX,XX @@ static bool trans_vldx(DisasContext *ctx, arg_vrr *a) | ||
570 | return false; | ||
571 | } | ||
572 | |||
573 | - CHECK_SXE; | ||
574 | + if (!check_vec(ctx, 16)) { | ||
575 | + return true; | ||
576 | + } | ||
577 | |||
578 | src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
579 | src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
580 | @@ -XXX,XX +XXX,XX @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a) | ||
581 | return false; | ||
582 | } | ||
583 | |||
584 | - CHECK_SXE; | ||
585 | + if (!check_vec(ctx, 16)) { | ||
586 | + return true; | ||
587 | + } | ||
588 | |||
589 | src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
590 | src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
591 | @@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \ | ||
592 | return false; \ | ||
593 | } \ | ||
594 | \ | ||
595 | - CHECK_SXE; \ | ||
596 | + if (!check_vec(ctx, 16)) { \ | ||
597 | + return true; \ | ||
598 | + } \ | ||
599 | \ | ||
600 | addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
601 | val = tcg_temp_new_i64(); \ | ||
602 | @@ -XXX,XX +XXX,XX @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \ | ||
603 | return false; \ | ||
604 | } \ | ||
605 | \ | ||
606 | - CHECK_SXE; \ | ||
607 | + if (!check_vec(ctx, 16)) { \ | ||
608 | + return true; \ | ||
609 | + } \ | ||
610 | \ | ||
611 | addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
612 | val = tcg_temp_new_i64(); \ | ||
613 | -- | ||
614 | 2.39.1
New patch | |||
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-12-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/cpu.h | 24 ++++++++++++---------- | ||
6 | target/loongarch/internals.h | 22 -------------------- | ||
7 | target/loongarch/vec.h | 33 ++++++++++++++++++++++++++++++ | ||
8 | linux-user/loongarch64/signal.c | 1 + | ||
9 | target/loongarch/cpu.c | 1 + | ||
10 | target/loongarch/gdbstub.c | 1 + | ||
11 | target/loongarch/machine.c | 36 ++++++++++++++++++++++++++++++++- | ||
12 | target/loongarch/translate.c | 1 + | ||
13 | target/loongarch/vec_helper.c | 1 + | ||
14 | 9 files changed, 86 insertions(+), 34 deletions(-) | ||
15 | create mode 100644 target/loongarch/vec.h | ||
1 | 16 | ||
17 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/cpu.h | ||
20 | +++ b/target/loongarch/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ FIELD(TLB_MISC, ASID, 1, 10) | ||
22 | FIELD(TLB_MISC, VPPN, 13, 35) | ||
23 | FIELD(TLB_MISC, PS, 48, 6) | ||
24 | |||
25 | -#define LSX_LEN (128) | ||
26 | +#define LSX_LEN (128) | ||
27 | +#define LASX_LEN (256) | ||
28 | + | ||
29 | typedef union VReg { | ||
30 | - int8_t B[LSX_LEN / 8]; | ||
31 | - int16_t H[LSX_LEN / 16]; | ||
32 | - int32_t W[LSX_LEN / 32]; | ||
33 | - int64_t D[LSX_LEN / 64]; | ||
34 | - uint8_t UB[LSX_LEN / 8]; | ||
35 | - uint16_t UH[LSX_LEN / 16]; | ||
36 | - uint32_t UW[LSX_LEN / 32]; | ||
37 | - uint64_t UD[LSX_LEN / 64]; | ||
38 | - Int128 Q[LSX_LEN / 128]; | ||
39 | -}VReg; | ||
40 | + int8_t B[LASX_LEN / 8]; | ||
41 | + int16_t H[LASX_LEN / 16]; | ||
42 | + int32_t W[LASX_LEN / 32]; | ||
43 | + int64_t D[LASX_LEN / 64]; | ||
44 | + uint8_t UB[LASX_LEN / 8]; | ||
45 | + uint16_t UH[LASX_LEN / 16]; | ||
46 | + uint32_t UW[LASX_LEN / 32]; | ||
47 | + uint64_t UD[LASX_LEN / 64]; | ||
48 | + Int128 Q[LASX_LEN / 128]; | ||
49 | +} VReg; | ||
50 | |||
51 | typedef union fpr_t fpr_t; | ||
52 | union fpr_t { | ||
53 | diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/loongarch/internals.h | ||
56 | +++ b/target/loongarch/internals.h | ||
57 | @@ -XXX,XX +XXX,XX @@ | ||
58 | /* Global bit for huge page */ | ||
59 | #define LOONGARCH_HGLOBAL_SHIFT 12 | ||
60 | |||
61 | -#if HOST_BIG_ENDIAN | ||
62 | -#define B(x) B[15 - (x)] | ||
63 | -#define H(x) H[7 - (x)] | ||
64 | -#define W(x) W[3 - (x)] | ||
65 | -#define D(x) D[1 - (x)] | ||
66 | -#define UB(x) UB[15 - (x)] | ||
67 | -#define UH(x) UH[7 - (x)] | ||
68 | -#define UW(x) UW[3 - (x)] | ||
69 | -#define UD(x) UD[1 -(x)] | ||
70 | -#define Q(x) Q[x] | ||
71 | -#else | ||
72 | -#define B(x) B[x] | ||
73 | -#define H(x) H[x] | ||
74 | -#define W(x) W[x] | ||
75 | -#define D(x) D[x] | ||
76 | -#define UB(x) UB[x] | ||
77 | -#define UH(x) UH[x] | ||
78 | -#define UW(x) UW[x] | ||
79 | -#define UD(x) UD[x] | ||
80 | -#define Q(x) Q[x] | ||
81 | -#endif | ||
82 | - | ||
83 | void loongarch_translate_init(void); | ||
84 | |||
85 | void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags); | ||
86 | diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h | ||
87 | new file mode 100644 | ||
88 | index XXXXXXX..XXXXXXX | ||
89 | --- /dev/null | ||
90 | +++ b/target/loongarch/vec.h | ||
91 | @@ -XXX,XX +XXX,XX @@ | ||
92 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
93 | +/* | ||
94 | + * QEMU LoongArch vector utilities | ||
95 | + * | ||
96 | + * Copyright (c) 2023 Loongson Technology Corporation Limited | ||
97 | + */ | ||
98 | + | ||
99 | +#ifndef LOONGARCH_VEC_H | ||
100 | +#define LOONGARCH_VEC_H | ||
101 | + | ||
102 | +#if HOST_BIG_ENDIAN | ||
103 | +#define B(x) B[(x) ^ 15] | ||
104 | +#define H(x) H[(x) ^ 7] | ||
105 | +#define W(x) W[(x) ^ 3] | ||
106 | +#define D(x) D[(x) ^ 1] | ||
107 | +#define UB(x) UB[(x) ^ 15] | ||
108 | +#define UH(x) UH[(x) ^ 7] | ||
109 | +#define UW(x) UW[(x) ^ 3] | ||
110 | +#define UD(x) UD[(x) ^ 1] | ||
111 | +#define Q(x) Q[x] | ||
112 | +#else | ||
113 | +#define B(x) B[x] | ||
114 | +#define H(x) H[x] | ||
115 | +#define W(x) W[x] | ||
116 | +#define D(x) D[x] | ||
117 | +#define UB(x) UB[x] | ||
118 | +#define UH(x) UH[x] | ||
119 | +#define UW(x) UW[x] | ||
120 | +#define UD(x) UD[x] | ||
121 | +#define Q(x) Q[x] | ||
122 | +#endif /* HOST_BIG_ENDIAN */ | ||
123 | + | ||
124 | +#endif /* LOONGARCH_VEC_H */ | ||
125 | diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c | ||
126 | index XXXXXXX..XXXXXXX 100644 | ||
127 | --- a/linux-user/loongarch64/signal.c | ||
128 | +++ b/linux-user/loongarch64/signal.c | ||
129 | @@ -XXX,XX +XXX,XX @@ | ||
130 | #include "linux-user/trace.h" | ||
131 | |||
132 | #include "target/loongarch/internals.h" | ||
133 | +#include "target/loongarch/vec.h" | ||
134 | |||
135 | /* FP context was used */ | ||
136 | #define SC_USED_FP (1 << 0) | ||
137 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/target/loongarch/cpu.c | ||
140 | +++ b/target/loongarch/cpu.c | ||
141 | @@ -XXX,XX +XXX,XX @@ | ||
142 | #include "cpu-csr.h" | ||
143 | #include "sysemu/reset.h" | ||
144 | #include "tcg/tcg.h" | ||
145 | +#include "vec.h" | ||
146 | |||
147 | const char * const regnames[32] = { | ||
148 | "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", | ||
149 | diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c | ||
150 | index XXXXXXX..XXXXXXX 100644 | ||
151 | --- a/target/loongarch/gdbstub.c | ||
152 | +++ b/target/loongarch/gdbstub.c | ||
153 | @@ -XXX,XX +XXX,XX @@ | ||
154 | #include "internals.h" | ||
155 | #include "exec/gdbstub.h" | ||
156 | #include "gdbstub/helpers.h" | ||
157 | +#include "vec.h" | ||
158 | |||
159 | uint64_t read_fcc(CPULoongArchState *env) | ||
160 | { | ||
161 | diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c | ||
162 | index XXXXXXX..XXXXXXX 100644 | ||
163 | --- a/target/loongarch/machine.c | ||
164 | +++ b/target/loongarch/machine.c | ||
165 | @@ -XXX,XX +XXX,XX @@ | ||
166 | #include "qemu/osdep.h" | ||
167 | #include "cpu.h" | ||
168 | #include "migration/cpu.h" | ||
169 | -#include "internals.h" | ||
170 | +#include "vec.h" | ||
171 | |||
172 | static const VMStateDescription vmstate_fpu_reg = { | ||
173 | .name = "fpu_reg", | ||
174 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_lsx = { | ||
175 | }, | ||
176 | }; | ||
177 | |||
178 | +static const VMStateDescription vmstate_lasxh_reg = { | ||
179 | + .name = "lasxh_reg", | ||
180 | + .version_id = 1, | ||
181 | + .minimum_version_id = 1, | ||
182 | + .fields = (VMStateField[]) { | ||
183 | + VMSTATE_UINT64(UD(2), VReg), | ||
184 | + VMSTATE_UINT64(UD(3), VReg), | ||
185 | + VMSTATE_END_OF_LIST() | ||
186 | + } | ||
187 | +}; | ||
188 | + | ||
189 | +#define VMSTATE_LASXH_REGS(_field, _state, _start) \ | ||
190 | + VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \ | ||
191 | + vmstate_lasxh_reg, fpr_t) | ||
192 | + | ||
193 | +static bool lasx_needed(void *opaque) | ||
194 | +{ | ||
195 | + LoongArchCPU *cpu = opaque; | ||
196 | + | ||
197 | + return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LASX); | ||
198 | +} | ||
199 | + | ||
200 | +static const VMStateDescription vmstate_lasx = { | ||
201 | + .name = "cpu/lasx", | ||
202 | + .version_id = 1, | ||
203 | + .minimum_version_id = 1, | ||
204 | + .needed = lasx_needed, | ||
205 | + .fields = (VMStateField[]) { | ||
206 | + VMSTATE_LASXH_REGS(env.fpr, LoongArchCPU, 0), | ||
207 | + VMSTATE_END_OF_LIST() | ||
208 | + }, | ||
209 | +}; | ||
210 | + | ||
211 | /* TLB state */ | ||
212 | const VMStateDescription vmstate_tlb = { | ||
213 | .name = "cpu/tlb", | ||
214 | @@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_loongarch_cpu = { | ||
215 | .subsections = (const VMStateDescription*[]) { | ||
216 | &vmstate_fpu, | ||
217 | &vmstate_lsx, | ||
218 | + &vmstate_lasx, | ||
219 | NULL | ||
220 | } | ||
221 | }; | ||
222 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
223 | index XXXXXXX..XXXXXXX 100644 | ||
224 | --- a/target/loongarch/translate.c | ||
225 | +++ b/target/loongarch/translate.c | ||
226 | @@ -XXX,XX +XXX,XX @@ | ||
227 | #include "fpu/softfloat.h" | ||
228 | #include "translate.h" | ||
229 | #include "internals.h" | ||
230 | +#include "vec.h" | ||
231 | |||
232 | /* Global register indices */ | ||
233 | TCGv cpu_gpr[32], cpu_pc; | ||
234 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
235 | index XXXXXXX..XXXXXXX 100644 | ||
236 | --- a/target/loongarch/vec_helper.c | ||
237 | +++ b/target/loongarch/vec_helper.c | ||
238 | @@ -XXX,XX +XXX,XX @@ | ||
239 | #include "fpu/softfloat.h" | ||
240 | #include "internals.h" | ||
241 | #include "tcg/tcg.h" | ||
242 | +#include "vec.h" | ||
243 | |||
244 | #define DO_ADD(a, b) (a + b) | ||
245 | #define DO_SUB(a, b) (a - b) | ||
246 | -- | ||
247 | 2.39.1
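The XOR in the big-endian variants of the vec.h macros reverses the element order within each 128-bit half of the register, matching how the halves are held as Int128 values; a quick illustration (hypothetical values):

    /* 16-bit elements: 8 per 128-bit half, so index x ^ 7 reverses the
     * order within a half. On a big-endian host: */
    VReg v;
    v.H(0) = 0x1234;   /* expands to v.H[7]; stays v.H[0] on little-endian */
    v.H(8) = 0x5678;   /* expands to v.H[15]: first element of the high half */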
New patch | |||
1 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
3 | Message-Id: <20230914022645.1151356-13-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/cpu.h | 2 ++ | ||
6 | target/loongarch/cpu.c | 2 ++ | ||
7 | target/loongarch/insn_trans/trans_vec.c.inc | 6 ++++++ | ||
8 | 3 files changed, 10 insertions(+) | ||
1 | 9 | ||
10 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/cpu.h | ||
13 | +++ b/target/loongarch/cpu.h | ||
14 | @@ -XXX,XX +XXX,XX @@ static inline void set_pc(CPULoongArchState *env, uint64_t value) | ||
15 | #define HW_FLAGS_CRMD_PG R_CSR_CRMD_PG_MASK /* 0x10 */ | ||
16 | #define HW_FLAGS_EUEN_FPE 0x04 | ||
17 | #define HW_FLAGS_EUEN_SXE 0x08 | ||
18 | +#define HW_FLAGS_EUEN_ASXE 0x10 | ||
19 | #define HW_FLAGS_VA32 0x20 | ||
20 | |||
21 | static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, | ||
22 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, | ||
23 | *flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); | ||
24 | *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; | ||
25 | *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; | ||
26 | + *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE; | ||
27 | *flags |= is_va32(env) * HW_FLAGS_VA32; | ||
28 | } | ||
29 | |||
30 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/loongarch/cpu.c | ||
33 | +++ b/target/loongarch/cpu.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static const char * const excp_names[] = { | ||
35 | [EXCCODE_DBP] = "Debug breakpoint", | ||
36 | [EXCCODE_BCE] = "Bound Check Exception", | ||
37 | [EXCCODE_SXD] = "128 bit vector instructions Disable exception", | ||
38 | + [EXCCODE_ASXD] = "256 bit vector instructions Disable exception", | ||
39 | }; | ||
40 | |||
41 | const char *loongarch_exception_name(int32_t exception) | ||
42 | @@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_do_interrupt(CPUState *cs) | ||
43 | case EXCCODE_FPD: | ||
44 | case EXCCODE_FPE: | ||
45 | case EXCCODE_SXD: | ||
46 | + case EXCCODE_ASXD: | ||
47 | env->CSR_BADV = env->pc; | ||
48 | QEMU_FALLTHROUGH; | ||
49 | case EXCCODE_BCE: | ||
50 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
53 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz) | ||
55 | generate_exception(ctx, EXCCODE_SXD); | ||
56 | return false; | ||
57 | } | ||
58 | + | ||
59 | + if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) { | ||
60 | + generate_exception(ctx, EXCCODE_ASXD); | ||
61 | + return false; | ||
62 | + } | ||
63 | + | ||
64 | return true; | ||
65 | } | ||
66 | |||
67 | -- | ||
68 | 2.39.1
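The flag update in the cpu_get_tb_cpu_state() hunk uses multiplication as a branch-free conditional OR, since FIELD_EX64() extracts the ASXE bit as 0 or 1; equivalently (a sketch):

    /* ... FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE
     * is the same as: */
    if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) {
        *flags |= HW_FLAGS_EUEN_ASXE;   /* 0x10 */
    }

check_vec() then consults these TB flags, so a 32-byte operand with CSR.EUEN.ASXE clear raises EXCCODE_ASXD.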
New patch | |||
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-14-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/translate.h | 1 + | ||
6 | 1 file changed, 1 insertion(+) | ||
1 | 7 | ||
8 | diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/loongarch/translate.h | ||
11 | +++ b/target/loongarch/translate.h | ||
12 | @@ -XXX,XX +XXX,XX @@ | ||
13 | #define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW)) | ||
14 | #define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM)) | ||
15 | #define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX)) | ||
16 | +#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX)) | ||
17 | #define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR)) | ||
18 | |||
19 | /* | ||
20 | -- | ||
21 | 2.39.1
New patch | |||
1 | This patch includes: | ||
2 | - XVADD.{B/H/W/D/Q}; | ||
3 | - XVSUB.{B/H/W/D/Q}. | ||
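Semantically these are independent lane-wise operations over the 256-bit X registers, which is why the existing tcg_gen_gvec_add/sub expanders can be reused with a 32-byte operand size; a pseudo-C sketch of the byte variant (Xd/Xj/Xk stand for the 256-bit register views):

    /* xvadd.b xd, xj, xk: 32 independent byte additions */
    for (int i = 0; i < 32; i++) {
        Xd->B(i) = Xj->B(i) + Xk->B(i);
    }

The 128-bit-per-lane xvadd.q/xvsub.q forms have no gvec expander, so gen_vaddsub_q_vl() below loops over each 128-bit half with tcg_gen_add2_i64/tcg_gen_sub2_i64.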
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-15-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 14 +++ | ||
10 | target/loongarch/disas.c | 23 +++++ | ||
11 | target/loongarch/translate.c | 4 + | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 107 +++++++++++++------- | ||
13 | 4 files changed, 109 insertions(+), 39 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1 | ||
20 | vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2 | ||
21 | vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3 | ||
22 | vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4 | ||
23 | + | ||
24 | +# | ||
25 | +# LoongArch LASX instructions | ||
26 | +# | ||
27 | +xvadd_b 0111 01000000 10100 ..... ..... ..... @vvv | ||
28 | +xvadd_h 0111 01000000 10101 ..... ..... ..... @vvv | ||
29 | +xvadd_w 0111 01000000 10110 ..... ..... ..... @vvv | ||
30 | +xvadd_d 0111 01000000 10111 ..... ..... ..... @vvv | ||
31 | +xvadd_q 0111 01010010 11010 ..... ..... ..... @vvv | ||
32 | +xvsub_b 0111 01000000 11000 ..... ..... ..... @vvv | ||
33 | +xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv | ||
34 | +xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv | ||
35 | +xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv | ||
36 | +xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv | ||
37 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/target/loongarch/disas.c | ||
40 | +++ b/target/loongarch/disas.c | ||
41 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vstelm_d, vr_ii) | ||
42 | INSN_LSX(vstelm_w, vr_ii) | ||
43 | INSN_LSX(vstelm_h, vr_ii) | ||
44 | INSN_LSX(vstelm_b, vr_ii) | ||
45 | + | ||
46 | +#define INSN_LASX(insn, type) \ | ||
47 | +static bool trans_##insn(DisasContext *ctx, arg_##type * a) \ | ||
48 | +{ \ | ||
49 | + output_##type ## _x(ctx, a, #insn); \ | ||
50 | + return true; \ | ||
51 | +} | ||
52 | + | ||
53 | +static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic) | ||
54 | +{ | ||
55 | + output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk); | ||
56 | +} | ||
57 | + | ||
58 | +INSN_LASX(xvadd_b, vvv) | ||
59 | +INSN_LASX(xvadd_h, vvv) | ||
60 | +INSN_LASX(xvadd_w, vvv) | ||
61 | +INSN_LASX(xvadd_d, vvv) | ||
62 | +INSN_LASX(xvadd_q, vvv) | ||
63 | +INSN_LASX(xvsub_b, vvv) | ||
64 | +INSN_LASX(xvsub_h, vvv) | ||
65 | +INSN_LASX(xvsub_w, vvv) | ||
66 | +INSN_LASX(xvsub_d, vvv) | ||
67 | +INSN_LASX(xvsub_q, vvv) | ||
68 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/translate.c | ||
71 | +++ b/target/loongarch/translate.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase, | ||
73 | ctx->vl = LSX_LEN; | ||
74 | } | ||
75 | |||
76 | + if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) { | ||
77 | + ctx->vl = LASX_LEN; | ||
78 | + } | ||
79 | + | ||
80 | ctx->la64 = is_la64(env); | ||
81 | ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0; | ||
82 | |||
83 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
86 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
87 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a, | ||
88 | uint32_t vj_ofs = vec_full_offset(a->vj); | ||
89 | uint32_t vk_ofs = vec_full_offset(a->vk); | ||
90 | |||
91 | + if (!check_vec(ctx, oprsz)) { | ||
92 | + return true; | ||
93 | + } | ||
94 | + | ||
95 | func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); | ||
96 | return true; | ||
97 | } | ||
98 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
99 | void (*func)(unsigned, uint32_t, uint32_t, | ||
100 | uint32_t, uint32_t, uint32_t)) | ||
101 | { | ||
102 | - if (!check_vec(ctx, 16)) { | ||
103 | - return true; | ||
104 | - } | ||
105 | - | ||
106 | return gvec_vvv_vl(ctx, a, 16, mop, func); | ||
107 | } | ||
108 | |||
109 | +static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
110 | + void (*func)(unsigned, uint32_t, uint32_t, | ||
111 | + uint32_t, uint32_t, uint32_t)) | ||
112 | +{ | ||
113 | + return gvec_vvv_vl(ctx, a, 32, mop, func); | ||
114 | +} | ||
115 | |||
116 | static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a, | ||
117 | uint32_t oprsz, MemOp mop, | ||
118 | @@ -XXX,XX +XXX,XX @@ TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add) | ||
119 | TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add) | ||
120 | TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add) | ||
121 | TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add) | ||
122 | +TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add) | ||
123 | +TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add) | ||
124 | +TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add) | ||
125 | +TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add) | ||
126 | + | ||
127 | +static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
128 | + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, | ||
129 | + TCGv_i64, TCGv_i64, TCGv_i64)) | ||
130 | +{ | ||
131 | + int i; | ||
132 | + TCGv_i64 rh, rl, ah, al, bh, bl; | ||
133 | + | ||
134 | + if (!check_vec(ctx, oprsz)) { | ||
135 | + return true; | ||
136 | + } | ||
137 | |||
138 | -#define VADDSUB_Q(NAME) \ | ||
139 | -static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \ | ||
140 | -{ \ | ||
141 | - TCGv_i64 rh, rl, ah, al, bh, bl; \ | ||
142 | - \ | ||
143 | - if (!avail_LSX(ctx)) { \ | ||
144 | - return false; \ | ||
145 | - } \ | ||
146 | - \ | ||
147 | - if (!check_vec(ctx, 16)) { \ | ||
148 | - return true; \ | ||
149 | - } \ | ||
150 | - \ | ||
151 | - rh = tcg_temp_new_i64(); \ | ||
152 | - rl = tcg_temp_new_i64(); \ | ||
153 | - ah = tcg_temp_new_i64(); \ | ||
154 | - al = tcg_temp_new_i64(); \ | ||
155 | - bh = tcg_temp_new_i64(); \ | ||
156 | - bl = tcg_temp_new_i64(); \ | ||
157 | - \ | ||
158 | - get_vreg64(ah, a->vj, 1); \ | ||
159 | - get_vreg64(al, a->vj, 0); \ | ||
160 | - get_vreg64(bh, a->vk, 1); \ | ||
161 | - get_vreg64(bl, a->vk, 0); \ | ||
162 | - \ | ||
163 | - tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \ | ||
164 | - \ | ||
165 | - set_vreg64(rh, a->vd, 1); \ | ||
166 | - set_vreg64(rl, a->vd, 0); \ | ||
167 | - \ | ||
168 | - return true; \ | ||
169 | -} | ||
170 | - | ||
171 | -VADDSUB_Q(add) | ||
172 | -VADDSUB_Q(sub) | ||
173 | + rh = tcg_temp_new_i64(); | ||
174 | + rl = tcg_temp_new_i64(); | ||
175 | + ah = tcg_temp_new_i64(); | ||
176 | + al = tcg_temp_new_i64(); | ||
177 | + bh = tcg_temp_new_i64(); | ||
178 | + bl = tcg_temp_new_i64(); | ||
179 | + | ||
180 | + for (i = 0; i < oprsz / 16; i++) { | ||
181 | + get_vreg64(ah, a->vj, 1 + i * 2); | ||
182 | + get_vreg64(al, a->vj, i * 2); | ||
183 | + get_vreg64(bh, a->vk, 1 + i * 2); | ||
184 | + get_vreg64(bl, a->vk, i * 2); | ||
185 | + | ||
186 | + func(rl, rh, al, ah, bl, bh); | ||
187 | + | ||
188 | + set_vreg64(rh, a->vd, 1 + i * 2); | ||
189 | + set_vreg64(rl, a->vd, i * 2); | ||
190 | + } | ||
191 | + return true; | ||
192 | +} | ||
193 | + | ||
194 | +static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a, | ||
195 | + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, | ||
196 | + TCGv_i64, TCGv_i64, TCGv_i64)) | ||
197 | +{ | ||
198 | + return gen_vaddsub_q_vl(ctx, a, 16, func); | ||
199 | +} | ||
200 | + | ||
201 | +static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a, | ||
202 | + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, | ||
203 | + TCGv_i64, TCGv_i64, TCGv_i64)) | ||
204 | +{ | ||
205 | + return gen_vaddsub_q_vl(ctx, a, 32, func); | ||
206 | +} | ||
207 | |||
208 | TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub) | ||
209 | TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub) | ||
210 | TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub) | ||
211 | TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub) | ||
212 | +TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub) | ||
213 | +TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub) | ||
214 | +TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub) | ||
215 | +TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub) | ||
216 | + | ||
217 | +TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64) | ||
218 | +TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64) | ||
219 | +TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64) | ||
220 | +TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64) | ||
221 | |||
222 | TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi) | ||
223 | TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi) | ||
224 | -- | ||
225 | 2.39.1
New patch | |||
1 | This patch includes: | ||
2 | - XVREPLGR2VR.{B/H/W/D}. | ||
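xvreplgr2vr broadcasts a general-purpose register into every lane of an X register, so it maps directly onto tcg_gen_gvec_dup_i64() with a 32-byte operand size; a pseudo-C sketch of the word variant (Xd and gpr[] stand for the destination view and the GPR file):

    /* xvreplgr2vr.w xd, rj: replicate the low 32 bits of rj */
    for (int i = 0; i < 8; i++) {
        Xd->W(i) = (int32_t)gpr[rj];
    }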
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-16-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 +++++ | ||
9 | target/loongarch/disas.c | 10 +++++++++ | ||
10 | target/loongarch/insn_trans/trans_vec.c.inc | 25 +++++++++++++++------ | ||
11 | 3 files changed, 33 insertions(+), 7 deletions(-) | ||
12 | |||
13 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/insns.decode | ||
16 | +++ b/target/loongarch/insns.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv | ||
18 | xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv | ||
19 | xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv | ||
20 | xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv | ||
21 | + | ||
22 | +xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
23 | +xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
24 | +xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
25 | +xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr | ||
26 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/loongarch/disas.c | ||
29 | +++ b/target/loongarch/disas.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic) | ||
31 | output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk); | ||
32 | } | ||
33 | |||
34 | +static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic) | ||
35 | +{ | ||
36 | + output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj); | ||
37 | +} | ||
38 | + | ||
39 | INSN_LASX(xvadd_b, vvv) | ||
40 | INSN_LASX(xvadd_h, vvv) | ||
41 | INSN_LASX(xvadd_w, vvv) | ||
42 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsub_h, vvv) | ||
43 | INSN_LASX(xvsub_w, vvv) | ||
44 | INSN_LASX(xvsub_d, vvv) | ||
45 | INSN_LASX(xvsub_q, vvv) | ||
46 | + | ||
47 | +INSN_LASX(xvreplgr2vr_b, vr) | ||
48 | +INSN_LASX(xvreplgr2vr_h, vr) | ||
49 | +INSN_LASX(xvreplgr2vr_w, vr) | ||
50 | +INSN_LASX(xvreplgr2vr_d, vr) | ||
51 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
54 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
55 | @@ -XXX,XX +XXX,XX @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a) | ||
56 | return true; | ||
57 | } | ||
58 | |||
59 | -static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) | ||
60 | +static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a, | ||
61 | + uint32_t oprsz, MemOp mop) | ||
62 | { | ||
63 | TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
64 | |||
65 | - if (!avail_LSX(ctx)) { | ||
66 | - return false; | ||
67 | - } | ||
68 | - | ||
69 | - if (!check_vec(ctx, 16)) { | ||
70 | + if (!check_vec(ctx, oprsz)) { | ||
71 | return true; | ||
72 | } | ||
73 | |||
74 | tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), | ||
75 | - 16, ctx->vl/8, src); | ||
76 | + oprsz, ctx->vl/8, src); | ||
77 | return true; | ||
78 | } | ||
79 | |||
80 | +static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) | ||
81 | +{ | ||
82 | + return gvec_dup_vl(ctx, a, 16, mop); | ||
83 | +} | ||
84 | + | ||
85 | +static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop) | ||
86 | +{ | ||
87 | + return gvec_dup_vl(ctx, a, 32, mop); | ||
88 | +} | ||
89 | + | ||
90 | TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8) | ||
91 | TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16) | ||
92 | TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32) | ||
93 | TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64) | ||
94 | +TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8) | ||
95 | +TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16) | ||
96 | +TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32) | ||
97 | +TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64) | ||
98 | |||
99 | static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a) | ||
100 | { | ||
101 | -- | ||
102 | 2.39.1
New patch | |||
1 | This patch includes: | ||
2 | - XVADDI.{B/H/W/D}U; | ||
3 | - XVSUBI.{B/H/W/D}U. | ||
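Each lane gets the zero-extended 5-bit immediate added to or subtracted from it; a pseudo-C sketch of the byte variant (Xd/Xj stand for the 256-bit register views):

    /* xvaddi.bu xd, xj, ui5: add ui5 to all 32 byte lanes */
    for (int i = 0; i < 32; i++) {
        Xd->B(i) = Xj->B(i) + ui5;
    }

As the gvec_subi_vl() hunk below shows, the subtract form is expanded as an addition of -imm through tcg_gen_gvec_addi().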
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-17-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 9 ++++++ | ||
10 | target/loongarch/disas.c | 14 ++++++++ | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 36 ++++++++++++++++----- | ||
12 | 3 files changed, 51 insertions(+), 8 deletions(-) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv | ||
19 | xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv | ||
20 | xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv | ||
21 | |||
22 | +xvaddi_bu 0111 01101000 10100 ..... ..... ..... @vv_ui5 | ||
23 | +xvaddi_hu 0111 01101000 10101 ..... ..... ..... @vv_ui5 | ||
24 | +xvaddi_wu 0111 01101000 10110 ..... ..... ..... @vv_ui5 | ||
25 | +xvaddi_du 0111 01101000 10111 ..... ..... ..... @vv_ui5 | ||
26 | +xvsubi_bu 0111 01101000 11000 ..... ..... ..... @vv_ui5 | ||
27 | +xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5 | ||
28 | +xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5 | ||
29 | +xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5 | ||
30 | + | ||
31 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
32 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
33 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
34 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/loongarch/disas.c | ||
37 | +++ b/target/loongarch/disas.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic) | ||
39 | output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj); | ||
40 | } | ||
41 | |||
42 | +static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic) | ||
43 | +{ | ||
44 | + output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm); | ||
45 | +} | ||
46 | + | ||
47 | INSN_LASX(xvadd_b, vvv) | ||
48 | INSN_LASX(xvadd_h, vvv) | ||
49 | INSN_LASX(xvadd_w, vvv) | ||
50 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsub_w, vvv) | ||
51 | INSN_LASX(xvsub_d, vvv) | ||
52 | INSN_LASX(xvsub_q, vvv) | ||
53 | |||
54 | +INSN_LASX(xvaddi_bu, vv_i) | ||
55 | +INSN_LASX(xvaddi_hu, vv_i) | ||
56 | +INSN_LASX(xvaddi_wu, vv_i) | ||
57 | +INSN_LASX(xvaddi_du, vv_i) | ||
58 | +INSN_LASX(xvsubi_bu, vv_i) | ||
59 | +INSN_LASX(xvsubi_hu, vv_i) | ||
60 | +INSN_LASX(xvsubi_wu, vv_i) | ||
61 | +INSN_LASX(xvsubi_du, vv_i) | ||
62 | + | ||
63 | INSN_LASX(xvreplgr2vr_b, vr) | ||
64 | INSN_LASX(xvreplgr2vr_h, vr) | ||
65 | INSN_LASX(xvreplgr2vr_w, vr) | ||
66 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
69 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
70 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a, | ||
71 | uint32_t vd_ofs = vec_full_offset(a->vd); | ||
72 | uint32_t vj_ofs = vec_full_offset(a->vj); | ||
73 | |||
74 | + if (!check_vec(ctx, oprsz)) { | ||
75 | + return true; | ||
76 | + } | ||
77 | + | ||
78 | func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); | ||
79 | return true; | ||
80 | } | ||
81 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, | ||
82 | void (*func)(unsigned, uint32_t, uint32_t, | ||
83 | int64_t, uint32_t, uint32_t)) | ||
84 | { | ||
85 | - if (!check_vec(ctx, 16)) { | ||
86 | - return true; | ||
87 | - } | ||
88 | - | ||
89 | return gvec_vv_i_vl(ctx, a, 16, mop, func); | ||
90 | } | ||
91 | |||
92 | +static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, | ||
93 | + void (*func)(unsigned, uint32_t, uint32_t, | ||
94 | + int64_t, uint32_t, uint32_t)) | ||
95 | +{ | ||
96 | + return gvec_vv_i_vl(ctx, a, 32, mop, func); | ||
97 | +} | ||
98 | + | ||
99 | static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a, | ||
100 | uint32_t oprsz, MemOp mop) | ||
101 | { | ||
102 | uint32_t vd_ofs = vec_full_offset(a->vd); | ||
103 | uint32_t vj_ofs = vec_full_offset(a->vj); | ||
104 | |||
105 | + if (!check_vec(ctx, oprsz)) { | ||
106 | + return true; | ||
107 | + } | ||
108 | + | ||
109 | tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8); | ||
110 | return true; | ||
111 | } | ||
112 | |||
113 | static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop) | ||
114 | { | ||
115 | - if (!check_vec(ctx, 16)) { | ||
116 | - return true; | ||
117 | - } | ||
118 | - | ||
119 | return gvec_subi_vl(ctx, a, 16, mop); | ||
120 | } | ||
121 | |||
122 | +static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop) | ||
123 | +{ | ||
124 | + return gvec_subi_vl(ctx, a, 32, mop); | ||
125 | +} | ||
126 | + | ||
127 | TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add) | ||
128 | TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add) | ||
129 | TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add) | ||
130 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubi_bu, LSX, gvec_subi, MO_8) | ||
131 | TRANS(vsubi_hu, LSX, gvec_subi, MO_16) | ||
132 | TRANS(vsubi_wu, LSX, gvec_subi, MO_32) | ||
133 | TRANS(vsubi_du, LSX, gvec_subi, MO_64) | ||
134 | +TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi) | ||
135 | +TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi) | ||
136 | +TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi) | ||
137 | +TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi) | ||
138 | +TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8) | ||
139 | +TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16) | ||
140 | +TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32) | ||
141 | +TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64) | ||
142 | |||
143 | TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg) | ||
144 | TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg) | ||
145 | -- | ||
146 | 2.39.1 | ||
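A detail worth calling out in gvec_subi_vl(): the gvec API has an add-immediate expander but no subtract-immediate one, so vsubi/xvsubi are emitted as tcg_gen_gvec_addi() with the negated immediate. That is exact because lane arithmetic wraps modulo the lane width. A quick exhaustive check for byte lanes over the full ui5 immediate range (a standalone sketch, not QEMU code):

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        for (int x = 0; x < 256; x++) {
            for (int imm = 0; imm < 32; imm++) {    /* ui5 immediates */
                uint8_t sub = (uint8_t)(x - imm);
                uint8_t add = (uint8_t)(x + (0x100 - imm)); /* x + (-imm) */
                assert(sub == add);
            }
        }
        return 0;
    }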
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVNEG.{B/H/W/D}. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-18-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 +++++ | ||
9 | target/loongarch/disas.c | 10 ++++++++++ | ||
10 | target/loongarch/insn_trans/trans_vec.c.inc | 19 +++++++++++++++---- | ||
11 | 3 files changed, 30 insertions(+), 4 deletions(-) | ||
12 | |||
13 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/insns.decode | ||
16 | +++ b/target/loongarch/insns.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5 | ||
18 | xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5 | ||
19 | xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5 | ||
20 | |||
21 | +xvneg_b 0111 01101001 11000 01100 ..... ..... @vv | ||
22 | +xvneg_h 0111 01101001 11000 01101 ..... ..... @vv | ||
23 | +xvneg_w 0111 01101001 11000 01110 ..... ..... @vv | ||
24 | +xvneg_d 0111 01101001 11000 01111 ..... ..... @vv | ||
25 | + | ||
26 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
27 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
28 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
29 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/loongarch/disas.c | ||
32 | +++ b/target/loongarch/disas.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic) | ||
34 | output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm); | ||
35 | } | ||
36 | |||
37 | +static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic) | ||
38 | +{ | ||
39 | + output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj); | ||
40 | +} | ||
41 | + | ||
42 | INSN_LASX(xvadd_b, vvv) | ||
43 | INSN_LASX(xvadd_h, vvv) | ||
44 | INSN_LASX(xvadd_w, vvv) | ||
45 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsubi_hu, vv_i) | ||
46 | INSN_LASX(xvsubi_wu, vv_i) | ||
47 | INSN_LASX(xvsubi_du, vv_i) | ||
48 | |||
49 | +INSN_LASX(xvneg_b, vv) | ||
50 | +INSN_LASX(xvneg_h, vv) | ||
51 | +INSN_LASX(xvneg_w, vv) | ||
52 | +INSN_LASX(xvneg_d, vv) | ||
53 | + | ||
54 | INSN_LASX(xvreplgr2vr_b, vr) | ||
55 | INSN_LASX(xvreplgr2vr_h, vr) | ||
56 | INSN_LASX(xvreplgr2vr_w, vr) | ||
57 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
60 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
61 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a, | ||
62 | uint32_t vd_ofs = vec_full_offset(a->vd); | ||
63 | uint32_t vj_ofs = vec_full_offset(a->vj); | ||
64 | |||
65 | + if (!check_vec(ctx, oprsz)) { | ||
66 | + return true; | ||
67 | + } | ||
68 | + | ||
69 | func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8); | ||
70 | return true; | ||
71 | } | ||
72 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop, | ||
73 | void (*func)(unsigned, uint32_t, uint32_t, | ||
74 | uint32_t, uint32_t)) | ||
75 | { | ||
76 | - if (!check_vec(ctx, 16)) { | ||
77 | - return true; | ||
78 | - } | ||
79 | - | ||
80 | return gvec_vv_vl(ctx, a, 16, mop, func); | ||
81 | } | ||
82 | |||
83 | +static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop, | ||
84 | + void (*func)(unsigned, uint32_t, uint32_t, | ||
85 | + uint32_t, uint32_t)) | ||
86 | +{ | ||
87 | + return gvec_vv_vl(ctx, a, 32, mop, func); | ||
88 | +} | ||
89 | + | ||
90 | static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a, | ||
91 | uint32_t oprsz, MemOp mop, | ||
92 | void (*func)(unsigned, uint32_t, uint32_t, | ||
93 | @@ -XXX,XX +XXX,XX @@ TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg) | ||
94 | TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg) | ||
95 | TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg) | ||
96 | TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg) | ||
97 | +TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg) | ||
98 | +TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg) | ||
99 | +TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg) | ||
100 | +TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg) | ||
101 | |||
102 | TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd) | ||
103 | TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd) | ||
104 | -- | ||
105 | 2.39.1 | ||
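The semantics reused here are plain lane-wise two's-complement negation; only the operand width changes between the LSX and LASX forms. As a reference sketch for the byte case on a 256-bit LASX register (illustrative C, not the TCG expansion):

    #include <stdint.h>

    static void xvneg_b_ref(int8_t vd[32], const int8_t vj[32])
    {
        for (int i = 0; i < 32; i++) {
            vd[i] = -vj[i];   /* wraps, so -INT8_MIN stays INT8_MIN */
        }
    }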
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSADD.{B/H/W/D}[U]; | ||
3 | - XVSSUB.{B/H/W/D}[U]. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-19-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 18 ++++++++++++++++++ | ||
10 | target/loongarch/disas.c | 17 +++++++++++++++++ | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 17 +++++++++++++++++ | ||
12 | 3 files changed, 52 insertions(+) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvneg_h 0111 01101001 11000 01101 ..... ..... @vv | ||
19 | xvneg_w 0111 01101001 11000 01110 ..... ..... @vv | ||
20 | xvneg_d 0111 01101001 11000 01111 ..... ..... @vv | ||
21 | |||
22 | +xvsadd_b 0111 01000100 01100 ..... ..... ..... @vvv | ||
23 | +xvsadd_h 0111 01000100 01101 ..... ..... ..... @vvv | ||
24 | +xvsadd_w 0111 01000100 01110 ..... ..... ..... @vvv | ||
25 | +xvsadd_d 0111 01000100 01111 ..... ..... ..... @vvv | ||
26 | +xvsadd_bu 0111 01000100 10100 ..... ..... ..... @vvv | ||
27 | +xvsadd_hu 0111 01000100 10101 ..... ..... ..... @vvv | ||
28 | +xvsadd_wu 0111 01000100 10110 ..... ..... ..... @vvv | ||
29 | +xvsadd_du 0111 01000100 10111 ..... ..... ..... @vvv | ||
30 | + | ||
31 | +xvssub_b 0111 01000100 10000 ..... ..... ..... @vvv | ||
32 | +xvssub_h 0111 01000100 10001 ..... ..... ..... @vvv | ||
33 | +xvssub_w 0111 01000100 10010 ..... ..... ..... @vvv | ||
34 | +xvssub_d 0111 01000100 10011 ..... ..... ..... @vvv | ||
35 | +xvssub_bu 0111 01000100 11000 ..... ..... ..... @vvv | ||
36 | +xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv | ||
37 | +xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv | ||
38 | +xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv | ||
39 | + | ||
40 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
41 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
42 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
43 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/loongarch/disas.c | ||
46 | +++ b/target/loongarch/disas.c | ||
47 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvneg_h, vv) | ||
48 | INSN_LASX(xvneg_w, vv) | ||
49 | INSN_LASX(xvneg_d, vv) | ||
50 | |||
51 | +INSN_LASX(xvsadd_b, vvv) | ||
52 | +INSN_LASX(xvsadd_h, vvv) | ||
53 | +INSN_LASX(xvsadd_w, vvv) | ||
54 | +INSN_LASX(xvsadd_d, vvv) | ||
55 | +INSN_LASX(xvsadd_bu, vvv) | ||
56 | +INSN_LASX(xvsadd_hu, vvv) | ||
57 | +INSN_LASX(xvsadd_wu, vvv) | ||
58 | +INSN_LASX(xvsadd_du, vvv) | ||
59 | +INSN_LASX(xvssub_b, vvv) | ||
60 | +INSN_LASX(xvssub_h, vvv) | ||
61 | +INSN_LASX(xvssub_w, vvv) | ||
62 | +INSN_LASX(xvssub_d, vvv) | ||
63 | +INSN_LASX(xvssub_bu, vvv) | ||
64 | +INSN_LASX(xvssub_hu, vvv) | ||
65 | +INSN_LASX(xvssub_wu, vvv) | ||
66 | +INSN_LASX(xvssub_du, vvv) | ||
67 | + | ||
68 | INSN_LASX(xvreplgr2vr_b, vr) | ||
69 | INSN_LASX(xvreplgr2vr_h, vr) | ||
70 | INSN_LASX(xvreplgr2vr_w, vr) | ||
71 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
74 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
75 | @@ -XXX,XX +XXX,XX @@ TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub) | ||
76 | TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub) | ||
77 | TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub) | ||
78 | |||
79 | +TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd) | ||
80 | +TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd) | ||
81 | +TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd) | ||
82 | +TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd) | ||
83 | +TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd) | ||
84 | +TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd) | ||
85 | +TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd) | ||
86 | +TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd) | ||
87 | +TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub) | ||
88 | +TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub) | ||
89 | +TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub) | ||
90 | +TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub) | ||
91 | +TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub) | ||
92 | +TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub) | ||
93 | +TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub) | ||
94 | +TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub) | ||
95 | + | ||
96 | TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b) | ||
97 | TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h) | ||
98 | TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w) | ||
99 | -- | ||
100 | 2.39.1 | ||
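Unlike xvadd/xvsub, these lanes saturate rather than wrap: the gvec ssadd/usadd/sssub/ussub expanders clamp each result to the signed or unsigned range of the lane. A reference sketch of the byte-lane behavior (not the gvec implementation itself):

    #include <stdint.h>

    static int8_t ssadd8(int8_t a, int8_t b)      /* vsadd.b / xvsadd.b */
    {
        int r = a + b;                  /* promoted to int, no overflow */
        if (r > INT8_MAX) return INT8_MAX;
        if (r < INT8_MIN) return INT8_MIN;
        return (int8_t)r;
    }

    static uint8_t usadd8(uint8_t a, uint8_t b)   /* vsadd.bu / xvsadd.bu */
    {
        unsigned r = (unsigned)a + b;
        return r > UINT8_MAX ? UINT8_MAX : (uint8_t)r;
    }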
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}; | ||
3 | - XVHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-20-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 18 +++++++++++ | ||
10 | target/loongarch/disas.c | 17 +++++++++++ | ||
11 | target/loongarch/vec_helper.c | 34 ++++++++++++++++----- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 30 +++++++++++++++--- | ||
13 | 4 files changed, 88 insertions(+), 11 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv | ||
20 | xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv | ||
21 | xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv | ||
22 | |||
23 | +xvhaddw_h_b 0111 01000101 01000 ..... ..... ..... @vvv | ||
24 | +xvhaddw_w_h 0111 01000101 01001 ..... ..... ..... @vvv | ||
25 | +xvhaddw_d_w 0111 01000101 01010 ..... ..... ..... @vvv | ||
26 | +xvhaddw_q_d 0111 01000101 01011 ..... ..... ..... @vvv | ||
27 | +xvhaddw_hu_bu 0111 01000101 10000 ..... ..... ..... @vvv | ||
28 | +xvhaddw_wu_hu 0111 01000101 10001 ..... ..... ..... @vvv | ||
29 | +xvhaddw_du_wu 0111 01000101 10010 ..... ..... ..... @vvv | ||
30 | +xvhaddw_qu_du 0111 01000101 10011 ..... ..... ..... @vvv | ||
31 | + | ||
32 | +xvhsubw_h_b 0111 01000101 01100 ..... ..... ..... @vvv | ||
33 | +xvhsubw_w_h 0111 01000101 01101 ..... ..... ..... @vvv | ||
34 | +xvhsubw_d_w 0111 01000101 01110 ..... ..... ..... @vvv | ||
35 | +xvhsubw_q_d 0111 01000101 01111 ..... ..... ..... @vvv | ||
36 | +xvhsubw_hu_bu 0111 01000101 10100 ..... ..... ..... @vvv | ||
37 | +xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv | ||
38 | +xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv | ||
39 | +xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv | ||
40 | + | ||
41 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
42 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
43 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
44 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/target/loongarch/disas.c | ||
47 | +++ b/target/loongarch/disas.c | ||
48 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssub_hu, vvv) | ||
49 | INSN_LASX(xvssub_wu, vvv) | ||
50 | INSN_LASX(xvssub_du, vvv) | ||
51 | |||
52 | +INSN_LASX(xvhaddw_h_b, vvv) | ||
53 | +INSN_LASX(xvhaddw_w_h, vvv) | ||
54 | +INSN_LASX(xvhaddw_d_w, vvv) | ||
55 | +INSN_LASX(xvhaddw_q_d, vvv) | ||
56 | +INSN_LASX(xvhaddw_hu_bu, vvv) | ||
57 | +INSN_LASX(xvhaddw_wu_hu, vvv) | ||
58 | +INSN_LASX(xvhaddw_du_wu, vvv) | ||
59 | +INSN_LASX(xvhaddw_qu_du, vvv) | ||
60 | +INSN_LASX(xvhsubw_h_b, vvv) | ||
61 | +INSN_LASX(xvhsubw_w_h, vvv) | ||
62 | +INSN_LASX(xvhsubw_d_w, vvv) | ||
63 | +INSN_LASX(xvhsubw_q_d, vvv) | ||
64 | +INSN_LASX(xvhsubw_hu_bu, vvv) | ||
65 | +INSN_LASX(xvhsubw_wu_hu, vvv) | ||
66 | +INSN_LASX(xvhsubw_du_wu, vvv) | ||
67 | +INSN_LASX(xvhsubw_qu_du, vvv) | ||
68 | + | ||
69 | INSN_LASX(xvreplgr2vr_b, vr) | ||
70 | INSN_LASX(xvreplgr2vr_h, vr) | ||
71 | INSN_LASX(xvreplgr2vr_w, vr) | ||
72 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/loongarch/vec_helper.c | ||
75 | +++ b/target/loongarch/vec_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #include "internals.h" | ||
78 | #include "tcg/tcg.h" | ||
79 | #include "vec.h" | ||
80 | +#include "tcg/tcg-gvec-desc.h" | ||
81 | |||
82 | #define DO_ADD(a, b) (a + b) | ||
83 | #define DO_SUB(a, b) (a - b) | ||
84 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
85 | VReg *Vj = (VReg *)vj; \ | ||
86 | VReg *Vk = (VReg *)vk; \ | ||
87 | typedef __typeof(Vd->E1(0)) TD; \ | ||
88 | + int oprsz = simd_oprsz(desc); \ | ||
89 | \ | ||
90 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
91 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
92 | Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ | ||
93 | } \ | ||
94 | } | ||
95 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) | ||
96 | |||
97 | void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
98 | { | ||
99 | + int i; | ||
100 | VReg *Vd = (VReg *)vd; | ||
101 | VReg *Vj = (VReg *)vj; | ||
102 | VReg *Vk = (VReg *)vk; | ||
103 | + int oprsz = simd_oprsz(desc); | ||
104 | |||
105 | - Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
106 | + for (i = 0; i < oprsz / 16; i++) { | ||
107 | + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), | ||
108 | + int128_makes64(Vk->D(2 * i))); | ||
109 | + } | ||
110 | } | ||
111 | |||
112 | DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) | ||
113 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) | ||
114 | |||
115 | void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
116 | { | ||
117 | + int i; | ||
118 | VReg *Vd = (VReg *)vd; | ||
119 | VReg *Vj = (VReg *)vj; | ||
120 | VReg *Vk = (VReg *)vk; | ||
121 | + int oprsz = simd_oprsz(desc); | ||
122 | |||
123 | - Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
124 | + for (i = 0; i < oprsz / 16; i++) { | ||
125 | + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), | ||
126 | + int128_makes64(Vk->D(2 * i))); | ||
127 | + } | ||
128 | } | ||
129 | |||
130 | DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) | ||
131 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) | ||
132 | |||
133 | void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
134 | { | ||
135 | + int i; | ||
136 | VReg *Vd = (VReg *)vd; | ||
137 | VReg *Vj = (VReg *)vj; | ||
138 | VReg *Vk = (VReg *)vk; | ||
139 | + int oprsz = simd_oprsz(desc); | ||
140 | |||
141 | - Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
142 | - int128_make64((uint64_t)Vk->D(0))); | ||
143 | + for (i = 0; i < oprsz / 16; i++) { | ||
144 | + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | ||
145 | + int128_make64(Vk->UD(2 * i))); | ||
146 | + } | ||
147 | } | ||
148 | |||
149 | DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) | ||
150 | @@ -XXX,XX +XXX,XX @@ DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) | ||
151 | |||
152 | void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
153 | { | ||
154 | + int i; | ||
155 | VReg *Vd = (VReg *)vd; | ||
156 | VReg *Vj = (VReg *)vj; | ||
157 | VReg *Vk = (VReg *)vk; | ||
158 | + int oprsz = simd_oprsz(desc); | ||
159 | |||
160 | - Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
161 | - int128_make64((uint64_t)Vk->D(0))); | ||
162 | + for (i = 0; i < oprsz / 16; i++) { | ||
163 | + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), | ||
164 | + int128_make64(Vk->UD(2 * i))); | ||
165 | + } | ||
166 | } | ||
167 | |||
168 | #define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
169 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
170 | index XXXXXXX..XXXXXXX 100644 | ||
171 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
172 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
173 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, | ||
174 | static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
175 | gen_helper_gvec_3 *fn) | ||
176 | { | ||
177 | + if (!check_vec(ctx, oprsz)) { | ||
178 | + return true; | ||
179 | + } | ||
180 | + | ||
181 | tcg_gen_gvec_3_ool(vec_full_offset(a->vd), | ||
182 | vec_full_offset(a->vj), | ||
183 | vec_full_offset(a->vk), | ||
184 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
185 | |||
186 | static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
187 | { | ||
188 | - if (!check_vec(ctx, 16)) { | ||
189 | - return true; | ||
190 | - } | ||
191 | - | ||
192 | return gen_vvv_vl(ctx, a, 16, fn); | ||
193 | } | ||
194 | |||
195 | +static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
196 | +{ | ||
197 | + return gen_vvv_vl(ctx, a, 32, fn); | ||
198 | +} | ||
199 | + | ||
200 | static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
201 | gen_helper_gvec_2_ptr *fn) | ||
202 | { | ||
203 | @@ -XXX,XX +XXX,XX @@ TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu) | ||
204 | TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu) | ||
205 | TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du) | ||
206 | |||
207 | +TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b) | ||
208 | +TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h) | ||
209 | +TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w) | ||
210 | +TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d) | ||
211 | +TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu) | ||
212 | +TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu) | ||
213 | +TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu) | ||
214 | +TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du) | ||
215 | +TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b) | ||
216 | +TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h) | ||
217 | +TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w) | ||
218 | +TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d) | ||
219 | +TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu) | ||
220 | +TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu) | ||
221 | +TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu) | ||
222 | +TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du) | ||
223 | + | ||
224 | static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
225 | { | ||
226 | TCGv_vec t1, t2; | ||
227 | -- | ||
228 | 2.39.1 | ||
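Two changes happen together in the helpers: the element loop is now bounded by the real operation size taken from the gvec descriptor, and the q_d variants grow a loop so a 256-bit operand produces two Int128 results instead of one. The bound oprsz / (BIT / 8) is simply the number of destination elements; for vhaddw.h.b (BIT = 16) that is 16 / 2 = 8 results at oprsz 16 (LSX) and 32 / 2 = 16 at oprsz 32 (LASX). A reference sketch of the h.b case, each halfword built from an odd byte of vj and the even byte of vk beneath it (illustrative, not the helper):

    #include <stdint.h>

    static void vhaddw_h_b_ref(int16_t *d, const int8_t *j,
                               const int8_t *k, int oprsz)
    {
        for (int i = 0; i < oprsz / 2; i++) {
            d[i] = (int16_t)j[2 * i + 1] + (int16_t)k[2 * i];
        }
    }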
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
3 | - XVSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
4 | - XVADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-21-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 45 ++++++++ | ||
11 | target/loongarch/disas.c | 43 +++++++ | ||
12 | target/loongarch/vec_helper.c | 120 ++++++++++++++------ | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 41 +++++++ | ||
14 | 4 files changed, 215 insertions(+), 34 deletions(-) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv | ||
21 | xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv | ||
22 | xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv | ||
23 | |||
24 | +xvaddwev_h_b 0111 01000001 11100 ..... ..... ..... @vvv | ||
25 | +xvaddwev_w_h 0111 01000001 11101 ..... ..... ..... @vvv | ||
26 | +xvaddwev_d_w 0111 01000001 11110 ..... ..... ..... @vvv | ||
27 | +xvaddwev_q_d 0111 01000001 11111 ..... ..... ..... @vvv | ||
28 | +xvaddwod_h_b 0111 01000010 00100 ..... ..... ..... @vvv | ||
29 | +xvaddwod_w_h 0111 01000010 00101 ..... ..... ..... @vvv | ||
30 | +xvaddwod_d_w 0111 01000010 00110 ..... ..... ..... @vvv | ||
31 | +xvaddwod_q_d 0111 01000010 00111 ..... ..... ..... @vvv | ||
32 | + | ||
33 | +xvsubwev_h_b 0111 01000010 00000 ..... ..... ..... @vvv | ||
34 | +xvsubwev_w_h 0111 01000010 00001 ..... ..... ..... @vvv | ||
35 | +xvsubwev_d_w 0111 01000010 00010 ..... ..... ..... @vvv | ||
36 | +xvsubwev_q_d 0111 01000010 00011 ..... ..... ..... @vvv | ||
37 | +xvsubwod_h_b 0111 01000010 01000 ..... ..... ..... @vvv | ||
38 | +xvsubwod_w_h 0111 01000010 01001 ..... ..... ..... @vvv | ||
39 | +xvsubwod_d_w 0111 01000010 01010 ..... ..... ..... @vvv | ||
40 | +xvsubwod_q_d 0111 01000010 01011 ..... ..... ..... @vvv | ||
41 | + | ||
42 | +xvaddwev_h_bu 0111 01000010 11100 ..... ..... ..... @vvv | ||
43 | +xvaddwev_w_hu 0111 01000010 11101 ..... ..... ..... @vvv | ||
44 | +xvaddwev_d_wu 0111 01000010 11110 ..... ..... ..... @vvv | ||
45 | +xvaddwev_q_du 0111 01000010 11111 ..... ..... ..... @vvv | ||
46 | +xvaddwod_h_bu 0111 01000011 00100 ..... ..... ..... @vvv | ||
47 | +xvaddwod_w_hu 0111 01000011 00101 ..... ..... ..... @vvv | ||
48 | +xvaddwod_d_wu 0111 01000011 00110 ..... ..... ..... @vvv | ||
49 | +xvaddwod_q_du 0111 01000011 00111 ..... ..... ..... @vvv | ||
50 | + | ||
51 | +xvsubwev_h_bu 0111 01000011 00000 ..... ..... ..... @vvv | ||
52 | +xvsubwev_w_hu 0111 01000011 00001 ..... ..... ..... @vvv | ||
53 | +xvsubwev_d_wu 0111 01000011 00010 ..... ..... ..... @vvv | ||
54 | +xvsubwev_q_du 0111 01000011 00011 ..... ..... ..... @vvv | ||
55 | +xvsubwod_h_bu 0111 01000011 01000 ..... ..... ..... @vvv | ||
56 | +xvsubwod_w_hu 0111 01000011 01001 ..... ..... ..... @vvv | ||
57 | +xvsubwod_d_wu 0111 01000011 01010 ..... ..... ..... @vvv | ||
58 | +xvsubwod_q_du 0111 01000011 01011 ..... ..... ..... @vvv | ||
59 | + | ||
60 | +xvaddwev_h_bu_b 0111 01000011 11100 ..... ..... ..... @vvv | ||
61 | +xvaddwev_w_hu_h 0111 01000011 11101 ..... ..... ..... @vvv | ||
62 | +xvaddwev_d_wu_w 0111 01000011 11110 ..... ..... ..... @vvv | ||
63 | +xvaddwev_q_du_d 0111 01000011 11111 ..... ..... ..... @vvv | ||
64 | +xvaddwod_h_bu_b 0111 01000100 00000 ..... ..... ..... @vvv | ||
65 | +xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv | ||
66 | +xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv | ||
67 | +xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv | ||
68 | + | ||
69 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
70 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
71 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
72 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/loongarch/disas.c | ||
75 | +++ b/target/loongarch/disas.c | ||
76 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvhsubw_wu_hu, vvv) | ||
77 | INSN_LASX(xvhsubw_du_wu, vvv) | ||
78 | INSN_LASX(xvhsubw_qu_du, vvv) | ||
79 | |||
80 | +INSN_LASX(xvaddwev_h_b, vvv) | ||
81 | +INSN_LASX(xvaddwev_w_h, vvv) | ||
82 | +INSN_LASX(xvaddwev_d_w, vvv) | ||
83 | +INSN_LASX(xvaddwev_q_d, vvv) | ||
84 | +INSN_LASX(xvaddwod_h_b, vvv) | ||
85 | +INSN_LASX(xvaddwod_w_h, vvv) | ||
86 | +INSN_LASX(xvaddwod_d_w, vvv) | ||
87 | +INSN_LASX(xvaddwod_q_d, vvv) | ||
88 | +INSN_LASX(xvsubwev_h_b, vvv) | ||
89 | +INSN_LASX(xvsubwev_w_h, vvv) | ||
90 | +INSN_LASX(xvsubwev_d_w, vvv) | ||
91 | +INSN_LASX(xvsubwev_q_d, vvv) | ||
92 | +INSN_LASX(xvsubwod_h_b, vvv) | ||
93 | +INSN_LASX(xvsubwod_w_h, vvv) | ||
94 | +INSN_LASX(xvsubwod_d_w, vvv) | ||
95 | +INSN_LASX(xvsubwod_q_d, vvv) | ||
96 | + | ||
97 | +INSN_LASX(xvaddwev_h_bu, vvv) | ||
98 | +INSN_LASX(xvaddwev_w_hu, vvv) | ||
99 | +INSN_LASX(xvaddwev_d_wu, vvv) | ||
100 | +INSN_LASX(xvaddwev_q_du, vvv) | ||
101 | +INSN_LASX(xvaddwod_h_bu, vvv) | ||
102 | +INSN_LASX(xvaddwod_w_hu, vvv) | ||
103 | +INSN_LASX(xvaddwod_d_wu, vvv) | ||
104 | +INSN_LASX(xvaddwod_q_du, vvv) | ||
105 | +INSN_LASX(xvsubwev_h_bu, vvv) | ||
106 | +INSN_LASX(xvsubwev_w_hu, vvv) | ||
107 | +INSN_LASX(xvsubwev_d_wu, vvv) | ||
108 | +INSN_LASX(xvsubwev_q_du, vvv) | ||
109 | +INSN_LASX(xvsubwod_h_bu, vvv) | ||
110 | +INSN_LASX(xvsubwod_w_hu, vvv) | ||
111 | +INSN_LASX(xvsubwod_d_wu, vvv) | ||
112 | +INSN_LASX(xvsubwod_q_du, vvv) | ||
113 | + | ||
114 | +INSN_LASX(xvaddwev_h_bu_b, vvv) | ||
115 | +INSN_LASX(xvaddwev_w_hu_h, vvv) | ||
116 | +INSN_LASX(xvaddwev_d_wu_w, vvv) | ||
117 | +INSN_LASX(xvaddwev_q_du_d, vvv) | ||
118 | +INSN_LASX(xvaddwod_h_bu_b, vvv) | ||
119 | +INSN_LASX(xvaddwod_w_hu_h, vvv) | ||
120 | +INSN_LASX(xvaddwod_d_wu_w, vvv) | ||
121 | +INSN_LASX(xvaddwod_q_du_d, vvv) | ||
122 | + | ||
123 | INSN_LASX(xvreplgr2vr_b, vr) | ||
124 | INSN_LASX(xvreplgr2vr_h, vr) | ||
125 | INSN_LASX(xvreplgr2vr_w, vr) | ||
126 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
127 | index XXXXXXX..XXXXXXX 100644 | ||
128 | --- a/target/loongarch/vec_helper.c | ||
129 | +++ b/target/loongarch/vec_helper.c | ||
130 | @@ -XXX,XX +XXX,XX @@ void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
131 | } | ||
132 | |||
133 | #define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
134 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
135 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
136 | { \ | ||
137 | int i; \ | ||
138 | VReg *Vd = (VReg *)vd; \ | ||
139 | VReg *Vj = (VReg *)vj; \ | ||
140 | VReg *Vk = (VReg *)vk; \ | ||
141 | typedef __typeof(Vd->E1(0)) TD; \ | ||
142 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
143 | + int oprsz = simd_oprsz(desc); \ | ||
144 | + \ | ||
145 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
146 | Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \ | ||
147 | } \ | ||
148 | } | ||
149 | |||
150 | #define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ | ||
151 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
152 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
153 | { \ | ||
154 | int i; \ | ||
155 | VReg *Vd = (VReg *)vd; \ | ||
156 | VReg *Vj = (VReg *)vj; \ | ||
157 | VReg *Vk = (VReg *)vk; \ | ||
158 | typedef __typeof(Vd->E1(0)) TD; \ | ||
159 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
160 | + int oprsz = simd_oprsz(desc); \ | ||
161 | + \ | ||
162 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
163 | Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ | ||
164 | } \ | ||
165 | } | ||
166 | |||
167 | -void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
168 | +void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
169 | { | ||
170 | + int i; | ||
171 | VReg *Vd = (VReg *)vd; | ||
172 | VReg *Vj = (VReg *)vj; | ||
173 | VReg *Vk = (VReg *)vk; | ||
174 | + int oprsz = simd_oprsz(desc); | ||
175 | |||
176 | - Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); | ||
177 | + for (i = 0; i < oprsz / 16; i++) { | ||
178 | + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), | ||
179 | + int128_makes64(Vk->D(2 * i))); | ||
180 | + } | ||
181 | } | ||
182 | |||
183 | DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) | ||
184 | DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) | ||
185 | DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) | ||
186 | |||
187 | -void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
188 | +void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
189 | { | ||
190 | + int i; | ||
191 | VReg *Vd = (VReg *)vd; | ||
192 | VReg *Vj = (VReg *)vj; | ||
193 | VReg *Vk = (VReg *)vk; | ||
194 | + int oprsz = simd_oprsz(desc); | ||
195 | |||
196 | - Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); | ||
197 | + for (i = 0; i < oprsz / 16; i++) { | ||
198 | + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), | ||
199 | + int128_makes64(Vk->D(2 * i + 1))); | ||
200 | + } | ||
201 | } | ||
202 | |||
203 | DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) | ||
204 | DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) | ||
205 | DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) | ||
206 | |||
207 | -void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
208 | +void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
209 | { | ||
210 | + int i; | ||
211 | VReg *Vd = (VReg *)vd; | ||
212 | VReg *Vj = (VReg *)vj; | ||
213 | VReg *Vk = (VReg *)vk; | ||
214 | + int oprsz = simd_oprsz(desc); | ||
215 | |||
216 | - Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); | ||
217 | + for (i = 0; i < oprsz / 16; i++) { | ||
218 | + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), | ||
219 | + int128_makes64(Vk->D(2 * i))); | ||
220 | + } | ||
221 | } | ||
222 | |||
223 | DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) | ||
224 | DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) | ||
225 | DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) | ||
226 | |||
227 | -void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
228 | +void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
229 | { | ||
230 | + int i; | ||
231 | VReg *Vd = (VReg *)vd; | ||
232 | VReg *Vj = (VReg *)vj; | ||
233 | VReg *Vk = (VReg *)vk; | ||
234 | + int oprsz = simd_oprsz(desc); | ||
235 | |||
236 | - Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); | ||
237 | + for (i = 0; i < oprsz / 16; i++) { | ||
238 | + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), | ||
239 | + int128_makes64(Vk->D(2 * i + 1))); | ||
240 | + } | ||
241 | } | ||
242 | |||
243 | DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) | ||
244 | DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) | ||
245 | DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) | ||
246 | |||
247 | -void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
248 | +void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
249 | { | ||
250 | + int i; | ||
251 | VReg *Vd = (VReg *)vd; | ||
252 | VReg *Vj = (VReg *)vj; | ||
253 | VReg *Vk = (VReg *)vk; | ||
254 | + int oprsz = simd_oprsz(desc); | ||
255 | |||
256 | - Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), | ||
257 | - int128_make64((uint64_t)Vk->D(0))); | ||
258 | + for (i = 0; i < oprsz / 16; i++) { | ||
259 | + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), | ||
260 | + int128_make64(Vk->UD(2 * i))); | ||
261 | + } | ||
262 | } | ||
263 | |||
264 | DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) | ||
265 | DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) | ||
266 | DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) | ||
267 | |||
268 | -void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
269 | +void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
270 | { | ||
271 | + int i; | ||
272 | VReg *Vd = (VReg *)vd; | ||
273 | VReg *Vj = (VReg *)vj; | ||
274 | VReg *Vk = (VReg *)vk; | ||
275 | + int oprsz = simd_oprsz(desc); | ||
276 | |||
277 | - Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
278 | - int128_make64((uint64_t)Vk->D(1))); | ||
279 | + for (i = 0; i < oprsz / 16; i++) { | ||
280 | + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | ||
281 | + int128_make64(Vk->UD(2 * i + 1))); | ||
282 | + } | ||
283 | } | ||
284 | |||
285 | DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) | ||
286 | DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) | ||
287 | DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) | ||
288 | |||
289 | -void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
290 | +void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
291 | { | ||
292 | + int i; | ||
293 | VReg *Vd = (VReg *)vd; | ||
294 | VReg *Vj = (VReg *)vj; | ||
295 | VReg *Vk = (VReg *)vk; | ||
296 | + int oprsz = simd_oprsz(desc); | ||
297 | |||
298 | - Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)), | ||
299 | - int128_make64((uint64_t)Vk->D(0))); | ||
300 | + for (i = 0; i < oprsz / 16; i++) { | ||
301 | + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), | ||
302 | + int128_make64(Vk->UD(2 * i))); | ||
303 | + } | ||
304 | } | ||
305 | |||
306 | DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) | ||
307 | DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) | ||
308 | DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) | ||
309 | |||
310 | -void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
311 | +void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
312 | { | ||
313 | + int i; | ||
314 | VReg *Vd = (VReg *)vd; | ||
315 | VReg *Vj = (VReg *)vj; | ||
316 | VReg *Vk = (VReg *)vk; | ||
317 | + int oprsz = simd_oprsz(desc); | ||
318 | |||
319 | - Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
320 | - int128_make64((uint64_t)Vk->D(1))); | ||
321 | + for (i = 0; i < oprsz / 16; i++) { | ||
322 | + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), | ||
323 | + int128_make64(Vk->UD(2 * i + 1))); | ||
324 | + } | ||
325 | } | ||
326 | |||
327 | DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) | ||
328 | @@ -XXX,XX +XXX,XX @@ DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) | ||
329 | DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) | ||
330 | |||
331 | #define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
332 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
333 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
334 | { \ | ||
335 | int i; \ | ||
336 | VReg *Vd = (VReg *)vd; \ | ||
337 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
338 | VReg *Vk = (VReg *)vk; \ | ||
339 | typedef __typeof(Vd->ES1(0)) TDS; \ | ||
340 | typedef __typeof(Vd->EU1(0)) TDU; \ | ||
341 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
342 | + int oprsz = simd_oprsz(desc); \ | ||
343 | + \ | ||
344 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
345 | Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \ | ||
346 | } \ | ||
347 | } | ||
348 | |||
349 | #define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
350 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
351 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
352 | { \ | ||
353 | int i; \ | ||
354 | VReg *Vd = (VReg *)vd; \ | ||
355 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
356 | VReg *Vk = (VReg *)vk; \ | ||
357 | typedef __typeof(Vd->ES1(0)) TDS; \ | ||
358 | typedef __typeof(Vd->EU1(0)) TDU; \ | ||
359 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
360 | + int oprsz = simd_oprsz(desc); \ | ||
361 | + \ | ||
362 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
363 | Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ | ||
364 | } \ | ||
365 | } | ||
366 | |||
367 | -void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
368 | +void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
369 | { | ||
370 | + int i; | ||
371 | VReg *Vd = (VReg *)vd; | ||
372 | VReg *Vj = (VReg *)vj; | ||
373 | VReg *Vk = (VReg *)vk; | ||
374 | + int oprsz = simd_oprsz(desc); | ||
375 | |||
376 | - Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), | ||
377 | - int128_makes64(Vk->D(0))); | ||
378 | + for (i = 0; i < oprsz / 16; i++) { | ||
379 | + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), | ||
380 | + int128_makes64(Vk->D(2 * i))); | ||
381 | + } | ||
382 | } | ||
383 | |||
384 | DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
385 | DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) | ||
386 | DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
387 | |||
388 | -void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
389 | +void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
390 | { | ||
391 | + int i; | ||
392 | VReg *Vd = (VReg *)vd; | ||
393 | VReg *Vj = (VReg *)vj; | ||
394 | VReg *Vk = (VReg *)vk; | ||
395 | + int oprsz = simd_oprsz(desc); | ||
396 | |||
397 | - Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
398 | - int128_makes64(Vk->D(1))); | ||
399 | + for (i = 0; i < oprsz / 16; i++) { | ||
400 | + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), | ||
401 | + int128_makes64(Vk->D(2 * i + 1))); | ||
402 | + } | ||
403 | } | ||
404 | |||
405 | DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
406 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
407 | index XXXXXXX..XXXXXXX 100644 | ||
408 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
409 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
410 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s) | ||
411 | TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s) | ||
412 | TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s) | ||
413 | TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s) | ||
414 | +TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s) | ||
415 | +TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s) | ||
416 | +TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s) | ||
417 | +TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s) | ||
418 | |||
419 | static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
420 | { | ||
421 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s) | ||
422 | TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s) | ||
423 | TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s) | ||
424 | TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s) | ||
425 | +TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s) | ||
426 | +TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s) | ||
427 | +TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s) | ||
428 | +TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s) | ||
429 | + | ||
430 | |||
431 | static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
432 | { | ||
433 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s) | ||
434 | TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s) | ||
435 | TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s) | ||
436 | TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s) | ||
437 | +TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s) | ||
438 | +TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s) | ||
439 | +TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s) | ||
440 | +TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s) | ||
441 | |||
442 | static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
443 | { | ||
444 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s) | ||
445 | TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s) | ||
446 | TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s) | ||
447 | TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s) | ||
448 | +TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s) | ||
449 | +TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s) | ||
450 | +TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s) | ||
451 | +TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s) | ||
452 | |||
453 | static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
454 | { | ||
455 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u) | ||
456 | TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u) | ||
457 | TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u) | ||
458 | TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u) | ||
459 | +TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u) | ||
460 | +TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u) | ||
461 | +TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u) | ||
462 | +TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u) | ||
463 | |||
464 | static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
465 | { | ||
466 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u) | ||
467 | TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u) | ||
468 | TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u) | ||
469 | TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u) | ||
470 | +TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u) | ||
471 | +TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u) | ||
472 | +TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u) | ||
473 | +TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u) | ||
474 | |||
475 | static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
476 | { | ||
477 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u) | ||
478 | TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u) | ||
479 | TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u) | ||
480 | TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u) | ||
481 | +TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u) | ||
482 | +TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u) | ||
483 | +TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u) | ||
484 | +TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u) | ||
485 | |||
486 | static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
487 | { | ||
488 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u) | ||
489 | TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u) | ||
490 | TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u) | ||
491 | TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u) | ||
492 | +TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u) | ||
493 | +TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u) | ||
494 | +TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u) | ||
495 | +TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u) | ||
496 | |||
497 | static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
498 | { | ||
499 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s) | ||
500 | TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s) | ||
501 | TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s) | ||
502 | TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s) | ||
503 | +TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s) | ||
504 | +TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s) | ||
505 | +TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s) | ||
506 | +TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s) | ||
507 | |||
508 | static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
509 | { | ||
510 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s) | ||
511 | TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s) | ||
512 | TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s) | ||
513 | TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s) | ||
514 | +TRANS(xvaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwod_u_s) | ||
515 | +TRANS(xvaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwod_u_s) | ||
516 | +TRANS(xvaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwod_u_s) | ||
517 | +TRANS(xvaddwod_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwod_u_s) | ||
518 | |||
519 | static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, | ||
520 | void (*gen_shr_vec)(unsigned, TCGv_vec, | ||
521 | -- | ||
522 | 2.39.1 | ||
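Besides wiring up the LASX entries, this patch renames the helpers' fourth argument from the unused v to desc and starts reading the operation size from it via simd_oprsz(), so one helper body now serves both 128-bit and 256-bit operands. The even/odd split itself is unchanged: the "ev" forms widen the even-indexed source elements, the "od" forms the odd-indexed ones. A reference sketch of vaddwev.w.h (illustrative, not the helper):

    #include <stdint.h>

    static void vaddwev_w_h_ref(int32_t *d, const int16_t *j,
                                const int16_t *k, int oprsz)
    {
        for (int i = 0; i < oprsz / 4; i++) {
            d[i] = (int32_t)j[2 * i] + (int32_t)k[2 * i];
        }
    }

The od variant would read j[2 * i + 1] and k[2 * i + 1] instead.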
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVAVG.{B/H/W/D/}[U]; | ||
3 | - XVAVGR.{B/H/W/D}[U]. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-22-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 17 ++++++++++++++++ | ||
10 | target/loongarch/disas.c | 17 ++++++++++++++++ | ||
11 | target/loongarch/vec_helper.c | 22 +++++++++++---------- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 16 +++++++++++++++ | ||
13 | 4 files changed, 62 insertions(+), 10 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv | ||
20 | xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv | ||
21 | xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv | ||
22 | |||
23 | +xvavg_b 0111 01000110 01000 ..... ..... ..... @vvv | ||
24 | +xvavg_h 0111 01000110 01001 ..... ..... ..... @vvv | ||
25 | +xvavg_w 0111 01000110 01010 ..... ..... ..... @vvv | ||
26 | +xvavg_d 0111 01000110 01011 ..... ..... ..... @vvv | ||
27 | +xvavg_bu 0111 01000110 01100 ..... ..... ..... @vvv | ||
28 | +xvavg_hu 0111 01000110 01101 ..... ..... ..... @vvv | ||
29 | +xvavg_wu 0111 01000110 01110 ..... ..... ..... @vvv | ||
30 | +xvavg_du 0111 01000110 01111 ..... ..... ..... @vvv | ||
31 | +xvavgr_b 0111 01000110 10000 ..... ..... ..... @vvv | ||
32 | +xvavgr_h 0111 01000110 10001 ..... ..... ..... @vvv | ||
33 | +xvavgr_w 0111 01000110 10010 ..... ..... ..... @vvv | ||
34 | +xvavgr_d 0111 01000110 10011 ..... ..... ..... @vvv | ||
35 | +xvavgr_bu 0111 01000110 10100 ..... ..... ..... @vvv | ||
36 | +xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv | ||
37 | +xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv | ||
38 | +xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv | ||
39 | + | ||
40 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
41 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
42 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
43 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/loongarch/disas.c | ||
46 | +++ b/target/loongarch/disas.c | ||
47 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvaddwod_w_hu_h, vvv) | ||
48 | INSN_LASX(xvaddwod_d_wu_w, vvv) | ||
49 | INSN_LASX(xvaddwod_q_du_d, vvv) | ||
50 | |||
51 | +INSN_LASX(xvavg_b, vvv) | ||
52 | +INSN_LASX(xvavg_h, vvv) | ||
53 | +INSN_LASX(xvavg_w, vvv) | ||
54 | +INSN_LASX(xvavg_d, vvv) | ||
55 | +INSN_LASX(xvavg_bu, vvv) | ||
56 | +INSN_LASX(xvavg_hu, vvv) | ||
57 | +INSN_LASX(xvavg_wu, vvv) | ||
58 | +INSN_LASX(xvavg_du, vvv) | ||
59 | +INSN_LASX(xvavgr_b, vvv) | ||
60 | +INSN_LASX(xvavgr_h, vvv) | ||
61 | +INSN_LASX(xvavgr_w, vvv) | ||
62 | +INSN_LASX(xvavgr_d, vvv) | ||
63 | +INSN_LASX(xvavgr_bu, vvv) | ||
64 | +INSN_LASX(xvavgr_hu, vvv) | ||
65 | +INSN_LASX(xvavgr_wu, vvv) | ||
66 | +INSN_LASX(xvavgr_du, vvv) | ||
67 | + | ||
68 | INSN_LASX(xvreplgr2vr_b, vr) | ||
69 | INSN_LASX(xvreplgr2vr_h, vr) | ||
70 | INSN_LASX(xvreplgr2vr_w, vr) | ||
71 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/loongarch/vec_helper.c | ||
74 | +++ b/target/loongarch/vec_helper.c | ||
75 | @@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
76 | #define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) | ||
77 | #define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) | ||
78 | |||
79 | -#define DO_3OP(NAME, BIT, E, DO_OP) \ | ||
80 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
81 | -{ \ | ||
82 | - int i; \ | ||
83 | - VReg *Vd = (VReg *)vd; \ | ||
84 | - VReg *Vj = (VReg *)vj; \ | ||
85 | - VReg *Vk = (VReg *)vk; \ | ||
86 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
87 | - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
88 | - } \ | ||
89 | +#define DO_3OP(NAME, BIT, E, DO_OP) \ | ||
90 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
91 | +{ \ | ||
92 | + int i; \ | ||
93 | + VReg *Vd = (VReg *)vd; \ | ||
94 | + VReg *Vj = (VReg *)vj; \ | ||
95 | + VReg *Vk = (VReg *)vk; \ | ||
96 | + int oprsz = simd_oprsz(desc); \ | ||
97 | + \ | ||
98 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
99 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
100 | + } \ | ||
101 | } | ||
102 | |||
103 | DO_3OP(vavg_b, 8, B, DO_VAVG) | ||
104 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
107 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
108 | @@ -XXX,XX +XXX,XX @@ TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u) | ||
109 | TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u) | ||
110 | TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u) | ||
111 | TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u) | ||
112 | +TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s) | ||
113 | +TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s) | ||
114 | +TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s) | ||
115 | +TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s) | ||
116 | +TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u) | ||
117 | +TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u) | ||
118 | +TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u) | ||
119 | +TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u) | ||
120 | |||
121 | static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
122 | uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
123 | @@ -XXX,XX +XXX,XX @@ TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u) | ||
124 | TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u) | ||
125 | TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u) | ||
126 | TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u) | ||
127 | +TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s) | ||
128 | +TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s) | ||
129 | +TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s) | ||
130 | +TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s) | ||
131 | +TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u) | ||
132 | +TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u) | ||
133 | +TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u) | ||
134 | +TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u) | ||
135 | |||
136 | static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
137 | { | ||
138 | -- | ||
139 | 2.39.1 | ||
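A note on the averaging macros this patch keeps: DO_VAVG computes a floored average and DO_VAVGR a rounded one without widening, since (a >> 1) + (b >> 1) cannot overflow and the final term restores the bit lost by the two shifts. A self-contained check of both identities over all signed byte pairs, assuming arithmetic right shift as QEMU does (not QEMU code):

    #include <assert.h>

    int main(void)
    {
        for (int a = -128; a < 128; a++) {
            for (int b = -128; b < 128; b++) {
                int avg  = (a >> 1) + (b >> 1) + (a & b & 1);    /* DO_VAVG  */
                int avgr = (a >> 1) + (b >> 1) + ((a | b) & 1);  /* DO_VAVGR */
                assert(avg  == ((a + b) >> 1));       /* floor((a + b) / 2) */
                assert(avgr == ((a + b + 1) >> 1));   /* round half up      */
            }
        }
        return 0;
    }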
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVABSD.{B/H/W/D}[U]. | ||
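Per element this is the absolute difference |vj - vk| under a signed or unsigned comparison, roughly (a behavioural sketch, not the helper source):

    #include <stdint.h>
    #include <stdio.h>

    static int8_t absd_b(int8_t a, int8_t b)      /* XVABSD.B  */
    {
        return a > b ? a - b : b - a;             /* wraps modulo 2^8 */
    }

    static uint8_t absd_bu(uint8_t a, uint8_t b)  /* XVABSD.BU */
    {
        return a > b ? a - b : b - a;
    }

    int main(void)
    {
        printf("%d %u\n", absd_b(-3, 5), absd_bu(3, 250));  /* 8 247 */
        return 0;
    }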
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-23-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 9 +++++++++ | ||
9 | target/loongarch/disas.c | 9 +++++++++ | ||
10 | target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++ | ||
11 | 3 files changed, 26 insertions(+) | ||
12 | |||
13 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/insns.decode | ||
16 | +++ b/target/loongarch/insns.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv | ||
18 | xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv | ||
19 | xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv | ||
20 | |||
21 | +xvabsd_b 0111 01000110 00000 ..... ..... ..... @vvv | ||
22 | +xvabsd_h 0111 01000110 00001 ..... ..... ..... @vvv | ||
23 | +xvabsd_w 0111 01000110 00010 ..... ..... ..... @vvv | ||
24 | +xvabsd_d 0111 01000110 00011 ..... ..... ..... @vvv | ||
25 | +xvabsd_bu 0111 01000110 00100 ..... ..... ..... @vvv | ||
26 | +xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv | ||
27 | +xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv | ||
28 | +xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv | ||
29 | + | ||
30 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
31 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
32 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
33 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/loongarch/disas.c | ||
36 | +++ b/target/loongarch/disas.c | ||
37 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvavgr_hu, vvv) | ||
38 | INSN_LASX(xvavgr_wu, vvv) | ||
39 | INSN_LASX(xvavgr_du, vvv) | ||
40 | |||
41 | +INSN_LASX(xvabsd_b, vvv) | ||
42 | +INSN_LASX(xvabsd_h, vvv) | ||
43 | +INSN_LASX(xvabsd_w, vvv) | ||
44 | +INSN_LASX(xvabsd_d, vvv) | ||
45 | +INSN_LASX(xvabsd_bu, vvv) | ||
46 | +INSN_LASX(xvabsd_hu, vvv) | ||
47 | +INSN_LASX(xvabsd_wu, vvv) | ||
48 | +INSN_LASX(xvabsd_du, vvv) | ||
49 | + | ||
50 | INSN_LASX(xvreplgr2vr_b, vr) | ||
51 | INSN_LASX(xvreplgr2vr_h, vr) | ||
52 | INSN_LASX(xvreplgr2vr_w, vr) | ||
53 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
56 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
57 | @@ -XXX,XX +XXX,XX @@ TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u) | ||
58 | TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u) | ||
59 | TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u) | ||
60 | TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u) | ||
61 | +TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s) | ||
62 | +TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s) | ||
63 | +TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s) | ||
64 | +TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s) | ||
65 | +TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u) | ||
66 | +TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u) | ||
67 | +TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u) | ||
68 | +TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u) | ||
69 | |||
70 | static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
71 | { | ||
72 | -- | ||
73 | 2.39.1 | ||
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVADDA.{B/H/W/D}. | ||
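Per element this is |vj| + |vk| with the usual modulo wrap-around at the element width, roughly (a behavioural sketch, not the helper source):

    #include <stdint.h>
    #include <stdio.h>

    static int8_t adda_b(int8_t a, int8_t b)  /* XVADDA.B */
    {
        int8_t aa = a < 0 ? -a : a;           /* note: -(-128) stays -128 */
        int8_t bb = b < 0 ? -b : b;
        return aa + bb;                       /* wraps modulo 2^8 */
    }

    int main(void)
    {
        printf("%d\n", adda_b(-3, 100));      /* 103 */
        return 0;
    }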
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-24-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 ++++ | ||
9 | target/loongarch/disas.c | 5 ++++ | ||
10 | target/loongarch/vec_helper.c | 30 +++++++++++---------- | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 4 +++ | ||
12 | 4 files changed, 30 insertions(+), 14 deletions(-) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv | ||
19 | xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv | ||
20 | xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv | ||
21 | |||
22 | +xvadda_b 0111 01000101 11000 ..... ..... ..... @vvv | ||
23 | +xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv | ||
24 | +xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv | ||
25 | +xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv | ||
26 | + | ||
27 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
28 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
29 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
30 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/loongarch/disas.c | ||
33 | +++ b/target/loongarch/disas.c | ||
34 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvabsd_hu, vvv) | ||
35 | INSN_LASX(xvabsd_wu, vvv) | ||
36 | INSN_LASX(xvabsd_du, vvv) | ||
37 | |||
38 | +INSN_LASX(xvadda_b, vvv) | ||
39 | +INSN_LASX(xvadda_h, vvv) | ||
40 | +INSN_LASX(xvadda_w, vvv) | ||
41 | +INSN_LASX(xvadda_d, vvv) | ||
42 | + | ||
43 | INSN_LASX(xvreplgr2vr_b, vr) | ||
44 | INSN_LASX(xvreplgr2vr_h, vr) | ||
45 | INSN_LASX(xvreplgr2vr_w, vr) | ||
46 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/loongarch/vec_helper.c | ||
49 | +++ b/target/loongarch/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_du, 64, UD, DO_VABSD) | ||
51 | |||
52 | #define DO_VABS(a) ((a < 0) ? (-a) : (a)) | ||
53 | |||
54 | -#define DO_VADDA(NAME, BIT, E, DO_OP) \ | ||
55 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
56 | -{ \ | ||
57 | - int i; \ | ||
58 | - VReg *Vd = (VReg *)vd; \ | ||
59 | - VReg *Vj = (VReg *)vj; \ | ||
60 | - VReg *Vk = (VReg *)vk; \ | ||
61 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
62 | - Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i)); \ | ||
63 | - } \ | ||
64 | +#define DO_VADDA(NAME, BIT, E) \ | ||
65 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
66 | +{ \ | ||
67 | + int i; \ | ||
68 | + VReg *Vd = (VReg *)vd; \ | ||
69 | + VReg *Vj = (VReg *)vj; \ | ||
70 | + VReg *Vk = (VReg *)vk; \ | ||
71 | + int oprsz = simd_oprsz(desc); \ | ||
72 | + \ | ||
73 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
74 | + Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \ | ||
75 | + } \ | ||
76 | } | ||
77 | |||
78 | -DO_VADDA(vadda_b, 8, B, DO_VABS) | ||
79 | -DO_VADDA(vadda_h, 16, H, DO_VABS) | ||
80 | -DO_VADDA(vadda_w, 32, W, DO_VABS) | ||
81 | -DO_VADDA(vadda_d, 64, D, DO_VABS) | ||
82 | +DO_VADDA(vadda_b, 8, B) | ||
83 | +DO_VADDA(vadda_h, 16, H) | ||
84 | +DO_VADDA(vadda_w, 32, W) | ||
85 | +DO_VADDA(vadda_d, 64, D) | ||
86 | |||
87 | #define DO_MIN(a, b) (a < b ? a : b) | ||
88 | #define DO_MAX(a, b) (a > b ? a : b) | ||
89 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
92 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
93 | @@ -XXX,XX +XXX,XX @@ TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda) | ||
94 | TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda) | ||
95 | TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda) | ||
96 | TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda) | ||
97 | +TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda) | ||
98 | +TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda) | ||
99 | +TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda) | ||
100 | +TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda) | ||
101 | |||
102 | TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax) | ||
103 | TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax) | ||
104 | -- | ||
105 | 2.39.1 | ||
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVMAX[I].{B/H/W/D}[U]; | ||
3 | - XVMIN[I].{B/H/W/D}[U]. | ||
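For the immediate forms, the 5-bit field is sign-extended (si5) or zero-extended (ui5) before the helper narrows it to the element type with the (TD)imm cast. A sketch of that narrowing, assuming the decoder has already widened the field into a 64-bit imm:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t si5 = (uint64_t)(int64_t)-5;   /* assumed decoder output for si5 -5 */
        uint64_t ui5 = 31;                      /* decoder output for ui5 31 */
        printf("vmini.b  sees %d\n", (int8_t)si5);             /* -5 */
        printf("vmini.bu sees %u\n", (unsigned)(uint8_t)ui5);  /* 31 */
        return 0;
    }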
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-25-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 36 +++++++++++++++++++++ | ||
10 | target/loongarch/disas.c | 34 +++++++++++++++++++ | ||
11 | target/loongarch/vec_helper.c | 23 ++++++------- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++ | ||
13 | 4 files changed, 114 insertions(+), 11 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv | ||
20 | xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv | ||
21 | xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv | ||
22 | |||
23 | +xvmax_b 0111 01000111 00000 ..... ..... ..... @vvv | ||
24 | +xvmax_h 0111 01000111 00001 ..... ..... ..... @vvv | ||
25 | +xvmax_w 0111 01000111 00010 ..... ..... ..... @vvv | ||
26 | +xvmax_d 0111 01000111 00011 ..... ..... ..... @vvv | ||
27 | +xvmax_bu 0111 01000111 01000 ..... ..... ..... @vvv | ||
28 | +xvmax_hu 0111 01000111 01001 ..... ..... ..... @vvv | ||
29 | +xvmax_wu 0111 01000111 01010 ..... ..... ..... @vvv | ||
30 | +xvmax_du 0111 01000111 01011 ..... ..... ..... @vvv | ||
31 | + | ||
32 | +xvmaxi_b 0111 01101001 00000 ..... ..... ..... @vv_i5 | ||
33 | +xvmaxi_h 0111 01101001 00001 ..... ..... ..... @vv_i5 | ||
34 | +xvmaxi_w 0111 01101001 00010 ..... ..... ..... @vv_i5 | ||
35 | +xvmaxi_d 0111 01101001 00011 ..... ..... ..... @vv_i5 | ||
36 | +xvmaxi_bu 0111 01101001 01000 ..... ..... ..... @vv_ui5 | ||
37 | +xvmaxi_hu 0111 01101001 01001 ..... ..... ..... @vv_ui5 | ||
38 | +xvmaxi_wu 0111 01101001 01010 ..... ..... ..... @vv_ui5 | ||
39 | +xvmaxi_du 0111 01101001 01011 ..... ..... ..... @vv_ui5 | ||
40 | + | ||
41 | +xvmin_b 0111 01000111 00100 ..... ..... ..... @vvv | ||
42 | +xvmin_h 0111 01000111 00101 ..... ..... ..... @vvv | ||
43 | +xvmin_w 0111 01000111 00110 ..... ..... ..... @vvv | ||
44 | +xvmin_d 0111 01000111 00111 ..... ..... ..... @vvv | ||
45 | +xvmin_bu 0111 01000111 01100 ..... ..... ..... @vvv | ||
46 | +xvmin_hu 0111 01000111 01101 ..... ..... ..... @vvv | ||
47 | +xvmin_wu 0111 01000111 01110 ..... ..... ..... @vvv | ||
48 | +xvmin_du 0111 01000111 01111 ..... ..... ..... @vvv | ||
49 | + | ||
50 | +xvmini_b 0111 01101001 00100 ..... ..... ..... @vv_i5 | ||
51 | +xvmini_h 0111 01101001 00101 ..... ..... ..... @vv_i5 | ||
52 | +xvmini_w 0111 01101001 00110 ..... ..... ..... @vv_i5 | ||
53 | +xvmini_d 0111 01101001 00111 ..... ..... ..... @vv_i5 | ||
54 | +xvmini_bu 0111 01101001 01100 ..... ..... ..... @vv_ui5 | ||
55 | +xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5 | ||
56 | +xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5 | ||
57 | +xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5 | ||
58 | + | ||
59 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
60 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
61 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
62 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/loongarch/disas.c | ||
65 | +++ b/target/loongarch/disas.c | ||
66 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvadda_h, vvv) | ||
67 | INSN_LASX(xvadda_w, vvv) | ||
68 | INSN_LASX(xvadda_d, vvv) | ||
69 | |||
70 | +INSN_LASX(xvmax_b, vvv) | ||
71 | +INSN_LASX(xvmax_h, vvv) | ||
72 | +INSN_LASX(xvmax_w, vvv) | ||
73 | +INSN_LASX(xvmax_d, vvv) | ||
74 | +INSN_LASX(xvmin_b, vvv) | ||
75 | +INSN_LASX(xvmin_h, vvv) | ||
76 | +INSN_LASX(xvmin_w, vvv) | ||
77 | +INSN_LASX(xvmin_d, vvv) | ||
78 | +INSN_LASX(xvmax_bu, vvv) | ||
79 | +INSN_LASX(xvmax_hu, vvv) | ||
80 | +INSN_LASX(xvmax_wu, vvv) | ||
81 | +INSN_LASX(xvmax_du, vvv) | ||
82 | +INSN_LASX(xvmin_bu, vvv) | ||
83 | +INSN_LASX(xvmin_hu, vvv) | ||
84 | +INSN_LASX(xvmin_wu, vvv) | ||
85 | +INSN_LASX(xvmin_du, vvv) | ||
86 | + | ||
87 | +INSN_LASX(xvmaxi_b, vv_i) | ||
88 | +INSN_LASX(xvmaxi_h, vv_i) | ||
89 | +INSN_LASX(xvmaxi_w, vv_i) | ||
90 | +INSN_LASX(xvmaxi_d, vv_i) | ||
91 | +INSN_LASX(xvmini_b, vv_i) | ||
92 | +INSN_LASX(xvmini_h, vv_i) | ||
93 | +INSN_LASX(xvmini_w, vv_i) | ||
94 | +INSN_LASX(xvmini_d, vv_i) | ||
95 | +INSN_LASX(xvmaxi_bu, vv_i) | ||
96 | +INSN_LASX(xvmaxi_hu, vv_i) | ||
97 | +INSN_LASX(xvmaxi_wu, vv_i) | ||
98 | +INSN_LASX(xvmaxi_du, vv_i) | ||
99 | +INSN_LASX(xvmini_bu, vv_i) | ||
100 | +INSN_LASX(xvmini_hu, vv_i) | ||
101 | +INSN_LASX(xvmini_wu, vv_i) | ||
102 | +INSN_LASX(xvmini_du, vv_i) | ||
103 | + | ||
104 | INSN_LASX(xvreplgr2vr_b, vr) | ||
105 | INSN_LASX(xvreplgr2vr_h, vr) | ||
106 | INSN_LASX(xvreplgr2vr_w, vr) | ||
107 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/loongarch/vec_helper.c | ||
110 | +++ b/target/loongarch/vec_helper.c | ||
111 | @@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_d, 64, D) | ||
112 | #define DO_MIN(a, b) (a < b ? a : b) | ||
113 | #define DO_MAX(a, b) (a > b ? a : b) | ||
114 | |||
115 | -#define VMINMAXI(NAME, BIT, E, DO_OP) \ | ||
116 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
117 | -{ \ | ||
118 | - int i; \ | ||
119 | - VReg *Vd = (VReg *)vd; \ | ||
120 | - VReg *Vj = (VReg *)vj; \ | ||
121 | - typedef __typeof(Vd->E(0)) TD; \ | ||
122 | - \ | ||
123 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
124 | - Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
125 | - } \ | ||
126 | +#define VMINMAXI(NAME, BIT, E, DO_OP) \ | ||
127 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
128 | +{ \ | ||
129 | + int i; \ | ||
130 | + VReg *Vd = (VReg *)vd; \ | ||
131 | + VReg *Vj = (VReg *)vj; \ | ||
132 | + typedef __typeof(Vd->E(0)) TD; \ | ||
133 | + int oprsz = simd_oprsz(desc); \ | ||
134 | + \ | ||
135 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
136 | + Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
137 | + } \ | ||
138 | } | ||
139 | |||
140 | VMINMAXI(vmini_b, 8, B, DO_MIN) | ||
141 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
144 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax) | ||
146 | TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax) | ||
147 | TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax) | ||
148 | TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax) | ||
149 | +TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax) | ||
150 | +TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax) | ||
151 | +TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax) | ||
152 | +TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax) | ||
153 | +TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax) | ||
154 | +TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax) | ||
155 | +TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax) | ||
156 | +TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax) | ||
157 | |||
158 | TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin) | ||
159 | TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin) | ||
160 | @@ -XXX,XX +XXX,XX @@ TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin) | ||
161 | TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin) | ||
162 | TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin) | ||
163 | TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin) | ||
164 | +TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin) | ||
165 | +TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin) | ||
166 | +TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin) | ||
167 | +TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin) | ||
168 | +TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin) | ||
169 | +TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin) | ||
170 | +TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin) | ||
171 | +TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin) | ||
172 | |||
173 | static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
174 | { | ||
175 | @@ -XXX,XX +XXX,XX @@ TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u) | ||
176 | TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u) | ||
177 | TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u) | ||
178 | TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u) | ||
179 | +TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s) | ||
180 | +TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s) | ||
181 | +TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s) | ||
182 | +TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s) | ||
183 | +TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u) | ||
184 | +TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u) | ||
185 | +TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u) | ||
186 | +TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u) | ||
187 | |||
188 | static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
189 | int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
190 | @@ -XXX,XX +XXX,XX @@ TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u) | ||
191 | TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u) | ||
192 | TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u) | ||
193 | TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u) | ||
194 | +TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s) | ||
195 | +TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s) | ||
196 | +TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s) | ||
197 | +TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s) | ||
198 | +TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u) | ||
199 | +TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u) | ||
200 | +TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u) | ||
201 | +TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u) | ||
202 | |||
203 | TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul) | ||
204 | TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul) | ||
205 | -- | ||
206 | 2.39.1 | ||
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVMUL.{B/H/W/D}; | ||
3 | - XVMUH.{B/H/W/D}[U]; | ||
4 | - XVMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
5 | - XVMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
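The high-half forms widen each pair of elements to the double-width type, multiply, and keep the upper half by shifting right by the element width; the 64-bit lanes use muls64/mulu64 instead. The same trick at byte width, as a standalone sketch (not the helper source):

    #include <stdint.h>
    #include <stdio.h>

    static int8_t muh_b(int8_t a, int8_t b)        /* XVMUH.B */
    {
        return ((int16_t)a * (int16_t)b) >> 8;     /* high byte of 16-bit product */
    }

    int main(void)
    {
        printf("%d\n", muh_b(100, 100));  /* 10000 = 0x2710, high byte 0x27 = 39 */
        return 0;
    }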
1 | 6 | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230914022645.1151356-26-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/insns.decode | 38 +++++++ | ||
12 | target/loongarch/disas.c | 38 +++++++ | ||
13 | target/loongarch/vec_helper.c | 55 +++++----- | ||
14 | target/loongarch/insn_trans/trans_vec.c.inc | 113 +++++++++++++++----- | ||
15 | 4 files changed, 189 insertions(+), 55 deletions(-) | ||
16 | |||
17 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/insns.decode | ||
20 | +++ b/target/loongarch/insns.decode | ||
21 | @@ -XXX,XX +XXX,XX @@ xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5 | ||
22 | xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5 | ||
23 | xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5 | ||
24 | |||
25 | +xvmul_b 0111 01001000 01000 ..... ..... ..... @vvv | ||
26 | +xvmul_h 0111 01001000 01001 ..... ..... ..... @vvv | ||
27 | +xvmul_w 0111 01001000 01010 ..... ..... ..... @vvv | ||
28 | +xvmul_d 0111 01001000 01011 ..... ..... ..... @vvv | ||
29 | +xvmuh_b 0111 01001000 01100 ..... ..... ..... @vvv | ||
30 | +xvmuh_h 0111 01001000 01101 ..... ..... ..... @vvv | ||
31 | +xvmuh_w 0111 01001000 01110 ..... ..... ..... @vvv | ||
32 | +xvmuh_d 0111 01001000 01111 ..... ..... ..... @vvv | ||
33 | +xvmuh_bu 0111 01001000 10000 ..... ..... ..... @vvv | ||
34 | +xvmuh_hu 0111 01001000 10001 ..... ..... ..... @vvv | ||
35 | +xvmuh_wu 0111 01001000 10010 ..... ..... ..... @vvv | ||
36 | +xvmuh_du 0111 01001000 10011 ..... ..... ..... @vvv | ||
37 | + | ||
38 | +xvmulwev_h_b 0111 01001001 00000 ..... ..... ..... @vvv | ||
39 | +xvmulwev_w_h 0111 01001001 00001 ..... ..... ..... @vvv | ||
40 | +xvmulwev_d_w 0111 01001001 00010 ..... ..... ..... @vvv | ||
41 | +xvmulwev_q_d 0111 01001001 00011 ..... ..... ..... @vvv | ||
42 | +xvmulwod_h_b 0111 01001001 00100 ..... ..... ..... @vvv | ||
43 | +xvmulwod_w_h 0111 01001001 00101 ..... ..... ..... @vvv | ||
44 | +xvmulwod_d_w 0111 01001001 00110 ..... ..... ..... @vvv | ||
45 | +xvmulwod_q_d 0111 01001001 00111 ..... ..... ..... @vvv | ||
46 | +xvmulwev_h_bu 0111 01001001 10000 ..... ..... ..... @vvv | ||
47 | +xvmulwev_w_hu 0111 01001001 10001 ..... ..... ..... @vvv | ||
48 | +xvmulwev_d_wu 0111 01001001 10010 ..... ..... ..... @vvv | ||
49 | +xvmulwev_q_du 0111 01001001 10011 ..... ..... ..... @vvv | ||
50 | +xvmulwod_h_bu 0111 01001001 10100 ..... ..... ..... @vvv | ||
51 | +xvmulwod_w_hu 0111 01001001 10101 ..... ..... ..... @vvv | ||
52 | +xvmulwod_d_wu 0111 01001001 10110 ..... ..... ..... @vvv | ||
53 | +xvmulwod_q_du 0111 01001001 10111 ..... ..... ..... @vvv | ||
54 | +xvmulwev_h_bu_b 0111 01001010 00000 ..... ..... ..... @vvv | ||
55 | +xvmulwev_w_hu_h 0111 01001010 00001 ..... ..... ..... @vvv | ||
56 | +xvmulwev_d_wu_w 0111 01001010 00010 ..... ..... ..... @vvv | ||
57 | +xvmulwev_q_du_d 0111 01001010 00011 ..... ..... ..... @vvv | ||
58 | +xvmulwod_h_bu_b 0111 01001010 00100 ..... ..... ..... @vvv | ||
59 | +xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv | ||
60 | +xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv | ||
61 | +xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv | ||
62 | + | ||
63 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
64 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
65 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
66 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/loongarch/disas.c | ||
69 | +++ b/target/loongarch/disas.c | ||
70 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmini_hu, vv_i) | ||
71 | INSN_LASX(xvmini_wu, vv_i) | ||
72 | INSN_LASX(xvmini_du, vv_i) | ||
73 | |||
74 | +INSN_LASX(xvmul_b, vvv) | ||
75 | +INSN_LASX(xvmul_h, vvv) | ||
76 | +INSN_LASX(xvmul_w, vvv) | ||
77 | +INSN_LASX(xvmul_d, vvv) | ||
78 | +INSN_LASX(xvmuh_b, vvv) | ||
79 | +INSN_LASX(xvmuh_h, vvv) | ||
80 | +INSN_LASX(xvmuh_w, vvv) | ||
81 | +INSN_LASX(xvmuh_d, vvv) | ||
82 | +INSN_LASX(xvmuh_bu, vvv) | ||
83 | +INSN_LASX(xvmuh_hu, vvv) | ||
84 | +INSN_LASX(xvmuh_wu, vvv) | ||
85 | +INSN_LASX(xvmuh_du, vvv) | ||
86 | + | ||
87 | +INSN_LASX(xvmulwev_h_b, vvv) | ||
88 | +INSN_LASX(xvmulwev_w_h, vvv) | ||
89 | +INSN_LASX(xvmulwev_d_w, vvv) | ||
90 | +INSN_LASX(xvmulwev_q_d, vvv) | ||
91 | +INSN_LASX(xvmulwod_h_b, vvv) | ||
92 | +INSN_LASX(xvmulwod_w_h, vvv) | ||
93 | +INSN_LASX(xvmulwod_d_w, vvv) | ||
94 | +INSN_LASX(xvmulwod_q_d, vvv) | ||
95 | +INSN_LASX(xvmulwev_h_bu, vvv) | ||
96 | +INSN_LASX(xvmulwev_w_hu, vvv) | ||
97 | +INSN_LASX(xvmulwev_d_wu, vvv) | ||
98 | +INSN_LASX(xvmulwev_q_du, vvv) | ||
99 | +INSN_LASX(xvmulwod_h_bu, vvv) | ||
100 | +INSN_LASX(xvmulwod_w_hu, vvv) | ||
101 | +INSN_LASX(xvmulwod_d_wu, vvv) | ||
102 | +INSN_LASX(xvmulwod_q_du, vvv) | ||
103 | +INSN_LASX(xvmulwev_h_bu_b, vvv) | ||
104 | +INSN_LASX(xvmulwev_w_hu_h, vvv) | ||
105 | +INSN_LASX(xvmulwev_d_wu_w, vvv) | ||
106 | +INSN_LASX(xvmulwev_q_du_d, vvv) | ||
107 | +INSN_LASX(xvmulwod_h_bu_b, vvv) | ||
108 | +INSN_LASX(xvmulwod_w_hu_h, vvv) | ||
109 | +INSN_LASX(xvmulwod_d_wu_w, vvv) | ||
110 | +INSN_LASX(xvmulwod_q_du_d, vvv) | ||
111 | + | ||
112 | INSN_LASX(xvreplgr2vr_b, vr) | ||
113 | INSN_LASX(xvreplgr2vr_h, vr) | ||
114 | INSN_LASX(xvreplgr2vr_w, vr) | ||
115 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/target/loongarch/vec_helper.c | ||
118 | +++ b/target/loongarch/vec_helper.c | ||
119 | @@ -XXX,XX +XXX,XX @@ VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) | ||
120 | VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) | ||
121 | VMINMAXI(vmaxi_du, 64, UD, DO_MAX) | ||
122 | |||
123 | -#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ | ||
124 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
125 | -{ \ | ||
126 | - int i; \ | ||
127 | - VReg *Vd = (VReg *)vd; \ | ||
128 | - VReg *Vj = (VReg *)vj; \ | ||
129 | - VReg *Vk = (VReg *)vk; \ | ||
130 | - typedef __typeof(Vd->E1(0)) T; \ | ||
131 | - \ | ||
132 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
133 | - Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ | ||
134 | - } \ | ||
135 | +#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ | ||
136 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
137 | +{ \ | ||
138 | + int i; \ | ||
139 | + VReg *Vd = (VReg *)vd; \ | ||
140 | + VReg *Vj = (VReg *)vj; \ | ||
141 | + VReg *Vk = (VReg *)vk; \ | ||
142 | + typedef __typeof(Vd->E1(0)) T; \ | ||
143 | + int oprsz = simd_oprsz(desc); \ | ||
144 | + \ | ||
145 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
146 | + Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ | ||
147 | + } \ | ||
148 | } | ||
149 | |||
150 | -void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
151 | +void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) | ||
152 | { | ||
153 | - uint64_t l, h1, h2; | ||
154 | + int i; | ||
155 | + uint64_t l, h; | ||
156 | VReg *Vd = (VReg *)vd; | ||
157 | VReg *Vj = (VReg *)vj; | ||
158 | VReg *Vk = (VReg *)vk; | ||
159 | + int oprsz = simd_oprsz(desc); | ||
160 | |||
161 | - muls64(&l, &h1, Vj->D(0), Vk->D(0)); | ||
162 | - muls64(&l, &h2, Vj->D(1), Vk->D(1)); | ||
163 | - | ||
164 | - Vd->D(0) = h1; | ||
165 | - Vd->D(1) = h2; | ||
166 | + for (i = 0; i < oprsz / 8; i++) { | ||
167 | + muls64(&l, &h, Vj->D(i), Vk->D(i)); | ||
168 | + Vd->D(i) = h; | ||
169 | + } | ||
170 | } | ||
171 | |||
172 | DO_VMUH(vmuh_b, 8, H, B, DO_MUH) | ||
173 | DO_VMUH(vmuh_h, 16, W, H, DO_MUH) | ||
174 | DO_VMUH(vmuh_w, 32, D, W, DO_MUH) | ||
175 | |||
176 | -void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
177 | +void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) | ||
178 | { | ||
179 | - uint64_t l, h1, h2; | ||
180 | + int i; | ||
181 | + uint64_t l, h; | ||
182 | VReg *Vd = (VReg *)vd; | ||
183 | VReg *Vj = (VReg *)vj; | ||
184 | VReg *Vk = (VReg *)vk; | ||
185 | + int oprsz = simd_oprsz(desc); | ||
186 | |||
187 | - mulu64(&l, &h1, Vj->D(0), Vk->D(0)); | ||
188 | - mulu64(&l, &h2, Vj->D(1), Vk->D(1)); | ||
189 | - | ||
190 | - Vd->D(0) = h1; | ||
191 | - Vd->D(1) = h2; | ||
192 | + for (i = 0; i < oprsz / 8; i++) { | ||
193 | + mulu64(&l, &h, Vj->D(i), Vk->D(i)); | ||
194 | + Vd->D(i) = h; | ||
195 | + } | ||
196 | } | ||
197 | |||
198 | DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) | ||
199 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
200 | index XXXXXXX..XXXXXXX 100644 | ||
201 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
202 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
203 | @@ -XXX,XX +XXX,XX @@ TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul) | ||
204 | TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul) | ||
205 | TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul) | ||
206 | TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul) | ||
207 | +TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul) | ||
208 | +TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul) | ||
209 | +TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul) | ||
210 | +TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul) | ||
211 | |||
212 | static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
213 | { | ||
214 | @@ -XXX,XX +XXX,XX @@ TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s) | ||
215 | TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s) | ||
216 | TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s) | ||
217 | TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s) | ||
218 | +TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s) | ||
219 | +TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s) | ||
220 | +TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s) | ||
221 | +TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s) | ||
222 | |||
223 | static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
224 | { | ||
225 | @@ -XXX,XX +XXX,XX @@ TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u) | ||
226 | TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u) | ||
227 | TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u) | ||
228 | TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u) | ||
229 | +TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u) | ||
230 | +TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u) | ||
231 | +TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u) | ||
232 | +TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u) | ||
233 | |||
234 | static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
235 | { | ||
236 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
237 | TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s) | ||
238 | TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s) | ||
239 | TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s) | ||
240 | +TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s) | ||
241 | +TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s) | ||
242 | +TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s) | ||
243 | |||
244 | static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh, | ||
245 | TCGv_i64 arg1, TCGv_i64 arg2) | ||
246 | @@ -XXX,XX +XXX,XX @@ static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh, | ||
247 | tcg_gen_mulsu2_i64(rl, rh, arg2, arg1); | ||
248 | } | ||
249 | |||
250 | -#define VMUL_Q(NAME, FN, idx1, idx2) \ | ||
251 | -static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \ | ||
252 | -{ \ | ||
253 | - TCGv_i64 rh, rl, arg1, arg2; \ | ||
254 | - \ | ||
255 | - if (!avail_LSX(ctx)) { \ | ||
256 | - return false; \ | ||
257 | - } \ | ||
258 | - \ | ||
259 | - rh = tcg_temp_new_i64(); \ | ||
260 | - rl = tcg_temp_new_i64(); \ | ||
261 | - arg1 = tcg_temp_new_i64(); \ | ||
262 | - arg2 = tcg_temp_new_i64(); \ | ||
263 | - \ | ||
264 | - get_vreg64(arg1, a->vj, idx1); \ | ||
265 | - get_vreg64(arg2, a->vk, idx2); \ | ||
266 | - \ | ||
267 | - tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \ | ||
268 | - \ | ||
269 | - set_vreg64(rh, a->vd, 1); \ | ||
270 | - set_vreg64(rl, a->vd, 0); \ | ||
271 | - \ | ||
272 | - return true; \ | ||
273 | +static bool gen_vmul_q_vl(DisasContext *ctx, | ||
274 | + arg_vvv *a, uint32_t oprsz, int idx1, int idx2, | ||
275 | + void (*func)(TCGv_i64, TCGv_i64, | ||
276 | + TCGv_i64, TCGv_i64)) | ||
277 | +{ | ||
278 | + TCGv_i64 rh, rl, arg1, arg2; | ||
279 | + int i; | ||
280 | + | ||
281 | + if (!check_vec(ctx, oprsz)) { | ||
282 | + return true; | ||
283 | + } | ||
284 | + | ||
285 | + rh = tcg_temp_new_i64(); | ||
286 | + rl = tcg_temp_new_i64(); | ||
287 | + arg1 = tcg_temp_new_i64(); | ||
288 | + arg2 = tcg_temp_new_i64(); | ||
289 | + | ||
290 | + for (i = 0; i < oprsz / 16; i++) { | ||
291 | + get_vreg64(arg1, a->vj, 2 * i + idx1); | ||
292 | + get_vreg64(arg2, a->vk, 2 * i + idx2); | ||
293 | + | ||
294 | + func(rl, rh, arg1, arg2); | ||
295 | + | ||
296 | + set_vreg64(rh, a->vd, 2 * i + 1); | ||
297 | + set_vreg64(rl, a->vd, 2 * i); | ||
298 | + } | ||
299 | + | ||
300 | + return true; | ||
301 | +} | ||
302 | + | ||
303 | +static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, | ||
304 | + void (*func)(TCGv_i64, TCGv_i64, | ||
305 | + TCGv_i64, TCGv_i64)) | ||
306 | +{ | ||
307 | + return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func); | ||
308 | +} | ||
309 | + | ||
310 | +static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, | ||
311 | + void (*func)(TCGv_i64, TCGv_i64, | ||
312 | + TCGv_i64, TCGv_i64)) | ||
313 | +{ | ||
314 | + return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func); | ||
315 | } | ||
316 | |||
317 | -VMUL_Q(vmulwev_q_d, muls2, 0, 0) | ||
318 | -VMUL_Q(vmulwod_q_d, muls2, 1, 1) | ||
319 | -VMUL_Q(vmulwev_q_du, mulu2, 0, 0) | ||
320 | -VMUL_Q(vmulwod_q_du, mulu2, 1, 1) | ||
321 | -VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0) | ||
322 | -VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1) | ||
323 | +TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64) | ||
324 | +TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64) | ||
325 | +TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64) | ||
326 | +TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64) | ||
327 | +TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64) | ||
328 | +TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64) | ||
329 | +TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64) | ||
330 | +TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64) | ||
331 | +TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64) | ||
332 | +TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64) | ||
333 | +TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64) | ||
334 | +TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64) | ||
335 | |||
336 | static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
337 | { | ||
338 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
339 | TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s) | ||
340 | TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s) | ||
341 | TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s) | ||
342 | +TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s) | ||
343 | +TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s) | ||
344 | +TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s) | ||
345 | |||
346 | static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
347 | { | ||
348 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
349 | TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u) | ||
350 | TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u) | ||
351 | TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u) | ||
352 | +TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u) | ||
353 | +TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u) | ||
354 | +TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u) | ||
355 | |||
356 | static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
357 | { | ||
358 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
359 | TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u) | ||
360 | TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u) | ||
361 | TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u) | ||
362 | +TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u) | ||
363 | +TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u) | ||
364 | +TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u) | ||
365 | |||
366 | static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
367 | { | ||
368 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
369 | TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s) | ||
370 | TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s) | ||
371 | TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s) | ||
372 | +TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s) | ||
373 | +TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s) | ||
374 | +TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s) | ||
375 | |||
376 | static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
377 | { | ||
378 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
379 | TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s) | ||
380 | TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s) | ||
381 | TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s) | ||
382 | +TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s) | ||
383 | +TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s) | ||
384 | +TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s) | ||
385 | |||
386 | static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
387 | { | ||
388 | -- | ||
389 | 2.39.1 | ||
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVMADD.{B/H/W/D}; | ||
3 | - XVMSUB.{B/H/W/D}; | ||
4 | - XVMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
5 | - XVMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
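The Q.D forms accumulate a full 128-bit product into an even/odd pair of 64-bit lanes, which the translator builds from a mul*2 followed by add2. Equivalent scalar arithmetic for one even-lane step, sketched with the GCC/Clang 128-bit integer extension (not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned __int128 u128;

    /* One XVMADDWEV.Q.D step: {hi,lo} += (int128)j * (int128)k, modulo 2^128. */
    static void maddwev_q_d(uint64_t *lo, uint64_t *hi, int64_t j, int64_t k)
    {
        u128 acc = ((u128)*hi << 64) | *lo;
        acc += (u128)((__int128)j * (__int128)k);  /* signed 64x64 -> 128 */
        *lo = (uint64_t)acc;
        *hi = (uint64_t)(acc >> 64);
    }

    int main(void)
    {
        uint64_t lo = 1, hi = 0;
        maddwev_q_d(&lo, &hi, -2, 3);              /* 1 + (-6) = -5 */
        printf("%016llx %016llx\n", (unsigned long long)hi,
               (unsigned long long)lo);            /* ffff... fffffffffffffffb */
        return 0;
    }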
1 | 6 | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230914022645.1151356-27-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/insns.decode | 34 ++++++ | ||
12 | target/loongarch/disas.c | 34 ++++++ | ||
13 | target/loongarch/vec_helper.c | 112 +++++++++--------- | ||
14 | target/loongarch/insn_trans/trans_vec.c.inc | 121 ++++++++++++++------ | ||
15 | 4 files changed, 212 insertions(+), 89 deletions(-) | ||
16 | |||
17 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/insns.decode | ||
20 | +++ b/target/loongarch/insns.decode | ||
21 | @@ -XXX,XX +XXX,XX @@ xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv | ||
22 | xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv | ||
23 | xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv | ||
24 | |||
25 | +xvmadd_b 0111 01001010 10000 ..... ..... ..... @vvv | ||
26 | +xvmadd_h 0111 01001010 10001 ..... ..... ..... @vvv | ||
27 | +xvmadd_w 0111 01001010 10010 ..... ..... ..... @vvv | ||
28 | +xvmadd_d 0111 01001010 10011 ..... ..... ..... @vvv | ||
29 | +xvmsub_b 0111 01001010 10100 ..... ..... ..... @vvv | ||
30 | +xvmsub_h 0111 01001010 10101 ..... ..... ..... @vvv | ||
31 | +xvmsub_w 0111 01001010 10110 ..... ..... ..... @vvv | ||
32 | +xvmsub_d 0111 01001010 10111 ..... ..... ..... @vvv | ||
33 | + | ||
34 | +xvmaddwev_h_b 0111 01001010 11000 ..... ..... ..... @vvv | ||
35 | +xvmaddwev_w_h 0111 01001010 11001 ..... ..... ..... @vvv | ||
36 | +xvmaddwev_d_w 0111 01001010 11010 ..... ..... ..... @vvv | ||
37 | +xvmaddwev_q_d 0111 01001010 11011 ..... ..... ..... @vvv | ||
38 | +xvmaddwod_h_b 0111 01001010 11100 ..... ..... ..... @vvv | ||
39 | +xvmaddwod_w_h 0111 01001010 11101 ..... ..... ..... @vvv | ||
40 | +xvmaddwod_d_w 0111 01001010 11110 ..... ..... ..... @vvv | ||
41 | +xvmaddwod_q_d 0111 01001010 11111 ..... ..... ..... @vvv | ||
42 | +xvmaddwev_h_bu 0111 01001011 01000 ..... ..... ..... @vvv | ||
43 | +xvmaddwev_w_hu 0111 01001011 01001 ..... ..... ..... @vvv | ||
44 | +xvmaddwev_d_wu 0111 01001011 01010 ..... ..... ..... @vvv | ||
45 | +xvmaddwev_q_du 0111 01001011 01011 ..... ..... ..... @vvv | ||
46 | +xvmaddwod_h_bu 0111 01001011 01100 ..... ..... ..... @vvv | ||
47 | +xvmaddwod_w_hu 0111 01001011 01101 ..... ..... ..... @vvv | ||
48 | +xvmaddwod_d_wu 0111 01001011 01110 ..... ..... ..... @vvv | ||
49 | +xvmaddwod_q_du 0111 01001011 01111 ..... ..... ..... @vvv | ||
50 | +xvmaddwev_h_bu_b 0111 01001011 11000 ..... ..... ..... @vvv | ||
51 | +xvmaddwev_w_hu_h 0111 01001011 11001 ..... ..... ..... @vvv | ||
52 | +xvmaddwev_d_wu_w 0111 01001011 11010 ..... ..... ..... @vvv | ||
53 | +xvmaddwev_q_du_d 0111 01001011 11011 ..... ..... ..... @vvv | ||
54 | +xvmaddwod_h_bu_b 0111 01001011 11100 ..... ..... ..... @vvv | ||
55 | +xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv | ||
56 | +xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv | ||
57 | +xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv | ||
58 | + | ||
59 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
60 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
61 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
62 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/loongarch/disas.c | ||
65 | +++ b/target/loongarch/disas.c | ||
66 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmulwod_w_hu_h, vvv) | ||
67 | INSN_LASX(xvmulwod_d_wu_w, vvv) | ||
68 | INSN_LASX(xvmulwod_q_du_d, vvv) | ||
69 | |||
70 | +INSN_LASX(xvmadd_b, vvv) | ||
71 | +INSN_LASX(xvmadd_h, vvv) | ||
72 | +INSN_LASX(xvmadd_w, vvv) | ||
73 | +INSN_LASX(xvmadd_d, vvv) | ||
74 | +INSN_LASX(xvmsub_b, vvv) | ||
75 | +INSN_LASX(xvmsub_h, vvv) | ||
76 | +INSN_LASX(xvmsub_w, vvv) | ||
77 | +INSN_LASX(xvmsub_d, vvv) | ||
78 | + | ||
79 | +INSN_LASX(xvmaddwev_h_b, vvv) | ||
80 | +INSN_LASX(xvmaddwev_w_h, vvv) | ||
81 | +INSN_LASX(xvmaddwev_d_w, vvv) | ||
82 | +INSN_LASX(xvmaddwev_q_d, vvv) | ||
83 | +INSN_LASX(xvmaddwod_h_b, vvv) | ||
84 | +INSN_LASX(xvmaddwod_w_h, vvv) | ||
85 | +INSN_LASX(xvmaddwod_d_w, vvv) | ||
86 | +INSN_LASX(xvmaddwod_q_d, vvv) | ||
87 | +INSN_LASX(xvmaddwev_h_bu, vvv) | ||
88 | +INSN_LASX(xvmaddwev_w_hu, vvv) | ||
89 | +INSN_LASX(xvmaddwev_d_wu, vvv) | ||
90 | +INSN_LASX(xvmaddwev_q_du, vvv) | ||
91 | +INSN_LASX(xvmaddwod_h_bu, vvv) | ||
92 | +INSN_LASX(xvmaddwod_w_hu, vvv) | ||
93 | +INSN_LASX(xvmaddwod_d_wu, vvv) | ||
94 | +INSN_LASX(xvmaddwod_q_du, vvv) | ||
95 | +INSN_LASX(xvmaddwev_h_bu_b, vvv) | ||
96 | +INSN_LASX(xvmaddwev_w_hu_h, vvv) | ||
97 | +INSN_LASX(xvmaddwev_d_wu_w, vvv) | ||
98 | +INSN_LASX(xvmaddwev_q_du_d, vvv) | ||
99 | +INSN_LASX(xvmaddwod_h_bu_b, vvv) | ||
100 | +INSN_LASX(xvmaddwod_w_hu_h, vvv) | ||
101 | +INSN_LASX(xvmaddwod_d_wu_w, vvv) | ||
102 | +INSN_LASX(xvmaddwod_q_du_d, vvv) | ||
103 | + | ||
104 | INSN_LASX(xvreplgr2vr_b, vr) | ||
105 | INSN_LASX(xvreplgr2vr_h, vr) | ||
106 | INSN_LASX(xvreplgr2vr_w, vr) | ||
107 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/loongarch/vec_helper.c | ||
110 | +++ b/target/loongarch/vec_helper.c | ||
111 | @@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
112 | #define DO_MADD(a, b, c) (a + b * c) | ||
113 | #define DO_MSUB(a, b, c) (a - b * c) | ||
114 | |||
115 | -#define VMADDSUB(NAME, BIT, E, DO_OP) \ | ||
116 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
117 | -{ \ | ||
118 | - int i; \ | ||
119 | - VReg *Vd = (VReg *)vd; \ | ||
120 | - VReg *Vj = (VReg *)vj; \ | ||
121 | - VReg *Vk = (VReg *)vk; \ | ||
122 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
123 | - Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ | ||
124 | - } \ | ||
125 | +#define VMADDSUB(NAME, BIT, E, DO_OP) \ | ||
126 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
127 | +{ \ | ||
128 | + int i; \ | ||
129 | + VReg *Vd = (VReg *)vd; \ | ||
130 | + VReg *Vj = (VReg *)vj; \ | ||
131 | + VReg *Vk = (VReg *)vk; \ | ||
132 | + int oprsz = simd_oprsz(desc); \ | ||
133 | + \ | ||
134 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
135 | + Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \ | ||
136 | + } \ | ||
137 | } | ||
138 | |||
139 | VMADDSUB(vmadd_b, 8, B, DO_MADD) | ||
140 | @@ -XXX,XX +XXX,XX @@ VMADDSUB(vmsub_w, 32, W, DO_MSUB) | ||
141 | VMADDSUB(vmsub_d, 64, D, DO_MSUB) | ||
142 | |||
143 | #define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ | ||
144 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
145 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
146 | { \ | ||
147 | int i; \ | ||
148 | VReg *Vd = (VReg *)vd; \ | ||
149 | VReg *Vj = (VReg *)vj; \ | ||
150 | VReg *Vk = (VReg *)vk; \ | ||
151 | typedef __typeof(Vd->E1(0)) TD; \ | ||
152 | + int oprsz = simd_oprsz(desc); \ | ||
153 | \ | ||
154 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
155 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
156 | Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ | ||
157 | } \ | ||
158 | } | ||
159 | @@ -XXX,XX +XXX,XX @@ VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) | ||
160 | VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) | ||
161 | VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) | ||
162 | |||
163 | -#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ | ||
164 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
165 | -{ \ | ||
166 | - int i; \ | ||
167 | - VReg *Vd = (VReg *)vd; \ | ||
168 | - VReg *Vj = (VReg *)vj; \ | ||
169 | - VReg *Vk = (VReg *)vk; \ | ||
170 | - typedef __typeof(Vd->E1(0)) TD; \ | ||
171 | - \ | ||
172 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
173 | - Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ | ||
174 | - (TD)Vk->E2(2 * i + 1)); \ | ||
175 | - } \ | ||
176 | +#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ | ||
177 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
178 | +{ \ | ||
179 | + int i; \ | ||
180 | + VReg *Vd = (VReg *)vd; \ | ||
181 | + VReg *Vj = (VReg *)vj; \ | ||
182 | + VReg *Vk = (VReg *)vk; \ | ||
183 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
184 | + int oprsz = simd_oprsz(desc); \ | ||
185 | + \ | ||
186 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
187 | + Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ | ||
188 | + (TD)Vk->E2(2 * i + 1)); \ | ||
189 | + } \ | ||
190 | } | ||
191 | |||
192 | VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) | ||
193 | @@ -XXX,XX +XXX,XX @@ VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) | ||
194 | VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) | ||
195 | VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) | ||
196 | |||
197 | -#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
198 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
199 | -{ \ | ||
200 | - int i; \ | ||
201 | - VReg *Vd = (VReg *)vd; \ | ||
202 | - VReg *Vj = (VReg *)vj; \ | ||
203 | - VReg *Vk = (VReg *)vk; \ | ||
204 | - typedef __typeof(Vd->ES1(0)) TS1; \ | ||
205 | - typedef __typeof(Vd->EU1(0)) TU1; \ | ||
206 | - \ | ||
207 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
208 | - Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ | ||
209 | - (TS1)Vk->ES2(2 * i)); \ | ||
210 | - } \ | ||
211 | +#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
212 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
213 | +{ \ | ||
214 | + int i; \ | ||
215 | + VReg *Vd = (VReg *)vd; \ | ||
216 | + VReg *Vj = (VReg *)vj; \ | ||
217 | + VReg *Vk = (VReg *)vk; \ | ||
218 | + typedef __typeof(Vd->ES1(0)) TS1; \ | ||
219 | + typedef __typeof(Vd->EU1(0)) TU1; \ | ||
220 | + int oprsz = simd_oprsz(desc); \ | ||
221 | + \ | ||
222 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
223 | + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ | ||
224 | + (TS1)Vk->ES2(2 * i)); \ | ||
225 | + } \ | ||
226 | } | ||
227 | |||
228 | VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
229 | VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
230 | VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
231 | |||
232 | -#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
233 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
234 | -{ \ | ||
235 | - int i; \ | ||
236 | - VReg *Vd = (VReg *)vd; \ | ||
237 | - VReg *Vj = (VReg *)vj; \ | ||
238 | - VReg *Vk = (VReg *)vk; \ | ||
239 | - typedef __typeof(Vd->ES1(0)) TS1; \ | ||
240 | - typedef __typeof(Vd->EU1(0)) TU1; \ | ||
241 | - \ | ||
242 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
243 | - Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ | ||
244 | - (TS1)Vk->ES2(2 * i + 1)); \ | ||
245 | - } \ | ||
246 | +#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
247 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
248 | +{ \ | ||
249 | + int i; \ | ||
250 | + VReg *Vd = (VReg *)vd; \ | ||
251 | + VReg *Vj = (VReg *)vj; \ | ||
252 | + VReg *Vk = (VReg *)vk; \ | ||
253 | + typedef __typeof(Vd->ES1(0)) TS1; \ | ||
254 | + typedef __typeof(Vd->EU1(0)) TU1; \ | ||
255 | + int oprsz = simd_oprsz(desc); \ | ||
256 | + \ | ||
257 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
258 | + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ | ||
259 | + (TS1)Vk->ES2(2 * i + 1)); \ | ||
260 | + } \ | ||
261 | } | ||
262 | |||
263 | VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
264 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
265 | index XXXXXXX..XXXXXXX 100644 | ||
266 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
267 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
268 | @@ -XXX,XX +XXX,XX @@ TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd) | ||
269 | TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd) | ||
270 | TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd) | ||
271 | TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd) | ||
272 | +TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd) | ||
273 | +TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd) | ||
274 | +TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd) | ||
275 | +TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd) | ||
276 | |||
277 | static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
278 | { | ||
279 | @@ -XXX,XX +XXX,XX @@ TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub) | ||
280 | TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub) | ||
281 | TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub) | ||
282 | TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub) | ||
283 | +TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub) | ||
284 | +TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub) | ||
285 | +TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub) | ||
286 | +TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub) | ||
287 | |||
288 | static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
289 | { | ||
290 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
291 | TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s) | ||
292 | TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s) | ||
293 | TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s) | ||
294 | +TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s) | ||
295 | +TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s) | ||
296 | +TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s) | ||
297 | |||
298 | -#define VMADD_Q(NAME, FN, idx1, idx2) \ | ||
299 | -static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \ | ||
300 | -{ \ | ||
301 | - TCGv_i64 rh, rl, arg1, arg2, th, tl; \ | ||
302 | - \ | ||
303 | - if (!avail_LSX(ctx)) { \ | ||
304 | - return false; \ | ||
305 | - } \ | ||
306 | - \ | ||
307 | - rh = tcg_temp_new_i64(); \ | ||
308 | - rl = tcg_temp_new_i64(); \ | ||
309 | - arg1 = tcg_temp_new_i64(); \ | ||
310 | - arg2 = tcg_temp_new_i64(); \ | ||
311 | - th = tcg_temp_new_i64(); \ | ||
312 | - tl = tcg_temp_new_i64(); \ | ||
313 | - \ | ||
314 | - get_vreg64(arg1, a->vj, idx1); \ | ||
315 | - get_vreg64(arg2, a->vk, idx2); \ | ||
316 | - get_vreg64(rh, a->vd, 1); \ | ||
317 | - get_vreg64(rl, a->vd, 0); \ | ||
318 | - \ | ||
319 | - tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \ | ||
320 | - tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \ | ||
321 | - \ | ||
322 | - set_vreg64(rh, a->vd, 1); \ | ||
323 | - set_vreg64(rl, a->vd, 0); \ | ||
324 | - \ | ||
325 | - return true; \ | ||
326 | -} | ||
327 | - | ||
328 | -VMADD_Q(vmaddwev_q_d, muls2, 0, 0) | ||
329 | -VMADD_Q(vmaddwod_q_d, muls2, 1, 1) | ||
330 | -VMADD_Q(vmaddwev_q_du, mulu2, 0, 0) | ||
331 | -VMADD_Q(vmaddwod_q_du, mulu2, 1, 1) | ||
332 | -VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0) | ||
333 | -VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1) | ||
334 | +static bool gen_vmadd_q_vl(DisasContext *ctx, | ||
335 | + arg_vvv *a, uint32_t oprsz, int idx1, int idx2, | ||
336 | + void (*func)(TCGv_i64, TCGv_i64, | ||
337 | + TCGv_i64, TCGv_i64)) | ||
338 | +{ | ||
339 | + TCGv_i64 rh, rl, arg1, arg2, th, tl; | ||
340 | + int i; | ||
341 | + | ||
342 | + if (!check_vec(ctx, oprsz)) { | ||
343 | + return true; | ||
344 | + } | ||
345 | + | ||
346 | + rh = tcg_temp_new_i64(); | ||
347 | + rl = tcg_temp_new_i64(); | ||
348 | + arg1 = tcg_temp_new_i64(); | ||
349 | + arg2 = tcg_temp_new_i64(); | ||
350 | + th = tcg_temp_new_i64(); | ||
351 | + tl = tcg_temp_new_i64(); | ||
352 | + | ||
353 | + for (i = 0; i < oprsz / 16; i++) { | ||
354 | + get_vreg64(arg1, a->vj, 2 * i + idx1); | ||
355 | + get_vreg64(arg2, a->vk, 2 * i + idx2); | ||
356 | + get_vreg64(rh, a->vd, 2 * i + 1); | ||
357 | + get_vreg64(rl, a->vd, 2 * i); | ||
358 | + | ||
359 | + func(tl, th, arg1, arg2); | ||
360 | + tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); | ||
361 | + | ||
362 | + set_vreg64(rh, a->vd, 2 * i + 1); | ||
363 | + set_vreg64(rl, a->vd, 2 * i); | ||
364 | + } | ||
365 | + | ||
366 | + return true; | ||
367 | +} | ||
368 | + | ||
369 | +static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, | ||
370 | + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) | ||
371 | +{ | ||
372 | + return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func); | ||
373 | +} | ||
374 | + | ||
375 | +static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, | ||
376 | + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) | ||
377 | +{ | ||
378 | + return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func); | ||
379 | +} | ||
380 | + | ||
381 | +TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64) | ||
382 | +TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64) | ||
383 | +TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64) | ||
384 | +TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64) | ||
385 | +TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64) | ||
386 | +TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64) | ||
387 | +TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64) | ||
388 | +TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64) | ||
389 | +TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64) | ||
390 | +TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64) | ||
391 | +TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64) | ||
392 | +TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64) | ||
393 | |||
394 | static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
395 | { | ||
396 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
397 | TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s) | ||
398 | TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s) | ||
399 | TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s) | ||
400 | +TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s) | ||
401 | +TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s) | ||
402 | +TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s) | ||
403 | |||
404 | static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
405 | { | ||
406 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
407 | TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u) | ||
408 | TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u) | ||
409 | TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u) | ||
410 | +TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u) | ||
411 | +TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u) | ||
412 | +TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u) | ||
413 | |||
414 | static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
415 | { | ||
416 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
417 | TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u) | ||
418 | TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u) | ||
419 | TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u) | ||
420 | +TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u) | ||
421 | +TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u) | ||
422 | +TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u) | ||
423 | |||
424 | static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
425 | { | ||
426 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
427 | TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s) | ||
428 | TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s) | ||
429 | TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s) | ||
430 | +TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s) | ||
431 | +TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s) | ||
432 | +TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s) | ||
433 | |||
434 | static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
435 | { | ||
436 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
437 | TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s) | ||
438 | TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s) | ||
439 | TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s) | ||
440 | +TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s) | ||
441 | +TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s) | ||
442 | +TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s) | ||
443 | |||
444 | TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b) | ||
445 | TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h) | ||
446 | -- | ||
447 | 2.39.1 | ||
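As a host-side model of what gen_vmadd_q_vl() computes per 128-bit lane: a minimal sketch, assuming a compiler with the __int128 extension; the function and parameter names are illustrative, not from the patch. idx selects the even (0) or odd (1) source doubleword, and the LASX forms simply run this for two lanes (oprsz / 16 = 2):

    #include <stdint.h>

    static void model_vmaddw_q_d(uint64_t vd[2], const uint64_t vj[2],
                                 const uint64_t vk[2], int idx)
    {
        /* 128-bit accumulator from the lane's doubleword pair */
        unsigned __int128 acc = ((unsigned __int128)vd[1] << 64) | vd[0];
        /* signed 64x64 -> 128 widening multiply (tcg_gen_muls2_i64) */
        unsigned __int128 prod =
            (unsigned __int128)((__int128)(int64_t)vj[idx] * (int64_t)vk[idx]);

        acc += prod;                    /* tcg_gen_add2_i64 */
        vd[0] = (uint64_t)acc;
        vd[1] = (uint64_t)(acc >> 64);
    }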
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVDIV.{B/H/W/D}[U]; | ||
3 | - XVMOD.{B/H/W/D}[U]. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-28-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 17 +++++++++++++++++ | ||
10 | target/loongarch/disas.c | 17 +++++++++++++++++ | ||
11 | target/loongarch/vec_helper.c | 4 +++- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++ | ||
13 | 4 files changed, 53 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv | ||
20 | xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv | ||
21 | xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv | ||
22 | |||
23 | +xvdiv_b 0111 01001110 00000 ..... ..... ..... @vvv | ||
24 | +xvdiv_h 0111 01001110 00001 ..... ..... ..... @vvv | ||
25 | +xvdiv_w 0111 01001110 00010 ..... ..... ..... @vvv | ||
26 | +xvdiv_d 0111 01001110 00011 ..... ..... ..... @vvv | ||
27 | +xvmod_b 0111 01001110 00100 ..... ..... ..... @vvv | ||
28 | +xvmod_h 0111 01001110 00101 ..... ..... ..... @vvv | ||
29 | +xvmod_w 0111 01001110 00110 ..... ..... ..... @vvv | ||
30 | +xvmod_d 0111 01001110 00111 ..... ..... ..... @vvv | ||
31 | +xvdiv_bu 0111 01001110 01000 ..... ..... ..... @vvv | ||
32 | +xvdiv_hu 0111 01001110 01001 ..... ..... ..... @vvv | ||
33 | +xvdiv_wu 0111 01001110 01010 ..... ..... ..... @vvv | ||
34 | +xvdiv_du 0111 01001110 01011 ..... ..... ..... @vvv | ||
35 | +xvmod_bu 0111 01001110 01100 ..... ..... ..... @vvv | ||
36 | +xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv | ||
37 | +xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv | ||
38 | +xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv | ||
39 | + | ||
40 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
41 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
42 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
43 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/loongarch/disas.c | ||
46 | +++ b/target/loongarch/disas.c | ||
47 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmaddwod_w_hu_h, vvv) | ||
48 | INSN_LASX(xvmaddwod_d_wu_w, vvv) | ||
49 | INSN_LASX(xvmaddwod_q_du_d, vvv) | ||
50 | |||
51 | +INSN_LASX(xvdiv_b, vvv) | ||
52 | +INSN_LASX(xvdiv_h, vvv) | ||
53 | +INSN_LASX(xvdiv_w, vvv) | ||
54 | +INSN_LASX(xvdiv_d, vvv) | ||
55 | +INSN_LASX(xvdiv_bu, vvv) | ||
56 | +INSN_LASX(xvdiv_hu, vvv) | ||
57 | +INSN_LASX(xvdiv_wu, vvv) | ||
58 | +INSN_LASX(xvdiv_du, vvv) | ||
59 | +INSN_LASX(xvmod_b, vvv) | ||
60 | +INSN_LASX(xvmod_h, vvv) | ||
61 | +INSN_LASX(xvmod_w, vvv) | ||
62 | +INSN_LASX(xvmod_d, vvv) | ||
63 | +INSN_LASX(xvmod_bu, vvv) | ||
64 | +INSN_LASX(xvmod_hu, vvv) | ||
65 | +INSN_LASX(xvmod_wu, vvv) | ||
66 | +INSN_LASX(xvmod_du, vvv) | ||
67 | + | ||
68 | INSN_LASX(xvreplgr2vr_b, vr) | ||
69 | INSN_LASX(xvreplgr2vr_h, vr) | ||
70 | INSN_LASX(xvreplgr2vr_w, vr) | ||
71 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/loongarch/vec_helper.c | ||
74 | +++ b/target/loongarch/vec_helper.c | ||
75 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
76 | VReg *Vd = (VReg *)vd; \ | ||
77 | VReg *Vj = (VReg *)vj; \ | ||
78 | VReg *Vk = (VReg *)vk; \ | ||
79 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
80 | + int oprsz = simd_oprsz(desc); \ | ||
81 | + \ | ||
82 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
83 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
84 | } \ | ||
85 | } | ||
86 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
89 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
90 | @@ -XXX,XX +XXX,XX @@ TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu) | ||
91 | TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu) | ||
92 | TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu) | ||
93 | TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du) | ||
94 | +TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b) | ||
95 | +TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h) | ||
96 | +TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w) | ||
97 | +TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d) | ||
98 | +TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu) | ||
99 | +TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu) | ||
100 | +TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu) | ||
101 | +TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du) | ||
102 | +TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b) | ||
103 | +TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h) | ||
104 | +TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w) | ||
105 | +TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d) | ||
106 | +TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu) | ||
107 | +TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu) | ||
108 | +TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu) | ||
109 | +TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du) | ||
110 | |||
111 | static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) | ||
112 | { | ||
113 | -- | ||
114 | 2.39.1 | ||
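The helper-side change is what lets one implementation serve both widths: the loop bound becomes oprsz / (BIT / 8), i.e. bytes of vector divided by bytes per element. A minimal sketch for the byte case, with plain arrays standing in for the VReg accessors; DO_DIV's real corner cases (divide by zero, INT8_MIN / -1) are elided:

    #include <stdint.h>

    static void model_vdiv_b(int8_t *vd, const int8_t *vj,
                             const int8_t *vk, uint32_t oprsz)
    {
        /* BIT = 8: 16 lanes when oprsz = 16 (LSX), 32 when oprsz = 32 */
        for (uint32_t i = 0; i < oprsz / (8 / 8); i++) {
            vd[i] = vk[i] ? vj[i] / vk[i] : 0;
        }
    }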
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSAT.{B/H/W/D}[U]. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-29-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 9 ++++ | ||
9 | target/loongarch/disas.c | 9 ++++ | ||
10 | target/loongarch/vec_helper.c | 48 +++++++++++---------- | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++ | ||
12 | 4 files changed, 51 insertions(+), 23 deletions(-) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv | ||
19 | xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv | ||
20 | xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv | ||
21 | |||
22 | +xvsat_b 0111 01110010 01000 01 ... ..... ..... @vv_ui3 | ||
23 | +xvsat_h 0111 01110010 01000 1 .... ..... ..... @vv_ui4 | ||
24 | +xvsat_w 0111 01110010 01001 ..... ..... ..... @vv_ui5 | ||
25 | +xvsat_d 0111 01110010 0101 ...... ..... ..... @vv_ui6 | ||
26 | +xvsat_bu 0111 01110010 10000 01 ... ..... ..... @vv_ui3 | ||
27 | +xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4 | ||
28 | +xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5 | ||
29 | +xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6 | ||
30 | + | ||
31 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
32 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
33 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
34 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/loongarch/disas.c | ||
37 | +++ b/target/loongarch/disas.c | ||
38 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmod_hu, vvv) | ||
39 | INSN_LASX(xvmod_wu, vvv) | ||
40 | INSN_LASX(xvmod_du, vvv) | ||
41 | |||
42 | +INSN_LASX(xvsat_b, vv_i) | ||
43 | +INSN_LASX(xvsat_h, vv_i) | ||
44 | +INSN_LASX(xvsat_w, vv_i) | ||
45 | +INSN_LASX(xvsat_d, vv_i) | ||
46 | +INSN_LASX(xvsat_bu, vv_i) | ||
47 | +INSN_LASX(xvsat_hu, vv_i) | ||
48 | +INSN_LASX(xvsat_wu, vv_i) | ||
49 | +INSN_LASX(xvsat_du, vv_i) | ||
50 | + | ||
51 | INSN_LASX(xvreplgr2vr_b, vr) | ||
52 | INSN_LASX(xvreplgr2vr_h, vr) | ||
53 | INSN_LASX(xvreplgr2vr_w, vr) | ||
54 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/loongarch/vec_helper.c | ||
57 | +++ b/target/loongarch/vec_helper.c | ||
58 | @@ -XXX,XX +XXX,XX @@ VDIV(vmod_hu, 16, UH, DO_REMU) | ||
59 | VDIV(vmod_wu, 32, UW, DO_REMU) | ||
60 | VDIV(vmod_du, 64, UD, DO_REMU) | ||
61 | |||
62 | -#define VSAT_S(NAME, BIT, E) \ | ||
63 | -void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ | ||
64 | -{ \ | ||
65 | - int i; \ | ||
66 | - VReg *Vd = (VReg *)vd; \ | ||
67 | - VReg *Vj = (VReg *)vj; \ | ||
68 | - typedef __typeof(Vd->E(0)) TD; \ | ||
69 | - \ | ||
70 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
71 | - Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ | ||
72 | - Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ | ||
73 | - } \ | ||
74 | +#define VSAT_S(NAME, BIT, E) \ | ||
75 | +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ | ||
76 | +{ \ | ||
77 | + int i; \ | ||
78 | + VReg *Vd = (VReg *)vd; \ | ||
79 | + VReg *Vj = (VReg *)vj; \ | ||
80 | + typedef __typeof(Vd->E(0)) TD; \ | ||
81 | + int oprsz = simd_oprsz(desc); \ | ||
82 | + \ | ||
83 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
84 | + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ | ||
85 | + Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ | ||
86 | + } \ | ||
87 | } | ||
88 | |||
89 | VSAT_S(vsat_b, 8, B) | ||
90 | @@ -XXX,XX +XXX,XX @@ VSAT_S(vsat_h, 16, H) | ||
91 | VSAT_S(vsat_w, 32, W) | ||
92 | VSAT_S(vsat_d, 64, D) | ||
93 | |||
94 | -#define VSAT_U(NAME, BIT, E) \ | ||
95 | -void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ | ||
96 | -{ \ | ||
97 | - int i; \ | ||
98 | - VReg *Vd = (VReg *)vd; \ | ||
99 | - VReg *Vj = (VReg *)vj; \ | ||
100 | - typedef __typeof(Vd->E(0)) TD; \ | ||
101 | - \ | ||
102 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
103 | - Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ | ||
104 | - } \ | ||
105 | +#define VSAT_U(NAME, BIT, E) \ | ||
106 | +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ | ||
107 | +{ \ | ||
108 | + int i; \ | ||
109 | + VReg *Vd = (VReg *)vd; \ | ||
110 | + VReg *Vj = (VReg *)vj; \ | ||
111 | + typedef __typeof(Vd->E(0)) TD; \ | ||
112 | + int oprsz = simd_oprsz(desc); \ | ||
113 | + \ | ||
114 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
115 | + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ | ||
116 | + } \ | ||
117 | } | ||
118 | |||
119 | VSAT_U(vsat_bu, 8, UB) | ||
120 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
123 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
124 | @@ -XXX,XX +XXX,XX @@ TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s) | ||
125 | TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s) | ||
126 | TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s) | ||
127 | TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s) | ||
128 | +TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s) | ||
129 | +TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s) | ||
130 | +TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s) | ||
131 | +TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s) | ||
132 | |||
133 | static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) | ||
134 | { | ||
135 | @@ -XXX,XX +XXX,XX @@ TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u) | ||
136 | TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u) | ||
137 | TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u) | ||
138 | TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u) | ||
139 | +TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u) | ||
140 | +TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u) | ||
141 | +TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u) | ||
142 | +TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u) | ||
143 | |||
144 | TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b) | ||
145 | TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h) | ||
146 | -- | ||
147 | 2.39.1 | ||
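The signed form clamps each element to [(TD)~max, (TD)max], where max is the precomputed bound the translator passes in. A one-lane sketch for 32-bit elements (the name is illustrative):

    #include <stdint.h>

    static int32_t model_vsat_w(int32_t x, uint64_t max)
    {
        int32_t hi = (int32_t)max;    /* imm = 7 -> max = 127 */
        int32_t lo = (int32_t)~max;   /* ~127   -> -128       */

        return x > hi ? hi : (x < lo ? lo : x);
    }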
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVEXTH.{H.B/W.H/D.W/Q.D}; | ||
3 | - XVEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-30-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 9 ++++++ | ||
10 | target/loongarch/disas.c | 9 ++++++ | ||
11 | target/loongarch/vec_helper.c | 36 ++++++++++++++------- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 21 +++++++++--- | ||
13 | 4 files changed, 59 insertions(+), 16 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4 | ||
20 | xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5 | ||
21 | xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6 | ||
22 | |||
23 | +xvexth_h_b 0111 01101001 11101 11000 ..... ..... @vv | ||
24 | +xvexth_w_h 0111 01101001 11101 11001 ..... ..... @vv | ||
25 | +xvexth_d_w 0111 01101001 11101 11010 ..... ..... @vv | ||
26 | +xvexth_q_d 0111 01101001 11101 11011 ..... ..... @vv | ||
27 | +xvexth_hu_bu 0111 01101001 11101 11100 ..... ..... @vv | ||
28 | +xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv | ||
29 | +xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv | ||
30 | +xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv | ||
31 | + | ||
32 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
33 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
34 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
35 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/loongarch/disas.c | ||
38 | +++ b/target/loongarch/disas.c | ||
39 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsat_hu, vv_i) | ||
40 | INSN_LASX(xvsat_wu, vv_i) | ||
41 | INSN_LASX(xvsat_du, vv_i) | ||
42 | |||
43 | +INSN_LASX(xvexth_h_b, vv) | ||
44 | +INSN_LASX(xvexth_w_h, vv) | ||
45 | +INSN_LASX(xvexth_d_w, vv) | ||
46 | +INSN_LASX(xvexth_q_d, vv) | ||
47 | +INSN_LASX(xvexth_hu_bu, vv) | ||
48 | +INSN_LASX(xvexth_wu_hu, vv) | ||
49 | +INSN_LASX(xvexth_du_wu, vv) | ||
50 | +INSN_LASX(xvexth_qu_du, vv) | ||
51 | + | ||
52 | INSN_LASX(xvreplgr2vr_b, vr) | ||
53 | INSN_LASX(xvreplgr2vr_h, vr) | ||
54 | INSN_LASX(xvreplgr2vr_w, vr) | ||
55 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/loongarch/vec_helper.c | ||
58 | +++ b/target/loongarch/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_hu, 16, UH) | ||
60 | VSAT_U(vsat_wu, 32, UW) | ||
61 | VSAT_U(vsat_du, 64, UD) | ||
62 | |||
63 | -#define VEXTH(NAME, BIT, E1, E2) \ | ||
64 | -void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
65 | -{ \ | ||
66 | - int i; \ | ||
67 | - VReg *Vd = (VReg *)vd; \ | ||
68 | - VReg *Vj = (VReg *)vj; \ | ||
69 | - \ | ||
70 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
71 | - Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ | ||
72 | - } \ | ||
73 | +#define VEXTH(NAME, BIT, E1, E2) \ | ||
74 | +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
75 | +{ \ | ||
76 | + int i, j, ofs; \ | ||
77 | + VReg *Vd = (VReg *)vd; \ | ||
78 | + VReg *Vj = (VReg *)vj; \ | ||
79 | + int oprsz = simd_oprsz(desc); \ | ||
80 | + \ | ||
81 | + ofs = LSX_LEN / BIT; \ | ||
82 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
83 | + for (j = 0; j < ofs; j++) { \ | ||
84 | + Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \ | ||
85 | + } \ | ||
86 | + } \ | ||
87 | } | ||
88 | |||
89 | void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) | ||
90 | { | ||
91 | + int i; | ||
92 | VReg *Vd = (VReg *)vd; | ||
93 | VReg *Vj = (VReg *)vj; | ||
94 | + int oprsz = simd_oprsz(desc); | ||
95 | |||
96 | - Vd->Q(0) = int128_makes64(Vj->D(1)); | ||
97 | + for (i = 0; i < oprsz / 16; i++) { | ||
98 | + Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); | ||
99 | + } | ||
100 | } | ||
101 | |||
102 | void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) | ||
103 | { | ||
104 | + int i; | ||
105 | VReg *Vd = (VReg *)vd; | ||
106 | VReg *Vj = (VReg *)vj; | ||
107 | + int oprsz = simd_oprsz(desc); | ||
108 | |||
109 | - Vd->Q(0) = int128_make64((uint64_t)Vj->D(1)); | ||
110 | + for (i = 0; i < oprsz / 16; i++) { | ||
111 | + Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); | ||
112 | + } | ||
113 | } | ||
114 | |||
115 | VEXTH(vexth_h_b, 16, H, B) | ||
116 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
119 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
120 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, | ||
121 | static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
122 | gen_helper_gvec_2 *fn) | ||
123 | { | ||
124 | + if (!check_vec(ctx, oprsz)) { | ||
125 | + return true; | ||
126 | + } | ||
127 | + | ||
128 | tcg_gen_gvec_2_ool(vec_full_offset(a->vd), | ||
129 | vec_full_offset(a->vj), | ||
130 | oprsz, ctx->vl / 8, 0, fn); | ||
131 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
132 | |||
133 | static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
134 | { | ||
135 | - if (!check_vec(ctx, 16)) { | ||
136 | - return true; | ||
137 | - } | ||
138 | - | ||
139 | return gen_vv_vl(ctx, a, 16, fn); | ||
140 | } | ||
141 | |||
142 | +static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
143 | +{ | ||
144 | + return gen_vv_vl(ctx, a, 32, fn); | ||
145 | +} | ||
146 | + | ||
147 | static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, | ||
148 | gen_helper_gvec_2i *fn) | ||
149 | { | ||
150 | @@ -XXX,XX +XXX,XX @@ TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu) | ||
151 | TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu) | ||
152 | TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu) | ||
153 | TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du) | ||
154 | +TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b) | ||
155 | +TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h) | ||
156 | +TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w) | ||
157 | +TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d) | ||
158 | +TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu) | ||
159 | +TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu) | ||
160 | +TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu) | ||
161 | +TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du) | ||
162 | |||
163 | static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
164 | { | ||
165 | -- | ||
166 | 2.39.1 | ||
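The reworked index arithmetic keeps xvexth strictly per-128-bit-lane: destination element j of lane i is the sign-extended high-half element j + ofs of the same source lane. A sketch of the .h.b case, with plain arrays standing in for the VReg fields:

    #include <stdint.h>

    static void model_xvexth_h_b(int16_t *h, const int8_t *b, int lanes)
    {
        int ofs = 8;                      /* LSX_LEN / BIT = 128 / 16 */

        for (int i = 0; i < lanes; i++) { /* lanes = oprsz / 16 */
            for (int j = 0; j < ofs; j++) {
                h[j + i * ofs] = b[j + ofs + ofs * 2 * i];
            }
        }
    }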
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VEXT2XV.{H/W/D}.B, VEXT2XV.{HU/WU/DU}.BU; | ||
3 | - VEXT2XV.{W/D}.H, VEXT2XV.{WU/DU}.HU; | ||
4 | - VEXT2XV.D.W, VEXT2XV.DU.WU. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-31-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/helper.h | 13 ++++++++++ | ||
11 | target/loongarch/insns.decode | 13 ++++++++++ | ||
12 | target/loongarch/disas.c | 13 ++++++++++ | ||
13 | target/loongarch/vec_helper.c | 28 +++++++++++++++++++++ | ||
14 | target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++++++ | ||
15 | 5 files changed, 80 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/helper.h | ||
20 | +++ b/target/loongarch/helper.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
22 | DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
23 | DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
24 | |||
25 | +DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
26 | +DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
27 | +DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
28 | +DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
29 | +DEF_HELPER_FLAGS_3(vext2xv_d_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
30 | +DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
31 | +DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
32 | +DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
33 | +DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
34 | +DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
35 | +DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
36 | +DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) | ||
37 | + | ||
38 | DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
41 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/insns.decode | ||
44 | +++ b/target/loongarch/insns.decode | ||
45 | @@ -XXX,XX +XXX,XX @@ xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv | ||
46 | xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv | ||
47 | xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv | ||
48 | |||
49 | +vext2xv_h_b 0111 01101001 11110 00100 ..... ..... @vv | ||
50 | +vext2xv_w_b 0111 01101001 11110 00101 ..... ..... @vv | ||
51 | +vext2xv_d_b 0111 01101001 11110 00110 ..... ..... @vv | ||
52 | +vext2xv_w_h 0111 01101001 11110 00111 ..... ..... @vv | ||
53 | +vext2xv_d_h 0111 01101001 11110 01000 ..... ..... @vv | ||
54 | +vext2xv_d_w 0111 01101001 11110 01001 ..... ..... @vv | ||
55 | +vext2xv_hu_bu 0111 01101001 11110 01010 ..... ..... @vv | ||
56 | +vext2xv_wu_bu 0111 01101001 11110 01011 ..... ..... @vv | ||
57 | +vext2xv_du_bu 0111 01101001 11110 01100 ..... ..... @vv | ||
58 | +vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv | ||
59 | +vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv | ||
60 | +vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv | ||
61 | + | ||
62 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
63 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
64 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
65 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/loongarch/disas.c | ||
68 | +++ b/target/loongarch/disas.c | ||
69 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvexth_wu_hu, vv) | ||
70 | INSN_LASX(xvexth_du_wu, vv) | ||
71 | INSN_LASX(xvexth_qu_du, vv) | ||
72 | |||
73 | +INSN_LASX(vext2xv_h_b, vv) | ||
74 | +INSN_LASX(vext2xv_w_b, vv) | ||
75 | +INSN_LASX(vext2xv_d_b, vv) | ||
76 | +INSN_LASX(vext2xv_w_h, vv) | ||
77 | +INSN_LASX(vext2xv_d_h, vv) | ||
78 | +INSN_LASX(vext2xv_d_w, vv) | ||
79 | +INSN_LASX(vext2xv_hu_bu, vv) | ||
80 | +INSN_LASX(vext2xv_wu_bu, vv) | ||
81 | +INSN_LASX(vext2xv_du_bu, vv) | ||
82 | +INSN_LASX(vext2xv_wu_hu, vv) | ||
83 | +INSN_LASX(vext2xv_du_hu, vv) | ||
84 | +INSN_LASX(vext2xv_du_wu, vv) | ||
85 | + | ||
86 | INSN_LASX(xvreplgr2vr_b, vr) | ||
87 | INSN_LASX(xvreplgr2vr_h, vr) | ||
88 | INSN_LASX(xvreplgr2vr_w, vr) | ||
89 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/loongarch/vec_helper.c | ||
92 | +++ b/target/loongarch/vec_helper.c | ||
93 | @@ -XXX,XX +XXX,XX @@ VEXTH(vexth_hu_bu, 16, UH, UB) | ||
94 | VEXTH(vexth_wu_hu, 32, UW, UH) | ||
95 | VEXTH(vexth_du_wu, 64, UD, UW) | ||
96 | |||
97 | +#define VEXT2XV(NAME, BIT, E1, E2) \ | ||
98 | +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
99 | +{ \ | ||
100 | + int i; \ | ||
101 | + VReg temp = {}; \ | ||
102 | + VReg *Vd = (VReg *)vd; \ | ||
103 | + VReg *Vj = (VReg *)vj; \ | ||
104 | + int oprsz = simd_oprsz(desc); \ | ||
105 | + \ | ||
106 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
107 | + temp.E1(i) = Vj->E2(i); \ | ||
108 | + } \ | ||
109 | + *Vd = temp; \ | ||
110 | +} | ||
111 | + | ||
112 | +VEXT2XV(vext2xv_h_b, 16, H, B) | ||
113 | +VEXT2XV(vext2xv_w_b, 32, W, B) | ||
114 | +VEXT2XV(vext2xv_d_b, 64, D, B) | ||
115 | +VEXT2XV(vext2xv_w_h, 32, W, H) | ||
116 | +VEXT2XV(vext2xv_d_h, 64, D, H) | ||
117 | +VEXT2XV(vext2xv_d_w, 64, D, W) | ||
118 | +VEXT2XV(vext2xv_hu_bu, 16, UH, UB) | ||
119 | +VEXT2XV(vext2xv_wu_bu, 32, UW, UB) | ||
120 | +VEXT2XV(vext2xv_du_bu, 64, UD, UB) | ||
121 | +VEXT2XV(vext2xv_wu_hu, 32, UW, UH) | ||
122 | +VEXT2XV(vext2xv_du_hu, 64, UD, UH) | ||
123 | +VEXT2XV(vext2xv_du_wu, 64, UD, UW) | ||
124 | + | ||
125 | #define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) | ||
126 | |||
127 | DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) | ||
128 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
131 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
132 | @@ -XXX,XX +XXX,XX @@ TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu) | ||
133 | TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu) | ||
134 | TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du) | ||
135 | |||
136 | +TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b) | ||
137 | +TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b) | ||
138 | +TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b) | ||
139 | +TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h) | ||
140 | +TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h) | ||
141 | +TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w) | ||
142 | +TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu) | ||
143 | +TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu) | ||
144 | +TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu) | ||
145 | +TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu) | ||
146 | +TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu) | ||
147 | +TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu) | ||
148 | + | ||
149 | static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
150 | { | ||
151 | TCGv_vec t1, zero; | ||
152 | -- | ||
153 | 2.39.1 | ||
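Unlike xvexth, vext2xv reads the low elements of the whole 256-bit source linearly, crossing the 128-bit lane boundary, which is why the helper builds the result in a VReg temp first (vd may alias vj). A sketch of the .h.b case under the same aliasing assumption:

    #include <stdint.h>
    #include <string.h>

    static void model_vext2xv_h_b(int16_t *vd, const int8_t *vj)
    {
        int16_t tmp[16];                /* oprsz / (BIT / 8) = 32 / 2 */

        for (int i = 0; i < 16; i++) {
            tmp[i] = vj[i];             /* sign-extend byte i */
        }
        memcpy(vd, tmp, sizeof(tmp));   /* safe even if vd aliases vj */
    }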
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSIGNCOV.{B/H/W/D}. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-32-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 +++++ | ||
9 | target/loongarch/disas.c | 5 +++++ | ||
10 | target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++ | ||
11 | 3 files changed, 14 insertions(+) | ||
12 | |||
13 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/insns.decode | ||
16 | +++ b/target/loongarch/insns.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv | ||
18 | vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv | ||
19 | vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv | ||
20 | |||
21 | +xvsigncov_b 0111 01010010 11100 ..... ..... ..... @vvv | ||
22 | +xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv | ||
23 | +xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv | ||
24 | +xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv | ||
25 | + | ||
26 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
27 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
28 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
29 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/loongarch/disas.c | ||
32 | +++ b/target/loongarch/disas.c | ||
33 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(vext2xv_wu_hu, vv) | ||
34 | INSN_LASX(vext2xv_du_hu, vv) | ||
35 | INSN_LASX(vext2xv_du_wu, vv) | ||
36 | |||
37 | +INSN_LASX(xvsigncov_b, vvv) | ||
38 | +INSN_LASX(xvsigncov_h, vvv) | ||
39 | +INSN_LASX(xvsigncov_w, vvv) | ||
40 | +INSN_LASX(xvsigncov_d, vvv) | ||
41 | + | ||
42 | INSN_LASX(xvreplgr2vr_b, vr) | ||
43 | INSN_LASX(xvreplgr2vr_h, vr) | ||
44 | INSN_LASX(xvreplgr2vr_w, vr) | ||
45 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
48 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
49 | @@ -XXX,XX +XXX,XX @@ TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov) | ||
50 | TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov) | ||
51 | TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov) | ||
52 | TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov) | ||
53 | +TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov) | ||
54 | +TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov) | ||
55 | +TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov) | ||
56 | +TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov) | ||
57 | |||
58 | TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b) | ||
59 | TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h) | ||
60 | -- | ||
61 | 2.39.1 | ||
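For reference, the scalar rule behind these gvec expansions is the DO_SIGNCOV macro visible in the previous patch's context: the sign of the first operand selects 0, the negation, or the value itself. A one-lane sketch for 32-bit elements, negating via unsigned arithmetic so that INT32_MIN wraps instead of overflowing:

    #include <stdint.h>

    static int32_t model_vsigncov_w(int32_t a, int32_t b)
    {
        return a == 0 ? 0
             : a < 0  ? (int32_t)(0u - (uint32_t)b)
                      : b;
    }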
1 | From: Jiaxun Yang <jiaxun.yang@flygoat.com> | 1 | This patch includes: |
---|---|---|---|
2 | - XVMSKLTZ.{B/H/W/D}; | ||
3 | - XVMSKGEZ.B; | ||
4 | - XVMSKNZ.B. | ||
2 | 5 | ||
3 | As per "Loongson 3A5000/3B5000 Processor Reference Manual", | 6 | Signed-off-by: Song Gao <gaosong@loongson.cn> |
4 | Loongson 3A5000's IPI implementation have 4 mailboxes per | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | core. | 8 | Message-Id: <20230914022645.1151356-33-gaosong@loongson.cn> |
9 | --- | ||
10 | target/loongarch/insns.decode | 7 ++ | ||
11 | target/loongarch/disas.c | 7 ++ | ||
12 | target/loongarch/vec_helper.c | 78 ++++++++++++++------- | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 6 ++ | ||
14 | 4 files changed, 74 insertions(+), 24 deletions(-) | ||
6 | 15 | ||
7 | However, in 78464f023b54 ("hw/loongarch/virt: Modify ipi as | 16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode |
8 | percpu device"), the number of IPI mailboxes was reduced to | 17 | index XXXXXXX..XXXXXXX 100644 |
9 | one, which mismatches actual hardware. | 18 | --- a/target/loongarch/insns.decode |
10 | 19 | +++ b/target/loongarch/insns.decode | |
11 | It won't affect LoongArch based system as LoongArch boot code | 20 | @@ -XXX,XX +XXX,XX @@ xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv |
12 | only uses the first mailbox, however MIPS based Loongson boot | 21 | xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv |
13 | code uses all 4 mailboxes. | 22 | xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv |
14 | 23 | ||
15 | Fixes Coverity CID: 1512452, 1512453 | 24 | +xvmskltz_b 0111 01101001 11000 10000 ..... ..... @vv |
16 | Fixes: 78464f023b54 ("hw/loongarch/virt: Modify ipi as percpu device") | 25 | +xvmskltz_h 0111 01101001 11000 10001 ..... ..... @vv |
17 | Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> | 26 | +xvmskltz_w 0111 01101001 11000 10010 ..... ..... @vv |
18 | Reviewed-by: Song Gao <gaosong@loongson.cn> | 27 | +xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv |
19 | Message-Id: <20230521102307.87081-2-jiaxun.yang@flygoat.com> | 28 | +xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv |
20 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 29 | +xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv |
21 | --- | ||
22 | hw/intc/loongarch_ipi.c | 6 +++--- | ||
23 | include/hw/intc/loongarch_ipi.h | 4 +++- | ||
24 | 2 files changed, 6 insertions(+), 4 deletions(-) | ||
25 | |||
26 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/hw/intc/loongarch_ipi.c | ||
29 | +++ b/hw/intc/loongarch_ipi.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_init(Object *obj) | ||
31 | |||
32 | static const VMStateDescription vmstate_ipi_core = { | ||
33 | .name = "ipi-single", | ||
34 | - .version_id = 1, | ||
35 | - .minimum_version_id = 1, | ||
36 | + .version_id = 2, | ||
37 | + .minimum_version_id = 2, | ||
38 | .fields = (VMStateField[]) { | ||
39 | VMSTATE_UINT32(status, IPICore), | ||
40 | VMSTATE_UINT32(en, IPICore), | ||
41 | VMSTATE_UINT32(set, IPICore), | ||
42 | VMSTATE_UINT32(clear, IPICore), | ||
43 | - VMSTATE_UINT32_ARRAY(buf, IPICore, 2), | ||
44 | + VMSTATE_UINT32_ARRAY(buf, IPICore, IPI_MBX_NUM * 2), | ||
45 | VMSTATE_END_OF_LIST() | ||
46 | } | ||
47 | }; | ||
48 | diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/include/hw/intc/loongarch_ipi.h | ||
51 | +++ b/include/hw/intc/loongarch_ipi.h | ||
52 | @@ -XXX,XX +XXX,XX @@ | ||
53 | #define MAIL_SEND_OFFSET 0 | ||
54 | #define ANY_SEND_OFFSET (IOCSR_ANY_SEND - IOCSR_MAIL_SEND) | ||
55 | |||
56 | +#define IPI_MBX_NUM 4 | ||
57 | + | 30 | + |
58 | #define TYPE_LOONGARCH_IPI "loongarch_ipi" | 31 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr |
59 | OBJECT_DECLARE_SIMPLE_TYPE(LoongArchIPI, LOONGARCH_IPI) | 32 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr |
60 | 33 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | |
61 | @@ -XXX,XX +XXX,XX @@ typedef struct IPICore { | 34 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c |
62 | uint32_t set; | 35 | index XXXXXXX..XXXXXXX 100644 |
63 | uint32_t clear; | 36 | --- a/target/loongarch/disas.c |
64 | /* 64bit buf divide into 2 32bit buf */ | 37 | +++ b/target/loongarch/disas.c |
65 | - uint32_t buf[2]; | 38 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsigncov_h, vvv) |
66 | + uint32_t buf[IPI_MBX_NUM * 2]; | 39 | INSN_LASX(xvsigncov_w, vvv) |
67 | qemu_irq irq; | 40 | INSN_LASX(xvsigncov_d, vvv) |
68 | } IPICore; | 41 | |
42 | +INSN_LASX(xvmskltz_b, vv) | ||
43 | +INSN_LASX(xvmskltz_h, vv) | ||
44 | +INSN_LASX(xvmskltz_w, vv) | ||
45 | +INSN_LASX(xvmskltz_d, vv) | ||
46 | +INSN_LASX(xvmskgez_b, vv) | ||
47 | +INSN_LASX(xvmsknz_b, vv) | ||
48 | + | ||
49 | INSN_LASX(xvreplgr2vr_b, vr) | ||
50 | INSN_LASX(xvreplgr2vr_h, vr) | ||
51 | INSN_LASX(xvreplgr2vr_w, vr) | ||
52 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/loongarch/vec_helper.c | ||
55 | +++ b/target/loongarch/vec_helper.c | ||
56 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_b(int64_t val) | ||
57 | |||
58 | void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) | ||
59 | { | ||
60 | + int i; | ||
61 | uint16_t temp = 0; | ||
62 | VReg *Vd = (VReg *)vd; | ||
63 | VReg *Vj = (VReg *)vj; | ||
64 | + int oprsz = simd_oprsz(desc); | ||
65 | |||
66 | - temp = do_vmskltz_b(Vj->D(0)); | ||
67 | - temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
68 | - Vd->D(0) = temp; | ||
69 | - Vd->D(1) = 0; | ||
70 | + for (i = 0; i < oprsz / 16; i++) { | ||
71 | + temp = 0; | ||
72 | + temp = do_vmskltz_b(Vj->D(2 * i)); | ||
73 | + temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); | ||
74 | + Vd->D(2 * i) = temp; | ||
75 | + Vd->D(2 * i + 1) = 0; | ||
76 | + } | ||
77 | } | ||
78 | |||
79 | static uint64_t do_vmskltz_h(int64_t val) | ||
80 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_h(int64_t val) | ||
81 | |||
82 | void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) | ||
83 | { | ||
84 | + int i; | ||
85 | uint16_t temp = 0; | ||
86 | VReg *Vd = (VReg *)vd; | ||
87 | VReg *Vj = (VReg *)vj; | ||
88 | + int oprsz = simd_oprsz(desc); | ||
89 | |||
90 | - temp = do_vmskltz_h(Vj->D(0)); | ||
91 | - temp |= (do_vmskltz_h(Vj->D(1)) << 4); | ||
92 | - Vd->D(0) = temp; | ||
93 | - Vd->D(1) = 0; | ||
94 | + for (i = 0; i < oprsz / 16; i++) { | ||
95 | + temp = 0; | ||
96 | + temp = do_vmskltz_h(Vj->D(2 * i)); | ||
97 | + temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); | ||
98 | + Vd->D(2 * i) = temp; | ||
99 | + Vd->D(2 * i + 1) = 0; | ||
100 | + } | ||
101 | } | ||
102 | |||
103 | static uint64_t do_vmskltz_w(int64_t val) | ||
104 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_w(int64_t val) | ||
105 | |||
106 | void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) | ||
107 | { | ||
108 | + int i; | ||
109 | uint16_t temp = 0; | ||
110 | VReg *Vd = (VReg *)vd; | ||
111 | VReg *Vj = (VReg *)vj; | ||
112 | + int oprsz = simd_oprsz(desc); | ||
113 | |||
114 | - temp = do_vmskltz_w(Vj->D(0)); | ||
115 | - temp |= (do_vmskltz_w(Vj->D(1)) << 2); | ||
116 | - Vd->D(0) = temp; | ||
117 | - Vd->D(1) = 0; | ||
118 | + for (i = 0; i < oprsz / 16; i++) { | ||
119 | + temp = 0; | ||
120 | + temp = do_vmskltz_w(Vj->D(2 * i)); | ||
121 | + temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); | ||
122 | + Vd->D(2 * i) = temp; | ||
123 | + Vd->D(2 * i + 1) = 0; | ||
124 | + } | ||
125 | } | ||
126 | |||
127 | static uint64_t do_vmskltz_d(int64_t val) | ||
128 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskltz_d(int64_t val) | ||
129 | } | ||
130 | void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) | ||
131 | { | ||
132 | + int i; | ||
133 | uint16_t temp = 0; | ||
134 | VReg *Vd = (VReg *)vd; | ||
135 | VReg *Vj = (VReg *)vj; | ||
136 | + int oprsz = simd_oprsz(desc); | ||
137 | |||
138 | - temp = do_vmskltz_d(Vj->D(0)); | ||
139 | - temp |= (do_vmskltz_d(Vj->D(1)) << 1); | ||
140 | - Vd->D(0) = temp; | ||
141 | - Vd->D(1) = 0; | ||
142 | + for (i = 0; i < oprsz / 16; i++) { | ||
143 | + temp = 0; | ||
144 | + temp = do_vmskltz_d(Vj->D(2 * i)); | ||
145 | + temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); | ||
146 | + Vd->D(2 * i) = temp; | ||
147 | + Vd->D(2 * i + 1) = 0; | ||
148 | + } | ||
149 | } | ||
150 | |||
151 | void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) | ||
152 | { | ||
153 | + int i; | ||
154 | uint16_t temp = 0; | ||
155 | VReg *Vd = (VReg *)vd; | ||
156 | VReg *Vj = (VReg *)vj; | ||
157 | + int oprsz = simd_oprsz(desc); | ||
158 | |||
159 | - temp = do_vmskltz_b(Vj->D(0)); | ||
160 | - temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
161 | - Vd->D(0) = (uint16_t)(~temp); | ||
162 | - Vd->D(1) = 0; | ||
163 | + for (i = 0; i < oprsz / 16; i++) { | ||
164 | + temp = 0; | ||
165 | + temp = do_vmskltz_b(Vj->D(2 * i)); | ||
166 | + temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); | ||
167 | + Vd->D(2 * i) = (uint16_t)(~temp); | ||
168 | + Vd->D(2 * i + 1) = 0; | ||
169 | + } | ||
170 | } | ||
171 | |||
172 | static uint64_t do_vmskez_b(uint64_t a) | ||
173 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_vmskez_b(uint64_t a) | ||
174 | |||
175 | void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) | ||
176 | { | ||
177 | + int i; | ||
178 | uint16_t temp = 0; | ||
179 | VReg *Vd = (VReg *)vd; | ||
180 | VReg *Vj = (VReg *)vj; | ||
181 | + int oprsz = simd_oprsz(desc); | ||
182 | |||
183 | - temp = do_vmskez_b(Vj->D(0)); | ||
184 | - temp |= (do_vmskez_b(Vj->D(1)) << 8); | ||
185 | - Vd->D(0) = (uint16_t)(~temp); | ||
186 | - Vd->D(1) = 0; | ||
187 | + for (i = 0; i < oprsz / 16; i++) { | ||
188 | + temp = 0; | ||
189 | + temp = do_vmskez_b(Vj->D(2 * i)); | ||
190 | + temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); | ||
191 | + Vd->D(2 * i) = (uint16_t)(~temp); | ||
192 | + Vd->D(2 * i + 1) = 0; | ||
193 | + } | ||
194 | } | ||
195 | |||
196 | void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
197 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
198 | index XXXXXXX..XXXXXXX 100644 | ||
199 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
200 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
201 | @@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w) | ||
202 | TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d) | ||
203 | TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b) | ||
204 | TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b) | ||
205 | +TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b) | ||
206 | +TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h) | ||
207 | +TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w) | ||
208 | +TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d) | ||
209 | +TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b) | ||
210 | +TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) | ||
211 | |||
212 | #define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0)) | ||
69 | 213 | ||
70 | -- | 214 | -- |
71 | 2.39.1 | 215 | 2.39.1
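Each mask helper now emits 16 result bits per 128-bit lane, stored in the lane's even doubleword with the odd doubleword zeroed. A sketch of xvmskltz.b on a 256-bit register; do_msk8() is an illustrative stand-in for do_vmskltz_b(), whose body lies outside this hunk:

    #include <stdint.h>

    /* collect the sign bit of each of the 8 bytes in a doubleword */
    static uint64_t do_msk8(int64_t v)
    {
        uint64_t m = 0;

        for (int b = 0; b < 8; b++) {
            m |= (uint64_t)((v >> (8 * b + 7)) & 1) << b;
        }
        return m;
    }

    static void model_xvmskltz_b(uint64_t d[4], const int64_t j[4])
    {
        for (int i = 0; i < 2; i++) {   /* oprsz / 16 = 2 lanes */
            uint64_t t = do_msk8(j[2 * i]) | (do_msk8(j[2 * i + 1]) << 8);

            d[2 * i] = t;
            d[2 * i + 1] = 0;
        }
    }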
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVLDI. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-34-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 2 ++ | ||
9 | target/loongarch/disas.c | 7 +++++++ | ||
10 | target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++------- | ||
11 | 3 files changed, 15 insertions(+), 7 deletions(-) | ||
12 | |||
13 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/insns.decode | ||
16 | +++ b/target/loongarch/insns.decode | ||
17 | @@ -XXX,XX +XXX,XX @@ xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv | ||
18 | xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv | ||
19 | xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv | ||
20 | |||
21 | +xvldi 0111 01111110 00 ............. ..... @v_i13 | ||
22 | + | ||
23 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
24 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
25 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
26 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/loongarch/disas.c | ||
29 | +++ b/target/loongarch/disas.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \ | ||
31 | return true; \ | ||
32 | } | ||
33 | |||
34 | +static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic) | ||
35 | +{ | ||
36 | + output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm); | ||
37 | +} | ||
38 | + | ||
39 | static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic) | ||
40 | { | ||
41 | output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk); | ||
42 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmskltz_d, vv) | ||
43 | INSN_LASX(xvmskgez_b, vv) | ||
44 | INSN_LASX(xvmsknz_b, vv) | ||
45 | |||
46 | +INSN_LASX(xvldi, v_i) | ||
47 | + | ||
48 | INSN_LASX(xvreplgr2vr_b, vr) | ||
49 | INSN_LASX(xvreplgr2vr_h, vr) | ||
50 | INSN_LASX(xvreplgr2vr_w, vr) | ||
51 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
54 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
55 | @@ -XXX,XX +XXX,XX @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) | ||
56 | return data; | ||
57 | } | ||
58 | |||
59 | -static bool trans_vldi(DisasContext *ctx, arg_vldi *a) | ||
60 | +static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) | ||
61 | { | ||
62 | int sel, vece; | ||
63 | uint64_t value; | ||
64 | |||
65 | - if (!avail_LSX(ctx)) { | ||
66 | - return false; | ||
67 | - } | ||
68 | - | ||
69 | - if (!check_vec(ctx, 16)) { | ||
70 | + if (!check_vec(ctx, oprsz)) { | ||
71 | return true; | ||
72 | } | ||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a) | ||
75 | vece = (a->imm >> 10) & 0x3; | ||
76 | } | ||
77 | |||
78 | - tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, | ||
79 | + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8, | ||
80 | tcg_constant_i64(value)); | ||
81 | return true; | ||
82 | } | ||
83 | |||
84 | +TRANS(vldi, LSX, gen_vldi, 16) | ||
85 | +TRANS(xvldi, LASX, gen_vldi, 32) | ||
86 | + | ||
87 | TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and) | ||
88 | TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or) | ||
89 | TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor) | ||
90 | -- | ||
91 | 2.39.1 | ||
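Once vldi_get_value() has decoded the immediate (that logic is unchanged), the store is a plain gvec dup whose only width-dependent input is oprsz. A simplified model for the 64-bit element case; tcg_gen_gvec_dup_i64 additionally zero-fills from oprsz up to maxsz, which is omitted here:

    #include <stdint.h>

    static void model_vldi_store(uint64_t *vd, uint64_t value,
                                 uint32_t oprsz)
    {
        for (uint32_t i = 0; i < oprsz / 8; i++) {
            vd[i] = value;              /* 2 copies for vldi, 4 for xvldi */
        }
    }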
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XV{AND/OR/XOR/NOR/ANDN/ORN}.V; | ||
3 | - XV{AND/OR/XOR/NOR}I.B. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-35-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 12 +++++++ | ||
10 | target/loongarch/disas.c | 12 +++++++ | ||
11 | target/loongarch/vec_helper.c | 4 +-- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 38 ++++++++++++--------- | ||
13 | 4 files changed, 48 insertions(+), 18 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv | ||
20 | |||
21 | xvldi 0111 01111110 00 ............. ..... @v_i13 | ||
22 | |||
23 | +xvand_v 0111 01010010 01100 ..... ..... ..... @vvv | ||
24 | +xvor_v 0111 01010010 01101 ..... ..... ..... @vvv | ||
25 | +xvxor_v 0111 01010010 01110 ..... ..... ..... @vvv | ||
26 | +xvnor_v 0111 01010010 01111 ..... ..... ..... @vvv | ||
27 | +xvandn_v 0111 01010010 10000 ..... ..... ..... @vvv | ||
28 | +xvorn_v 0111 01010010 10001 ..... ..... ..... @vvv | ||
29 | + | ||
30 | +xvandi_b 0111 01111101 00 ........ ..... ..... @vv_ui8 | ||
31 | +xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8 | ||
32 | +xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8 | ||
33 | +xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8 | ||
34 | + | ||
35 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
36 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
37 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
38 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/loongarch/disas.c | ||
41 | +++ b/target/loongarch/disas.c | ||
42 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvmsknz_b, vv) | ||
43 | |||
44 | INSN_LASX(xvldi, v_i) | ||
45 | |||
46 | +INSN_LASX(xvand_v, vvv) | ||
47 | +INSN_LASX(xvor_v, vvv) | ||
48 | +INSN_LASX(xvxor_v, vvv) | ||
49 | +INSN_LASX(xvnor_v, vvv) | ||
50 | +INSN_LASX(xvandn_v, vvv) | ||
51 | +INSN_LASX(xvorn_v, vvv) | ||
52 | + | ||
53 | +INSN_LASX(xvandi_b, vv_i) | ||
54 | +INSN_LASX(xvori_b, vv_i) | ||
55 | +INSN_LASX(xvxori_b, vv_i) | ||
56 | +INSN_LASX(xvnori_b, vv_i) | ||
57 | + | ||
58 | INSN_LASX(xvreplgr2vr_b, vr) | ||
59 | INSN_LASX(xvreplgr2vr_h, vr) | ||
60 | INSN_LASX(xvreplgr2vr_w, vr) | ||
61 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/target/loongarch/vec_helper.c | ||
64 | +++ b/target/loongarch/vec_helper.c | ||
65 | @@ -XXX,XX +XXX,XX @@ void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) | ||
66 | } | ||
67 | } | ||
68 | |||
69 | -void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
70 | +void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
71 | { | ||
72 | int i; | ||
73 | VReg *Vd = (VReg *)vd; | ||
74 | VReg *Vj = (VReg *)vj; | ||
75 | |||
76 | - for (i = 0; i < LSX_LEN/8; i++) { | ||
77 | + for (i = 0; i < simd_oprsz(desc); i++) { | ||
78 | Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); | ||
79 | } | ||
80 | } | ||
81 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
84 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
85 | @@ -XXX,XX +XXX,XX @@ static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) | ||
86 | TRANS(vldi, LSX, gen_vldi, 16) | ||
87 | TRANS(xvldi, LASX, gen_vldi, 32) | ||
88 | |||
89 | -TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and) | ||
90 | -TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or) | ||
91 | -TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor) | ||
92 | -TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor) | ||
93 | - | ||
94 | -static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a) | ||
95 | +static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz) | ||
96 | { | ||
97 | uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
98 | |||
99 | - if (!avail_LSX(ctx)) { | ||
100 | - return false; | ||
101 | - } | ||
102 | - | ||
103 | - if (!check_vec(ctx, 16)) { | ||
104 | + if (!check_vec(ctx, oprsz)) { | ||
105 | return true; | ||
106 | } | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a) | ||
109 | vj_ofs = vec_full_offset(a->vj); | ||
110 | vk_ofs = vec_full_offset(a->vk); | ||
111 | |||
112 | - tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8); | ||
113 | + tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8); | ||
114 | return true; | ||
115 | } | ||
116 | -TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc) | ||
117 | -TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi) | ||
118 | -TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori) | ||
119 | -TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori) | ||
120 | |||
121 | static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
122 | { | ||
123 | @@ -XXX,XX +XXX,XX @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
124 | tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op); | ||
125 | } | ||
126 | |||
127 | +TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and) | ||
128 | +TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or) | ||
129 | +TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor) | ||
130 | +TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor) | ||
131 | +TRANS(vandn_v, LSX, gen_vandn_v, 16) | ||
132 | +TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc) | ||
133 | +TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi) | ||
134 | +TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori) | ||
135 | +TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori) | ||
136 | TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b) | ||
137 | +TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and) | ||
138 | +TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or) | ||
139 | +TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor) | ||
140 | +TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor) | ||
141 | +TRANS(xvandn_v, LASX, gen_vandn_v, 32) | ||
142 | +TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc) | ||
143 | +TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi) | ||
144 | +TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori) | ||
145 | +TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori) | ||
146 | +TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b) | ||
147 | |||
148 | TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv) | ||
149 | TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv) | ||
150 | -- | ||
151 | 2.39.1 | ||
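The only out-of-line helper touched here is vnori_b, whose loop bound moves from a fixed 16 bytes to simd_oprsz(desc). Its per-byte rule, as a sketch with plain arrays in place of the VReg accessors:

    #include <stdint.h>

    static void model_vnori_b(uint8_t *vd, const uint8_t *vj, uint8_t imm,
                              uint32_t oprsz)
    {
        for (uint32_t i = 0; i < oprsz; i++) {   /* 16 LSX, 32 LASX */
            vd[i] = ~(vj[i] | imm);
        }
    }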
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSLL[I].{B/H/W/D}; | ||
3 | - XVSRL[I].{B/H/W/D}; | ||
4 | - XVSRA[I].{B/H/W/D}; | ||
5 | - XVROTR[I].{B/H/W/D}. | ||
1 | 6 | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230914022645.1151356-36-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/insns.decode | 33 +++++++++++++++++++ | ||
12 | target/loongarch/disas.c | 36 +++++++++++++++++++++ | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++ | ||
14 | 3 files changed, 101 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8 | ||
21 | xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8 | ||
22 | xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8 | ||
23 | |||
24 | +xvsll_b 0111 01001110 10000 ..... ..... ..... @vvv | ||
25 | +xvsll_h 0111 01001110 10001 ..... ..... ..... @vvv | ||
26 | +xvsll_w 0111 01001110 10010 ..... ..... ..... @vvv | ||
27 | +xvsll_d 0111 01001110 10011 ..... ..... ..... @vvv | ||
28 | +xvslli_b 0111 01110010 11000 01 ... ..... ..... @vv_ui3 | ||
29 | +xvslli_h 0111 01110010 11000 1 .... ..... ..... @vv_ui4 | ||
30 | +xvslli_w 0111 01110010 11001 ..... ..... ..... @vv_ui5 | ||
31 | +xvslli_d 0111 01110010 1101 ...... ..... ..... @vv_ui6 | ||
32 | +xvsrl_b 0111 01001110 10100 ..... ..... ..... @vvv | ||
33 | +xvsrl_h 0111 01001110 10101 ..... ..... ..... @vvv | ||
34 | +xvsrl_w 0111 01001110 10110 ..... ..... ..... @vvv | ||
35 | +xvsrl_d 0111 01001110 10111 ..... ..... ..... @vvv | ||
36 | +xvsrli_b 0111 01110011 00000 01 ... ..... ..... @vv_ui3 | ||
37 | +xvsrli_h 0111 01110011 00000 1 .... ..... ..... @vv_ui4 | ||
38 | +xvsrli_w 0111 01110011 00001 ..... ..... ..... @vv_ui5 | ||
39 | +xvsrli_d 0111 01110011 0001 ...... ..... ..... @vv_ui6 | ||
40 | +xvsra_b 0111 01001110 11000 ..... ..... ..... @vvv | ||
41 | +xvsra_h 0111 01001110 11001 ..... ..... ..... @vvv | ||
42 | +xvsra_w 0111 01001110 11010 ..... ..... ..... @vvv | ||
43 | +xvsra_d 0111 01001110 11011 ..... ..... ..... @vvv | ||
44 | +xvsrai_b 0111 01110011 01000 01 ... ..... ..... @vv_ui3 | ||
45 | +xvsrai_h 0111 01110011 01000 1 .... ..... ..... @vv_ui4 | ||
46 | +xvsrai_w 0111 01110011 01001 ..... ..... ..... @vv_ui5 | ||
47 | +xvsrai_d 0111 01110011 0101 ...... ..... ..... @vv_ui6 | ||
48 | +xvrotr_b 0111 01001110 11100 ..... ..... ..... @vvv | ||
49 | +xvrotr_h 0111 01001110 11101 ..... ..... ..... @vvv | ||
50 | +xvrotr_w 0111 01001110 11110 ..... ..... ..... @vvv | ||
51 | +xvrotr_d 0111 01001110 11111 ..... ..... ..... @vvv | ||
52 | +xvrotri_b 0111 01101010 00000 01 ... ..... ..... @vv_ui3 | ||
53 | +xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4 | ||
54 | +xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5 | ||
55 | +xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6 | ||
56 | + | ||
57 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
58 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
59 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
60 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/disas.c | ||
63 | +++ b/target/loongarch/disas.c | ||
64 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvori_b, vv_i) | ||
65 | INSN_LASX(xvxori_b, vv_i) | ||
66 | INSN_LASX(xvnori_b, vv_i) | ||
67 | |||
68 | +INSN_LASX(xvsll_b, vvv) | ||
69 | +INSN_LASX(xvsll_h, vvv) | ||
70 | +INSN_LASX(xvsll_w, vvv) | ||
71 | +INSN_LASX(xvsll_d, vvv) | ||
72 | +INSN_LASX(xvslli_b, vv_i) | ||
73 | +INSN_LASX(xvslli_h, vv_i) | ||
74 | +INSN_LASX(xvslli_w, vv_i) | ||
75 | +INSN_LASX(xvslli_d, vv_i) | ||
76 | + | ||
77 | +INSN_LASX(xvsrl_b, vvv) | ||
78 | +INSN_LASX(xvsrl_h, vvv) | ||
79 | +INSN_LASX(xvsrl_w, vvv) | ||
80 | +INSN_LASX(xvsrl_d, vvv) | ||
81 | +INSN_LASX(xvsrli_b, vv_i) | ||
82 | +INSN_LASX(xvsrli_h, vv_i) | ||
83 | +INSN_LASX(xvsrli_w, vv_i) | ||
84 | +INSN_LASX(xvsrli_d, vv_i) | ||
85 | + | ||
86 | +INSN_LASX(xvsra_b, vvv) | ||
87 | +INSN_LASX(xvsra_h, vvv) | ||
88 | +INSN_LASX(xvsra_w, vvv) | ||
89 | +INSN_LASX(xvsra_d, vvv) | ||
90 | +INSN_LASX(xvsrai_b, vv_i) | ||
91 | +INSN_LASX(xvsrai_h, vv_i) | ||
92 | +INSN_LASX(xvsrai_w, vv_i) | ||
93 | +INSN_LASX(xvsrai_d, vv_i) | ||
94 | + | ||
95 | +INSN_LASX(xvrotr_b, vvv) | ||
96 | +INSN_LASX(xvrotr_h, vvv) | ||
97 | +INSN_LASX(xvrotr_w, vvv) | ||
98 | +INSN_LASX(xvrotr_d, vvv) | ||
99 | +INSN_LASX(xvrotri_b, vv_i) | ||
100 | +INSN_LASX(xvrotri_h, vv_i) | ||
101 | +INSN_LASX(xvrotri_w, vv_i) | ||
102 | +INSN_LASX(xvrotri_d, vv_i) | ||
103 | + | ||
104 | INSN_LASX(xvreplgr2vr_b, vr) | ||
105 | INSN_LASX(xvreplgr2vr_h, vr) | ||
106 | INSN_LASX(xvreplgr2vr_w, vr) | ||
107 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
110 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
111 | @@ -XXX,XX +XXX,XX @@ TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli) | ||
112 | TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli) | ||
113 | TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli) | ||
114 | TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli) | ||
115 | +TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv) | ||
116 | +TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv) | ||
117 | +TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv) | ||
118 | +TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv) | ||
119 | +TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli) | ||
120 | +TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli) | ||
121 | +TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli) | ||
122 | +TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli) | ||
123 | |||
124 | TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv) | ||
125 | TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv) | ||
126 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri) | ||
127 | TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri) | ||
128 | TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri) | ||
129 | TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri) | ||
130 | +TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv) | ||
131 | +TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv) | ||
132 | +TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv) | ||
133 | +TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv) | ||
134 | +TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri) | ||
135 | +TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri) | ||
136 | +TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri) | ||
137 | +TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri) | ||
138 | |||
139 | TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv) | ||
140 | TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv) | ||
141 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari) | ||
142 | TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari) | ||
143 | TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari) | ||
144 | TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari) | ||
145 | +TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv) | ||
146 | +TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv) | ||
147 | +TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv) | ||
148 | +TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv) | ||
149 | +TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari) | ||
150 | +TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari) | ||
151 | +TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari) | ||
152 | +TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari) | ||
153 | |||
154 | TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv) | ||
155 | TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv) | ||
156 | @@ -XXX,XX +XXX,XX @@ TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri) | ||
157 | TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri) | ||
158 | TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri) | ||
159 | TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri) | ||
160 | +TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv) | ||
161 | +TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv) | ||
162 | +TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv) | ||
163 | +TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv) | ||
164 | +TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri) | ||
165 | +TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri) | ||
166 | +TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri) | ||
167 | +TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri) | ||
168 | |||
169 | TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b) | ||
170 | TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h) | ||
171 | -- | ||
172 | 2.39.1 | ||
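For reference, the rotates added here have no rounding or saturation subtleties: tcg_gen_gvec_rotrv rotates each element by the matching element of the count vector, modulo the element width selected by the MO_* argument. A minimal stand-alone model of one xvrotr.w element (illustrative code, not QEMU API):

    #include <stdint.h>

    /* Rotate one 32-bit element right; the count is taken modulo the
     * element width, matching MO_32 with tcg_gen_gvec_rotrv. */
    static uint32_t rotr32(uint32_t v, uint32_t n)
    {
        n &= 31;
        return n ? (v >> n) | (v << (32 - n)) : v;
    }

xvrotri.w is the same operation with an immediate count, hence tcg_gen_gvec_rotri.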
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSLLWIL.{H.B/W.H/D.W}; | ||
3 | - XVSLLWIL.{HU.BU/WU.HU/DU.WU}; | ||
4 | - XVEXTL.Q.D, XVEXTL.QU.DU. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-37-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 9 +++++ | ||
11 | target/loongarch/disas.c | 9 +++++ | ||
12 | target/loongarch/vec_helper.c | 45 +++++++++++++-------- | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 21 ++++++++-- | ||
14 | 4 files changed, 63 insertions(+), 21 deletions(-) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4 | ||
21 | xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5 | ||
22 | xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6 | ||
23 | |||
24 | +xvsllwil_h_b 0111 01110000 10000 01 ... ..... ..... @vv_ui3 | ||
25 | +xvsllwil_w_h 0111 01110000 10000 1 .... ..... ..... @vv_ui4 | ||
26 | +xvsllwil_d_w 0111 01110000 10001 ..... ..... ..... @vv_ui5 | ||
27 | +xvextl_q_d 0111 01110000 10010 00000 ..... ..... @vv | ||
28 | +xvsllwil_hu_bu 0111 01110000 11000 01 ... ..... ..... @vv_ui3 | ||
29 | +xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4 | ||
30 | +xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5 | ||
31 | +xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv | ||
32 | + | ||
33 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
34 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
35 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
36 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/loongarch/disas.c | ||
39 | +++ b/target/loongarch/disas.c | ||
40 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvrotri_h, vv_i) | ||
41 | INSN_LASX(xvrotri_w, vv_i) | ||
42 | INSN_LASX(xvrotri_d, vv_i) | ||
43 | |||
44 | +INSN_LASX(xvsllwil_h_b, vv_i) | ||
45 | +INSN_LASX(xvsllwil_w_h, vv_i) | ||
46 | +INSN_LASX(xvsllwil_d_w, vv_i) | ||
47 | +INSN_LASX(xvextl_q_d, vv) | ||
48 | +INSN_LASX(xvsllwil_hu_bu, vv_i) | ||
49 | +INSN_LASX(xvsllwil_wu_hu, vv_i) | ||
50 | +INSN_LASX(xvsllwil_du_wu, vv_i) | ||
51 | +INSN_LASX(xvextl_qu_du, vv) | ||
52 | + | ||
53 | INSN_LASX(xvreplgr2vr_b, vr) | ||
54 | INSN_LASX(xvreplgr2vr_h, vr) | ||
55 | INSN_LASX(xvreplgr2vr_w, vr) | ||
56 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/loongarch/vec_helper.c | ||
59 | +++ b/target/loongarch/vec_helper.c | ||
60 | @@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
61 | } | ||
62 | } | ||
63 | |||
64 | -#define VSLLWIL(NAME, BIT, E1, E2) \ | ||
65 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
66 | -{ \ | ||
67 | - int i; \ | ||
68 | - VReg temp; \ | ||
69 | - VReg *Vd = (VReg *)vd; \ | ||
70 | - VReg *Vj = (VReg *)vj; \ | ||
71 | - typedef __typeof(temp.E1(0)) TD; \ | ||
72 | - \ | ||
73 | - temp.D(0) = 0; \ | ||
74 | - temp.D(1) = 0; \ | ||
75 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
76 | - temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ | ||
77 | - } \ | ||
78 | - *Vd = temp; \ | ||
79 | +#define VSLLWIL(NAME, BIT, E1, E2) \ | ||
80 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
81 | +{ \ | ||
82 | + int i, j, ofs; \ | ||
83 | + VReg temp = {}; \ | ||
84 | + VReg *Vd = (VReg *)vd; \ | ||
85 | + VReg *Vj = (VReg *)vj; \ | ||
86 | + int oprsz = simd_oprsz(desc); \ | ||
87 | + typedef __typeof(temp.E1(0)) TD; \ | ||
88 | + \ | ||
89 | + ofs = LSX_LEN / BIT; \ | ||
90 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
91 | + for (j = 0; j < ofs; j++) { \ | ||
92 | + temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \ | ||
93 | + } \ | ||
94 | + } \ | ||
95 | + *Vd = temp; \ | ||
96 | } | ||
97 | |||
98 | + | ||
99 | void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) | ||
100 | { | ||
101 | + int i; | ||
102 | VReg *Vd = (VReg *)vd; | ||
103 | VReg *Vj = (VReg *)vj; | ||
104 | + int oprsz = simd_oprsz(desc); | ||
105 | |||
106 | - Vd->Q(0) = int128_makes64(Vj->D(0)); | ||
107 | + for (i = 0; i < oprsz / 16; i++) { | ||
108 | + Vd->Q(i) = int128_makes64(Vj->D(2 * i)); | ||
109 | + } | ||
110 | } | ||
111 | |||
112 | void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) | ||
113 | { | ||
114 | + int i; | ||
115 | VReg *Vd = (VReg *)vd; | ||
116 | VReg *Vj = (VReg *)vj; | ||
117 | + int oprsz = simd_oprsz(desc); | ||
118 | |||
119 | - Vd->Q(0) = int128_make64(Vj->D(0)); | ||
120 | + for (i = 0; i < oprsz / 16; i++) { | ||
121 | + Vd->Q(i) = int128_make64(Vj->UD(2 * i)); | ||
122 | + } | ||
123 | } | ||
124 | |||
125 | VSLLWIL(vsllwil_h_b, 16, H, B) | ||
126 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
127 | index XXXXXXX..XXXXXXX 100644 | ||
128 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
129 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
130 | @@ -XXX,XX +XXX,XX @@ static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) | ||
131 | static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, | ||
132 | gen_helper_gvec_2i *fn) | ||
133 | { | ||
134 | + if (!check_vec(ctx, oprsz)) { | ||
135 | + return true; | ||
136 | + } | ||
137 | + | ||
138 | tcg_gen_gvec_2i_ool(vec_full_offset(a->vd), | ||
139 | vec_full_offset(a->vj), | ||
140 | tcg_constant_i64(a->imm), | ||
141 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, | ||
142 | |||
143 | static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) | ||
144 | { | ||
145 | - if (!check_vec(ctx, 16)) { | ||
146 | - return true; | ||
147 | - } | ||
148 | - | ||
149 | return gen_vv_i_vl(ctx, a, 16, fn); | ||
150 | } | ||
151 | |||
152 | +static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) | ||
153 | +{ | ||
154 | + return gen_vv_i_vl(ctx, a, 32, fn); | ||
155 | +} | ||
156 | + | ||
157 | static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
158 | void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
159 | { | ||
160 | @@ -XXX,XX +XXX,XX @@ TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu) | ||
161 | TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu) | ||
162 | TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu) | ||
163 | TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du) | ||
164 | +TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b) | ||
165 | +TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h) | ||
166 | +TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w) | ||
167 | +TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d) | ||
168 | +TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu) | ||
169 | +TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu) | ||
170 | +TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu) | ||
171 | +TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du) | ||
172 | |||
173 | TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b) | ||
174 | TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h) | ||
175 | -- | ||
176 | 2.39.1 | ||
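The only subtle part of this patch is the index arithmetic in the reworked VSLLWIL macro: with ofs = LSX_LEN / BIT, the source index j + ofs * 2 * i and the destination index j + ofs * i keep each 128-bit half of a 256-bit register independent, widening only the low half of each lane. A stand-alone model of xvsllwil.h.b under that reading (illustrative code, not QEMU API):

    #include <stdint.h>

    /* Within each 128-bit half, sign-extend the low 8 bytes to
     * halfwords and shift left by imm % 16; the two halves never mix. */
    static void model_xvsllwil_h_b(int16_t d[16], const int8_t s[32],
                                   unsigned imm)
    {
        for (int lane = 0; lane < 2; lane++) {
            for (int k = 0; k < 8; k++) {
                int32_t v = s[lane * 16 + k];   /* sign-extend byte */
                d[lane * 8 + k] = (int16_t)((uint32_t)v << (imm % 16));
            }
        }
    }

With oprsz == 16 (the LSX case) the outer loop of the macro runs once and the behaviour reduces to the old 128-bit implementation. Moving check_vec() from gen_vv_i() into gen_vv_i_vl() lets the new gen_xx_i() share the same check with a 32-byte operand size.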
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSRLR[I].{B/H/W/D}; | ||
3 | - XVSRAR[I].{B/H/W/D}. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-38-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 17 +++++++++++++++++ | ||
10 | target/loongarch/disas.c | 18 ++++++++++++++++++ | ||
11 | target/loongarch/vec_helper.c | 12 ++++++++---- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++ | ||
13 | 4 files changed, 59 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4 | ||
20 | xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5 | ||
21 | xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv | ||
22 | |||
23 | +xvsrlr_b 0111 01001111 00000 ..... ..... ..... @vvv | ||
24 | +xvsrlr_h 0111 01001111 00001 ..... ..... ..... @vvv | ||
25 | +xvsrlr_w 0111 01001111 00010 ..... ..... ..... @vvv | ||
26 | +xvsrlr_d 0111 01001111 00011 ..... ..... ..... @vvv | ||
27 | +xvsrlri_b 0111 01101010 01000 01 ... ..... ..... @vv_ui3 | ||
28 | +xvsrlri_h 0111 01101010 01000 1 .... ..... ..... @vv_ui4 | ||
29 | +xvsrlri_w 0111 01101010 01001 ..... ..... ..... @vv_ui5 | ||
30 | +xvsrlri_d 0111 01101010 0101 ...... ..... ..... @vv_ui6 | ||
31 | +xvsrar_b 0111 01001111 00100 ..... ..... ..... @vvv | ||
32 | +xvsrar_h 0111 01001111 00101 ..... ..... ..... @vvv | ||
33 | +xvsrar_w 0111 01001111 00110 ..... ..... ..... @vvv | ||
34 | +xvsrar_d 0111 01001111 00111 ..... ..... ..... @vvv | ||
35 | +xvsrari_b 0111 01101010 10000 01 ... ..... ..... @vv_ui3 | ||
36 | +xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4 | ||
37 | +xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5 | ||
38 | +xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6 | ||
39 | + | ||
40 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
41 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
42 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
43 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/loongarch/disas.c | ||
46 | +++ b/target/loongarch/disas.c | ||
47 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsllwil_wu_hu, vv_i) | ||
48 | INSN_LASX(xvsllwil_du_wu, vv_i) | ||
49 | INSN_LASX(xvextl_qu_du, vv) | ||
50 | |||
51 | +INSN_LASX(xvsrlr_b, vvv) | ||
52 | +INSN_LASX(xvsrlr_h, vvv) | ||
53 | +INSN_LASX(xvsrlr_w, vvv) | ||
54 | +INSN_LASX(xvsrlr_d, vvv) | ||
55 | +INSN_LASX(xvsrlri_b, vv_i) | ||
56 | +INSN_LASX(xvsrlri_h, vv_i) | ||
57 | +INSN_LASX(xvsrlri_w, vv_i) | ||
58 | +INSN_LASX(xvsrlri_d, vv_i) | ||
59 | + | ||
60 | +INSN_LASX(xvsrar_b, vvv) | ||
61 | +INSN_LASX(xvsrar_h, vvv) | ||
62 | +INSN_LASX(xvsrar_w, vvv) | ||
63 | +INSN_LASX(xvsrar_d, vvv) | ||
64 | +INSN_LASX(xvsrari_b, vv_i) | ||
65 | +INSN_LASX(xvsrari_h, vv_i) | ||
66 | +INSN_LASX(xvsrari_w, vv_i) | ||
67 | +INSN_LASX(xvsrari_d, vv_i) | ||
68 | + | ||
69 | INSN_LASX(xvreplgr2vr_b, vr) | ||
70 | INSN_LASX(xvreplgr2vr_h, vr) | ||
71 | INSN_LASX(xvreplgr2vr_w, vr) | ||
72 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/loongarch/vec_helper.c | ||
75 | +++ b/target/loongarch/vec_helper.c | ||
76 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
77 | VReg *Vd = (VReg *)vd; \ | ||
78 | VReg *Vj = (VReg *)vj; \ | ||
79 | VReg *Vk = (VReg *)vk; \ | ||
80 | + int oprsz = simd_oprsz(desc); \ | ||
81 | \ | ||
82 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
83 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
84 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
85 | } \ | ||
86 | } | ||
87 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
88 | int i; \ | ||
89 | VReg *Vd = (VReg *)vd; \ | ||
90 | VReg *Vj = (VReg *)vj; \ | ||
91 | + int oprsz = simd_oprsz(desc); \ | ||
92 | \ | ||
93 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
94 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
95 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ | ||
96 | } \ | ||
97 | } | ||
98 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
99 | VReg *Vd = (VReg *)vd; \ | ||
100 | VReg *Vj = (VReg *)vj; \ | ||
101 | VReg *Vk = (VReg *)vk; \ | ||
102 | + int oprsz = simd_oprsz(desc); \ | ||
103 | \ | ||
104 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
105 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
106 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
107 | } \ | ||
108 | } | ||
109 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
110 | int i; \ | ||
111 | VReg *Vd = (VReg *)vd; \ | ||
112 | VReg *Vj = (VReg *)vj; \ | ||
113 | + int oprsz = simd_oprsz(desc); \ | ||
114 | \ | ||
115 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
116 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
117 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ | ||
118 | } \ | ||
119 | } | ||
120 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
123 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
124 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b) | ||
125 | TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h) | ||
126 | TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w) | ||
127 | TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d) | ||
128 | +TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b) | ||
129 | +TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h) | ||
130 | +TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w) | ||
131 | +TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d) | ||
132 | +TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b) | ||
133 | +TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h) | ||
134 | +TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w) | ||
135 | +TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d) | ||
136 | |||
137 | TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b) | ||
138 | TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h) | ||
139 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b) | ||
140 | TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h) | ||
141 | TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w) | ||
142 | TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d) | ||
143 | +TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b) | ||
144 | +TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h) | ||
145 | +TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w) | ||
146 | +TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d) | ||
147 | +TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b) | ||
148 | +TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h) | ||
149 | +TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w) | ||
150 | +TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d) | ||
151 | |||
152 | TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h) | ||
153 | TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w) | ||
154 | -- | ||
155 | 2.39.1 | ||
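For reference, the do_vsrlr_*/do_vsrar_* helpers reused here implement a round-to-nearest shift: the last bit shifted out is added back to the truncated result. A minimal model of the unsigned 32-bit case, assuming that definition (illustrative code, not QEMU API):

    #include <stdint.h>

    /* Logical right shift with rounding; n is already reduced modulo
     * the element width, and n == 0 must pass the value through since
     * a shift by n - 1 would be out of range. */
    static uint32_t vsrlr32(uint32_t v, unsigned n)
    {
        return n ? (v >> n) + ((v >> (n - 1)) & 1) : v;
    }

The new loop bound, oprsz / (BIT / 8), simply walks all elements of a 16- or 32-byte register instead of hard-coding LSX_LEN / BIT.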
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSRLN.{B.H/H.W/W.D}; | ||
3 | - XVSRAN.{B.H/H.W/W.D}; | ||
4 | - XVSRLNI.{B.H/H.W/W.D/D.Q}; | ||
5 | - XVSRANI.{B.H/H.W/W.D/D.Q}. | ||
1 | 6 | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230914022645.1151356-39-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/insns.decode | 16 ++ | ||
12 | target/loongarch/disas.c | 16 ++ | ||
13 | target/loongarch/vec_helper.c | 166 +++++++++++--------- | ||
14 | target/loongarch/insn_trans/trans_vec.c.inc | 14 ++ | ||
15 | 4 files changed, 137 insertions(+), 75 deletions(-) | ||
16 | |||
17 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/insns.decode | ||
20 | +++ b/target/loongarch/insns.decode | ||
21 | @@ -XXX,XX +XXX,XX @@ xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4 | ||
22 | xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5 | ||
23 | xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6 | ||
24 | |||
25 | +xvsrln_b_h 0111 01001111 01001 ..... ..... ..... @vvv | ||
26 | +xvsrln_h_w 0111 01001111 01010 ..... ..... ..... @vvv | ||
27 | +xvsrln_w_d 0111 01001111 01011 ..... ..... ..... @vvv | ||
28 | +xvsran_b_h 0111 01001111 01101 ..... ..... ..... @vvv | ||
29 | +xvsran_h_w 0111 01001111 01110 ..... ..... ..... @vvv | ||
30 | +xvsran_w_d 0111 01001111 01111 ..... ..... ..... @vvv | ||
31 | + | ||
32 | +xvsrlni_b_h 0111 01110100 00000 1 .... ..... ..... @vv_ui4 | ||
33 | +xvsrlni_h_w 0111 01110100 00001 ..... ..... ..... @vv_ui5 | ||
34 | +xvsrlni_w_d 0111 01110100 0001 ...... ..... ..... @vv_ui6 | ||
35 | +xvsrlni_d_q 0111 01110100 001 ....... ..... ..... @vv_ui7 | ||
36 | +xvsrani_b_h 0111 01110101 10000 1 .... ..... ..... @vv_ui4 | ||
37 | +xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5 | ||
38 | +xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6 | ||
39 | +xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7 | ||
40 | + | ||
41 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
42 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
43 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
44 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/target/loongarch/disas.c | ||
47 | +++ b/target/loongarch/disas.c | ||
48 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrari_h, vv_i) | ||
49 | INSN_LASX(xvsrari_w, vv_i) | ||
50 | INSN_LASX(xvsrari_d, vv_i) | ||
51 | |||
52 | +INSN_LASX(xvsrln_b_h, vvv) | ||
53 | +INSN_LASX(xvsrln_h_w, vvv) | ||
54 | +INSN_LASX(xvsrln_w_d, vvv) | ||
55 | +INSN_LASX(xvsran_b_h, vvv) | ||
56 | +INSN_LASX(xvsran_h_w, vvv) | ||
57 | +INSN_LASX(xvsran_w_d, vvv) | ||
58 | + | ||
59 | +INSN_LASX(xvsrlni_b_h, vv_i) | ||
60 | +INSN_LASX(xvsrlni_h_w, vv_i) | ||
61 | +INSN_LASX(xvsrlni_w_d, vv_i) | ||
62 | +INSN_LASX(xvsrlni_d_q, vv_i) | ||
63 | +INSN_LASX(xvsrani_b_h, vv_i) | ||
64 | +INSN_LASX(xvsrani_h_w, vv_i) | ||
65 | +INSN_LASX(xvsrani_w_d, vv_i) | ||
66 | +INSN_LASX(xvsrani_d_q, vv_i) | ||
67 | + | ||
68 | INSN_LASX(xvreplgr2vr_b, vr) | ||
69 | INSN_LASX(xvreplgr2vr_h, vr) | ||
70 | INSN_LASX(xvreplgr2vr_w, vr) | ||
71 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/loongarch/vec_helper.c | ||
74 | +++ b/target/loongarch/vec_helper.c | ||
75 | @@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_d, 64, D) | ||
76 | |||
77 | #define R_SHIFT(a, b) (a >> b) | ||
78 | |||
79 | -#define VSRLN(NAME, BIT, T, E1, E2) \ | ||
80 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
81 | -{ \ | ||
82 | - int i; \ | ||
83 | - VReg *Vd = (VReg *)vd; \ | ||
84 | - VReg *Vj = (VReg *)vj; \ | ||
85 | - VReg *Vk = (VReg *)vk; \ | ||
86 | - \ | ||
87 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
88 | - Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \ | ||
89 | - } \ | ||
90 | - Vd->D(1) = 0; \ | ||
91 | +#define VSRLN(NAME, BIT, E1, E2) \ | ||
92 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
93 | +{ \ | ||
94 | + int i, j, ofs; \ | ||
95 | + VReg *Vd = (VReg *)vd; \ | ||
96 | + VReg *Vj = (VReg *)vj; \ | ||
97 | + VReg *Vk = (VReg *)vk; \ | ||
98 | + int oprsz = simd_oprsz(desc); \ | ||
99 | + \ | ||
100 | + ofs = LSX_LEN / BIT; \ | ||
101 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
102 | + for (j = 0; j < ofs; j++) { \ | ||
103 | + Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ | ||
104 | + Vk->E2(j + ofs * i) % BIT); \ | ||
105 | + } \ | ||
106 | + Vd->D(2 * i + 1) = 0; \ | ||
107 | + } \ | ||
108 | } | ||
109 | |||
110 | -VSRLN(vsrln_b_h, 16, uint16_t, B, H) | ||
111 | -VSRLN(vsrln_h_w, 32, uint32_t, H, W) | ||
112 | -VSRLN(vsrln_w_d, 64, uint64_t, W, D) | ||
113 | +VSRLN(vsrln_b_h, 16, B, UH) | ||
114 | +VSRLN(vsrln_h_w, 32, H, UW) | ||
115 | +VSRLN(vsrln_w_d, 64, W, UD) | ||
116 | |||
117 | -#define VSRAN(NAME, BIT, T, E1, E2) \ | ||
118 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
119 | -{ \ | ||
120 | - int i; \ | ||
121 | - VReg *Vd = (VReg *)vd; \ | ||
122 | - VReg *Vj = (VReg *)vj; \ | ||
123 | - VReg *Vk = (VReg *)vk; \ | ||
124 | - \ | ||
125 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
126 | - Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ | ||
127 | - } \ | ||
128 | - Vd->D(1) = 0; \ | ||
129 | +#define VSRAN(NAME, BIT, E1, E2, E3) \ | ||
130 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
131 | +{ \ | ||
132 | + int i, j, ofs; \ | ||
133 | + VReg *Vd = (VReg *)vd; \ | ||
134 | + VReg *Vj = (VReg *)vj; \ | ||
135 | + VReg *Vk = (VReg *)vk; \ | ||
136 | + int oprsz = simd_oprsz(desc); \ | ||
137 | + \ | ||
138 | + ofs = LSX_LEN / BIT; \ | ||
139 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
140 | + for (j = 0; j < ofs; j++) { \ | ||
141 | + Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ | ||
142 | + Vk->E3(j + ofs * i) % BIT); \ | ||
143 | + } \ | ||
144 | + Vd->D(2 * i + 1) = 0; \ | ||
145 | + } \ | ||
146 | } | ||
147 | |||
148 | -VSRAN(vsran_b_h, 16, uint16_t, B, H) | ||
149 | -VSRAN(vsran_h_w, 32, uint32_t, H, W) | ||
150 | -VSRAN(vsran_w_d, 64, uint64_t, W, D) | ||
151 | +VSRAN(vsran_b_h, 16, B, H, UH) | ||
152 | +VSRAN(vsran_h_w, 32, H, W, UW) | ||
153 | +VSRAN(vsran_w_d, 64, W, D, UD) | ||
154 | |||
155 | -#define VSRLNI(NAME, BIT, T, E1, E2) \ | ||
156 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
157 | -{ \ | ||
158 | - int i, max; \ | ||
159 | - VReg temp; \ | ||
160 | - VReg *Vd = (VReg *)vd; \ | ||
161 | - VReg *Vj = (VReg *)vj; \ | ||
162 | - \ | ||
163 | - temp.D(0) = 0; \ | ||
164 | - temp.D(1) = 0; \ | ||
165 | - max = LSX_LEN/BIT; \ | ||
166 | - for (i = 0; i < max; i++) { \ | ||
167 | - temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ | ||
168 | - temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ | ||
169 | - } \ | ||
170 | - *Vd = temp; \ | ||
171 | +#define VSRLNI(NAME, BIT, E1, E2) \ | ||
172 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
173 | +{ \ | ||
174 | + int i, j, ofs; \ | ||
175 | + VReg temp = {}; \ | ||
176 | + VReg *Vd = (VReg *)vd; \ | ||
177 | + VReg *Vj = (VReg *)vj; \ | ||
178 | + int oprsz = simd_oprsz(desc); \ | ||
179 | + \ | ||
180 | + ofs = LSX_LEN / BIT; \ | ||
181 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
182 | + for (j = 0; j < ofs; j++) { \ | ||
183 | + temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ | ||
184 | + temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ | ||
185 | + imm); \ | ||
186 | + } \ | ||
187 | + } \ | ||
188 | + *Vd = temp; \ | ||
189 | } | ||
190 | |||
191 | void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
192 | { | ||
193 | - VReg temp; | ||
194 | + int i; | ||
195 | + VReg temp = {}; | ||
196 | VReg *Vd = (VReg *)vd; | ||
197 | VReg *Vj = (VReg *)vj; | ||
198 | |||
199 | - temp.D(0) = 0; | ||
200 | - temp.D(1) = 0; | ||
201 | - temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128)); | ||
202 | - temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128)); | ||
203 | + for (i = 0; i < 2; i++) { | ||
204 | + temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128)); | ||
205 | + temp.D(2 * i + 1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128)); | ||
206 | + } | ||
207 | *Vd = temp; | ||
208 | } | ||
209 | |||
210 | -VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) | ||
211 | -VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) | ||
212 | -VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) | ||
213 | +VSRLNI(vsrlni_b_h, 16, B, UH) | ||
214 | +VSRLNI(vsrlni_h_w, 32, H, UW) | ||
215 | +VSRLNI(vsrlni_w_d, 64, W, UD) | ||
216 | |||
217 | -#define VSRANI(NAME, BIT, E1, E2) \ | ||
218 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
219 | -{ \ | ||
220 | - int i, max; \ | ||
221 | - VReg temp; \ | ||
222 | - VReg *Vd = (VReg *)vd; \ | ||
223 | - VReg *Vj = (VReg *)vj; \ | ||
224 | - \ | ||
225 | - temp.D(0) = 0; \ | ||
226 | - temp.D(1) = 0; \ | ||
227 | - max = LSX_LEN/BIT; \ | ||
228 | - for (i = 0; i < max; i++) { \ | ||
229 | - temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ | ||
230 | - temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ | ||
231 | - } \ | ||
232 | - *Vd = temp; \ | ||
233 | +#define VSRANI(NAME, BIT, E1, E2) \ | ||
234 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
235 | +{ \ | ||
236 | + int i, j, ofs; \ | ||
237 | + VReg temp = {}; \ | ||
238 | + VReg *Vd = (VReg *)vd; \ | ||
239 | + VReg *Vj = (VReg *)vj; \ | ||
240 | + int oprsz = simd_oprsz(desc); \ | ||
241 | + \ | ||
242 | + ofs = LSX_LEN / BIT; \ | ||
243 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
244 | + for (j = 0; j < ofs; j++) { \ | ||
245 | + temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ | ||
246 | + temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ | ||
247 | + imm); \ | ||
248 | + } \ | ||
249 | + } \ | ||
250 | + *Vd = temp; \ | ||
251 | } | ||
252 | |||
253 | void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
254 | { | ||
255 | - VReg temp; | ||
256 | + int i; | ||
257 | + VReg temp = {}; | ||
258 | VReg *Vd = (VReg *)vd; | ||
259 | VReg *Vj = (VReg *)vj; | ||
260 | |||
261 | - temp.D(0) = 0; | ||
262 | - temp.D(1) = 0; | ||
263 | - temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128)); | ||
264 | - temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128)); | ||
265 | + for (i = 0; i < 2; i++) { | ||
266 | + temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128)); | ||
267 | + temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128)); | ||
268 | + } | ||
269 | *Vd = temp; | ||
270 | } | ||
271 | |||
272 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
273 | index XXXXXXX..XXXXXXX 100644 | ||
274 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
275 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
276 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d) | ||
277 | TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h) | ||
278 | TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w) | ||
279 | TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d) | ||
280 | +TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h) | ||
281 | +TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w) | ||
282 | +TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d) | ||
283 | +TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h) | ||
284 | +TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w) | ||
285 | +TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d) | ||
286 | |||
287 | TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h) | ||
288 | TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w) | ||
289 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h) | ||
290 | TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w) | ||
291 | TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d) | ||
292 | TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q) | ||
293 | +TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h) | ||
294 | +TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w) | ||
295 | +TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d) | ||
296 | +TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q) | ||
297 | +TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h) | ||
298 | +TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w) | ||
299 | +TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d) | ||
300 | +TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q) | ||
301 | |||
302 | TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h) | ||
303 | TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w) | ||
304 | -- | ||
305 | 2.39.1 | ||
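The narrowing pattern here is worth spelling out: each 128-bit lane writes its results into its own low 64 bits, and the Vd->D(2 * i + 1) = 0 store clears the upper 64 bits of that lane. A stand-alone model of xvsrln.b.h under that reading (illustrative code, not QEMU API):

    #include <stdint.h>
    #include <string.h>

    /* Per 128-bit lane: shift eight halfwords of j right by the
     * matching count in k (mod 16), truncate to bytes, store them in
     * the low half of the lane and zero the high half. */
    static void model_xvsrln_b_h(uint8_t d[32], const uint16_t j[16],
                                 const uint16_t k[16])
    {
        memset(d, 0, 32);
        for (int lane = 0; lane < 2; lane++) {
            for (int e = 0; e < 8; e++) {
                d[lane * 16 + e] =
                    (uint8_t)(j[lane * 8 + e] >> (k[lane * 8 + e] % 16));
            }
        }
    }

The switch from an explicit T cast to unsigned element accessors (UH/UW/UD) in VSRLN is what keeps R_SHIFT a logical shift; VSRAN keeps the signed source element and adds a separate unsigned accessor only for the shift count.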
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSRLRN.{B.H/H.W/W.D}; | ||
3 | - XVSRARN.{B.H/H.W/W.D}; | ||
4 | - XVSRLRNI.{B.H/H.W/W.D/D.Q}; | ||
5 | - XVSRARNI.{B.H/H.W/W.D/D.Q}. | ||
1 | 6 | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230914022645.1151356-40-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/insns.decode | 16 ++ | ||
12 | target/loongarch/disas.c | 16 ++ | ||
13 | target/loongarch/vec_helper.c | 198 +++++++++++--------- | ||
14 | target/loongarch/insn_trans/trans_vec.c.inc | 14 ++ | ||
15 | 4 files changed, 159 insertions(+), 85 deletions(-) | ||
16 | |||
17 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/insns.decode | ||
20 | +++ b/target/loongarch/insns.decode | ||
21 | @@ -XXX,XX +XXX,XX @@ xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5 | ||
22 | xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6 | ||
23 | xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7 | ||
24 | |||
25 | +xvsrlrn_b_h 0111 01001111 10001 ..... ..... ..... @vvv | ||
26 | +xvsrlrn_h_w 0111 01001111 10010 ..... ..... ..... @vvv | ||
27 | +xvsrlrn_w_d 0111 01001111 10011 ..... ..... ..... @vvv | ||
28 | +xvsrarn_b_h 0111 01001111 10101 ..... ..... ..... @vvv | ||
29 | +xvsrarn_h_w 0111 01001111 10110 ..... ..... ..... @vvv | ||
30 | +xvsrarn_w_d 0111 01001111 10111 ..... ..... ..... @vvv | ||
31 | + | ||
32 | +xvsrlrni_b_h 0111 01110100 01000 1 .... ..... ..... @vv_ui4 | ||
33 | +xvsrlrni_h_w 0111 01110100 01001 ..... ..... ..... @vv_ui5 | ||
34 | +xvsrlrni_w_d 0111 01110100 0101 ...... ..... ..... @vv_ui6 | ||
35 | +xvsrlrni_d_q 0111 01110100 011 ....... ..... ..... @vv_ui7 | ||
36 | +xvsrarni_b_h 0111 01110101 11000 1 .... ..... ..... @vv_ui4 | ||
37 | +xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5 | ||
38 | +xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6 | ||
39 | +xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7 | ||
40 | + | ||
41 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
42 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
43 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
44 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/target/loongarch/disas.c | ||
47 | +++ b/target/loongarch/disas.c | ||
48 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrani_h_w, vv_i) | ||
49 | INSN_LASX(xvsrani_w_d, vv_i) | ||
50 | INSN_LASX(xvsrani_d_q, vv_i) | ||
51 | |||
52 | +INSN_LASX(xvsrlrn_b_h, vvv) | ||
53 | +INSN_LASX(xvsrlrn_h_w, vvv) | ||
54 | +INSN_LASX(xvsrlrn_w_d, vvv) | ||
55 | +INSN_LASX(xvsrarn_b_h, vvv) | ||
56 | +INSN_LASX(xvsrarn_h_w, vvv) | ||
57 | +INSN_LASX(xvsrarn_w_d, vvv) | ||
58 | + | ||
59 | +INSN_LASX(xvsrlrni_b_h, vv_i) | ||
60 | +INSN_LASX(xvsrlrni_h_w, vv_i) | ||
61 | +INSN_LASX(xvsrlrni_w_d, vv_i) | ||
62 | +INSN_LASX(xvsrlrni_d_q, vv_i) | ||
63 | +INSN_LASX(xvsrarni_b_h, vv_i) | ||
64 | +INSN_LASX(xvsrarni_h_w, vv_i) | ||
65 | +INSN_LASX(xvsrarni_w_d, vv_i) | ||
66 | +INSN_LASX(xvsrarni_d_q, vv_i) | ||
67 | + | ||
68 | INSN_LASX(xvreplgr2vr_b, vr) | ||
69 | INSN_LASX(xvreplgr2vr_h, vr) | ||
70 | INSN_LASX(xvreplgr2vr_w, vr) | ||
71 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/loongarch/vec_helper.c | ||
74 | +++ b/target/loongarch/vec_helper.c | ||
75 | @@ -XXX,XX +XXX,XX @@ VSRANI(vsrani_b_h, 16, B, H) | ||
76 | VSRANI(vsrani_h_w, 32, H, W) | ||
77 | VSRANI(vsrani_w_d, 64, W, D) | ||
78 | |||
79 | -#define VSRLRN(NAME, BIT, T, E1, E2) \ | ||
80 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
81 | -{ \ | ||
82 | - int i; \ | ||
83 | - VReg *Vd = (VReg *)vd; \ | ||
84 | - VReg *Vj = (VReg *)vj; \ | ||
85 | - VReg *Vk = (VReg *)vk; \ | ||
86 | - \ | ||
87 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
88 | - Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
89 | - } \ | ||
90 | - Vd->D(1) = 0; \ | ||
91 | +#define VSRLRN(NAME, BIT, E1, E2, E3) \ | ||
92 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
93 | +{ \ | ||
94 | + int i, j, ofs; \ | ||
95 | + VReg *Vd = (VReg *)vd; \ | ||
96 | + VReg *Vj = (VReg *)vj; \ | ||
97 | + VReg *Vk = (VReg *)vk; \ | ||
98 | + int oprsz = simd_oprsz(desc); \ | ||
99 | + \ | ||
100 | + ofs = LSX_LEN / BIT; \ | ||
101 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
102 | + for (j = 0; j < ofs; j++) { \ | ||
103 | + Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), \ | ||
104 | + Vk->E3(j + ofs * i) % BIT); \ | ||
105 | + } \ | ||
106 | + Vd->D(2 * i + 1) = 0; \ | ||
107 | + } \ | ||
108 | } | ||
109 | |||
110 | -VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H) | ||
111 | -VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W) | ||
112 | -VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D) | ||
113 | +VSRLRN(vsrlrn_b_h, 16, B, H, UH) | ||
114 | +VSRLRN(vsrlrn_h_w, 32, H, W, UW) | ||
115 | +VSRLRN(vsrlrn_w_d, 64, W, D, UD) | ||
116 | |||
117 | -#define VSRARN(NAME, BIT, T, E1, E2) \ | ||
118 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
119 | -{ \ | ||
120 | - int i; \ | ||
121 | - VReg *Vd = (VReg *)vd; \ | ||
122 | - VReg *Vj = (VReg *)vj; \ | ||
123 | - VReg *Vk = (VReg *)vk; \ | ||
124 | - \ | ||
125 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
126 | - Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
127 | - } \ | ||
128 | - Vd->D(1) = 0; \ | ||
129 | +#define VSRARN(NAME, BIT, E1, E2, E3) \ | ||
130 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
131 | +{ \ | ||
132 | + int i, j, ofs; \ | ||
133 | + VReg *Vd = (VReg *)vd; \ | ||
134 | + VReg *Vj = (VReg *)vj; \ | ||
135 | + VReg *Vk = (VReg *)vk; \ | ||
136 | + int oprsz = simd_oprsz(desc); \ | ||
137 | + \ | ||
138 | + ofs = LSX_LEN / BIT; \ | ||
139 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
140 | + for (j = 0; j < ofs; j++) { \ | ||
141 | + Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \ | ||
142 | + Vk->E3(j + ofs * i) % BIT); \ | ||
143 | + } \ | ||
144 | + Vd->D(2 * i + 1) = 0; \ | ||
145 | + } \ | ||
146 | } | ||
147 | |||
148 | -VSRARN(vsrarn_b_h, 16, uint8_t, B, H) | ||
149 | -VSRARN(vsrarn_h_w, 32, uint16_t, H, W) | ||
150 | -VSRARN(vsrarn_w_d, 64, uint32_t, W, D) | ||
151 | - | ||
152 | -#define VSRLRNI(NAME, BIT, E1, E2) \ | ||
153 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
154 | -{ \ | ||
155 | - int i, max; \ | ||
156 | - VReg temp; \ | ||
157 | - VReg *Vd = (VReg *)vd; \ | ||
158 | - VReg *Vj = (VReg *)vj; \ | ||
159 | - \ | ||
160 | - temp.D(0) = 0; \ | ||
161 | - temp.D(1) = 0; \ | ||
162 | - max = LSX_LEN/BIT; \ | ||
163 | - for (i = 0; i < max; i++) { \ | ||
164 | - temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ | ||
165 | - temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ | ||
166 | - } \ | ||
167 | - *Vd = temp; \ | ||
168 | +VSRARN(vsrarn_b_h, 16, B, H, UH) | ||
169 | +VSRARN(vsrarn_h_w, 32, H, W, UW) | ||
170 | +VSRARN(vsrarn_w_d, 64, W, D, UD) | ||
171 | + | ||
172 | +#define VSRLRNI(NAME, BIT, E1, E2) \ | ||
173 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
174 | +{ \ | ||
175 | + int i, j, ofs; \ | ||
176 | + VReg temp = {}; \ | ||
177 | + VReg *Vd = (VReg *)vd; \ | ||
178 | + VReg *Vj = (VReg *)vj; \ | ||
179 | + int oprsz = simd_oprsz(desc); \ | ||
180 | + \ | ||
181 | + ofs = LSX_LEN / BIT; \ | ||
182 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
183 | + for (j = 0; j < ofs; j++) { \ | ||
184 | + temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \ | ||
185 | + temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \ | ||
186 | + imm); \ | ||
187 | + } \ | ||
188 | + } \ | ||
189 | + *Vd = temp; \ | ||
190 | } | ||
191 | |||
192 | void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
193 | { | ||
194 | - VReg temp; | ||
195 | + int i; | ||
196 | + VReg temp = {}; | ||
197 | VReg *Vd = (VReg *)vd; | ||
198 | VReg *Vj = (VReg *)vj; | ||
199 | - Int128 r1, r2; | ||
200 | - | ||
201 | - if (imm == 0) { | ||
202 | - temp.D(0) = int128_getlo(Vj->Q(0)); | ||
203 | - temp.D(1) = int128_getlo(Vd->Q(0)); | ||
204 | - } else { | ||
205 | - r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); | ||
206 | - r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); | ||
207 | + Int128 r[4]; | ||
208 | + int oprsz = simd_oprsz(desc); | ||
209 | |||
210 | - temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1)); | ||
211 | - temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2)); | ||
212 | + for (i = 0; i < oprsz / 16; i++) { | ||
213 | + if (imm == 0) { | ||
214 | + temp.D(2 * i) = int128_getlo(Vj->Q(i)); | ||
215 | + temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); | ||
216 | + } else { | ||
217 | + r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)), | ||
218 | + int128_one()); | ||
219 | + r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)), | ||
220 | + int128_one()); | ||
221 | + temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i), | ||
222 | + imm), r[2 * i])); | ||
223 | + temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i), | ||
224 | + imm), r[2 * i + 1])); | ||
225 | + } | ||
226 | } | ||
227 | *Vd = temp; | ||
228 | } | ||
229 | @@ -XXX,XX +XXX,XX @@ VSRLRNI(vsrlrni_b_h, 16, B, H) | ||
230 | VSRLRNI(vsrlrni_h_w, 32, H, W) | ||
231 | VSRLRNI(vsrlrni_w_d, 64, W, D) | ||
232 | |||
233 | -#define VSRARNI(NAME, BIT, E1, E2) \ | ||
234 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
235 | -{ \ | ||
236 | - int i, max; \ | ||
237 | - VReg temp; \ | ||
238 | - VReg *Vd = (VReg *)vd; \ | ||
239 | - VReg *Vj = (VReg *)vj; \ | ||
240 | - \ | ||
241 | - temp.D(0) = 0; \ | ||
242 | - temp.D(1) = 0; \ | ||
243 | - max = LSX_LEN/BIT; \ | ||
244 | - for (i = 0; i < max; i++) { \ | ||
245 | - temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ | ||
246 | - temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ | ||
247 | - } \ | ||
248 | - *Vd = temp; \ | ||
249 | +#define VSRARNI(NAME, BIT, E1, E2) \ | ||
250 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
251 | +{ \ | ||
252 | + int i, j, ofs; \ | ||
253 | + VReg temp = {}; \ | ||
254 | + VReg *Vd = (VReg *)vd; \ | ||
255 | + VReg *Vj = (VReg *)vj; \ | ||
256 | + int oprsz = simd_oprsz(desc); \ | ||
257 | + \ | ||
258 | + ofs = LSX_LEN / BIT; \ | ||
259 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
260 | + for (j = 0; j < ofs; j++) { \ | ||
261 | + temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \ | ||
262 | + temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \ | ||
263 | + imm); \ | ||
264 | + } \ | ||
265 | + } \ | ||
266 | + *Vd = temp; \ | ||
267 | } | ||
268 | |||
269 | void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
270 | { | ||
271 | - VReg temp; | ||
272 | + int i; | ||
273 | + VReg temp = {}; | ||
274 | VReg *Vd = (VReg *)vd; | ||
275 | VReg *Vj = (VReg *)vj; | ||
276 | - Int128 r1, r2; | ||
277 | - | ||
278 | - if (imm == 0) { | ||
279 | - temp.D(0) = int128_getlo(Vj->Q(0)); | ||
280 | - temp.D(1) = int128_getlo(Vd->Q(0)); | ||
281 | - } else { | ||
282 | - r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
283 | - r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
284 | + Int128 r[4]; | ||
285 | + int oprsz = simd_oprsz(desc); | ||
286 | |||
287 | - temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1)); | ||
288 | - temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2)); | ||
289 | + for (i = 0; i < oprsz / 16; i++) { | ||
290 | + if (imm == 0) { | ||
291 | + temp.D(2 * i) = int128_getlo(Vj->Q(i)); | ||
292 | + temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); | ||
293 | + } else { | ||
294 | + r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)), | ||
295 | + int128_one()); | ||
296 | + r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)), | ||
297 | + int128_one()); | ||
298 | + temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i), | ||
299 | + imm), r[2 * i])); | ||
300 | + temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i), | ||
301 | + imm), r[2 * i + 1])); | ||
302 | + } | ||
303 | } | ||
304 | *Vd = temp; | ||
305 | } | ||
306 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
307 | index XXXXXXX..XXXXXXX 100644 | ||
308 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
309 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
310 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d) | ||
311 | TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h) | ||
312 | TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w) | ||
313 | TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d) | ||
314 | +TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h) | ||
315 | +TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w) | ||
316 | +TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d) | ||
317 | +TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h) | ||
318 | +TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w) | ||
319 | +TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d) | ||
320 | |||
321 | TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h) | ||
322 | TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w) | ||
323 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h) | ||
324 | TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w) | ||
325 | TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d) | ||
326 | TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q) | ||
327 | +TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h) | ||
328 | +TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w) | ||
329 | +TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d) | ||
330 | +TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q) | ||
331 | +TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h) | ||
332 | +TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w) | ||
333 | +TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d) | ||
334 | +TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q) | ||
335 | |||
336 | TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h) | ||
337 | TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w) | ||
338 | -- | ||
339 | 2.39.1 | ||
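The d_q variants operate on whole 128-bit elements, so the rounding term is computed with the Int128 helpers instead of plain shifts, but the arithmetic is the same round-to-nearest scheme as do_vsrlr: add bit imm - 1 of the source after shifting, with imm == 0 passing the value through. A model of one vsrlrni.d.q result using the compiler's unsigned __int128 in place of QEMU's Int128 wrappers (illustrative code, GCC/clang extension):

    #include <stdint.h>

    /* Round-to-nearest logical shift of a 128-bit value, keeping the
     * low 64 bits; imm == 0 must be special-cased because imm - 1
     * would be an invalid shift count. */
    static uint64_t model_srlrni_d_q(unsigned __int128 x, unsigned imm)
    {
        if (imm == 0) {
            return (uint64_t)x;
        }
        return (uint64_t)((x >> imm) + ((x >> (imm - 1)) & 1));
    }

vsrarni_d_q is identical except that the shift is arithmetic, matching int128_rshift.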
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSSRLN.{B.H/H.W/W.D}; | ||
3 | - XVSSRAN.{B.H/H.W/W.D}; | ||
4 | - XVSSRLN.{BU.H/HU.W/WU.D}; | ||
5 | - XVSSRAN.{BU.H/HU.W/WU.D}; | ||
6 | - XVSSRLNI.{B.H/H.W/W.D/D.Q}; | ||
7 | - XVSSRANI.{B.H/H.W/W.D/D.Q}; | ||
8 | - XVSSRLNI.{BU.H/HU.W/WU.D/DU.Q}; | ||
9 | - XVSSRANI.{BU.H/HU.W/WU.D/DU.Q}. | ||
1 | 10 | ||
11 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-Id: <20230914022645.1151356-41-gaosong@loongson.cn> | ||
14 | --- | ||
15 | target/loongarch/insns.decode | 30 ++ | ||
16 | target/loongarch/disas.c | 30 ++ | ||
17 | target/loongarch/vec_helper.c | 456 ++++++++++++-------- | ||
18 | target/loongarch/insn_trans/trans_vec.c.inc | 28 ++ | ||
19 | 4 files changed, 353 insertions(+), 191 deletions(-) | ||
20 | |||
21 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/target/loongarch/insns.decode | ||
24 | +++ b/target/loongarch/insns.decode | ||
25 | @@ -XXX,XX +XXX,XX @@ xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5 | ||
26 | xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6 | ||
27 | xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7 | ||
28 | |||
29 | +xvssrln_b_h 0111 01001111 11001 ..... ..... ..... @vvv | ||
30 | +xvssrln_h_w 0111 01001111 11010 ..... ..... ..... @vvv | ||
31 | +xvssrln_w_d 0111 01001111 11011 ..... ..... ..... @vvv | ||
32 | +xvssran_b_h 0111 01001111 11101 ..... ..... ..... @vvv | ||
33 | +xvssran_h_w 0111 01001111 11110 ..... ..... ..... @vvv | ||
34 | +xvssran_w_d 0111 01001111 11111 ..... ..... ..... @vvv | ||
35 | +xvssrln_bu_h 0111 01010000 01001 ..... ..... ..... @vvv | ||
36 | +xvssrln_hu_w 0111 01010000 01010 ..... ..... ..... @vvv | ||
37 | +xvssrln_wu_d 0111 01010000 01011 ..... ..... ..... @vvv | ||
38 | +xvssran_bu_h 0111 01010000 01101 ..... ..... ..... @vvv | ||
39 | +xvssran_hu_w 0111 01010000 01110 ..... ..... ..... @vvv | ||
40 | +xvssran_wu_d 0111 01010000 01111 ..... ..... ..... @vvv | ||
41 | + | ||
42 | +xvssrlni_b_h 0111 01110100 10000 1 .... ..... ..... @vv_ui4 | ||
43 | +xvssrlni_h_w 0111 01110100 10001 ..... ..... ..... @vv_ui5 | ||
44 | +xvssrlni_w_d 0111 01110100 1001 ...... ..... ..... @vv_ui6 | ||
45 | +xvssrlni_d_q 0111 01110100 101 ....... ..... ..... @vv_ui7 | ||
46 | +xvssrani_b_h 0111 01110110 00000 1 .... ..... ..... @vv_ui4 | ||
47 | +xvssrani_h_w 0111 01110110 00001 ..... ..... ..... @vv_ui5 | ||
48 | +xvssrani_w_d 0111 01110110 0001 ...... ..... ..... @vv_ui6 | ||
49 | +xvssrani_d_q 0111 01110110 001 ....... ..... ..... @vv_ui7 | ||
50 | +xvssrlni_bu_h 0111 01110100 11000 1 .... ..... ..... @vv_ui4 | ||
51 | +xvssrlni_hu_w 0111 01110100 11001 ..... ..... ..... @vv_ui5 | ||
52 | +xvssrlni_wu_d 0111 01110100 1101 ...... ..... ..... @vv_ui6 | ||
53 | +xvssrlni_du_q 0111 01110100 111 ....... ..... ..... @vv_ui7 | ||
54 | +xvssrani_bu_h 0111 01110110 01000 1 .... ..... ..... @vv_ui4 | ||
55 | +xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5 | ||
56 | +xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6 | ||
57 | +xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7 | ||
58 | + | ||
59 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
60 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
61 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
62 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/loongarch/disas.c | ||
65 | +++ b/target/loongarch/disas.c | ||
66 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsrarni_h_w, vv_i) | ||
67 | INSN_LASX(xvsrarni_w_d, vv_i) | ||
68 | INSN_LASX(xvsrarni_d_q, vv_i) | ||
69 | |||
70 | +INSN_LASX(xvssrln_b_h, vvv) | ||
71 | +INSN_LASX(xvssrln_h_w, vvv) | ||
72 | +INSN_LASX(xvssrln_w_d, vvv) | ||
73 | +INSN_LASX(xvssran_b_h, vvv) | ||
74 | +INSN_LASX(xvssran_h_w, vvv) | ||
75 | +INSN_LASX(xvssran_w_d, vvv) | ||
76 | +INSN_LASX(xvssrln_bu_h, vvv) | ||
77 | +INSN_LASX(xvssrln_hu_w, vvv) | ||
78 | +INSN_LASX(xvssrln_wu_d, vvv) | ||
79 | +INSN_LASX(xvssran_bu_h, vvv) | ||
80 | +INSN_LASX(xvssran_hu_w, vvv) | ||
81 | +INSN_LASX(xvssran_wu_d, vvv) | ||
82 | + | ||
83 | +INSN_LASX(xvssrlni_b_h, vv_i) | ||
84 | +INSN_LASX(xvssrlni_h_w, vv_i) | ||
85 | +INSN_LASX(xvssrlni_w_d, vv_i) | ||
86 | +INSN_LASX(xvssrlni_d_q, vv_i) | ||
87 | +INSN_LASX(xvssrani_b_h, vv_i) | ||
88 | +INSN_LASX(xvssrani_h_w, vv_i) | ||
89 | +INSN_LASX(xvssrani_w_d, vv_i) | ||
90 | +INSN_LASX(xvssrani_d_q, vv_i) | ||
91 | +INSN_LASX(xvssrlni_bu_h, vv_i) | ||
92 | +INSN_LASX(xvssrlni_hu_w, vv_i) | ||
93 | +INSN_LASX(xvssrlni_wu_d, vv_i) | ||
94 | +INSN_LASX(xvssrlni_du_q, vv_i) | ||
95 | +INSN_LASX(xvssrani_bu_h, vv_i) | ||
96 | +INSN_LASX(xvssrani_hu_w, vv_i) | ||
97 | +INSN_LASX(xvssrani_wu_d, vv_i) | ||
98 | +INSN_LASX(xvssrani_du_q, vv_i) | ||
99 | + | ||
100 | INSN_LASX(xvreplgr2vr_b, vr) | ||
101 | INSN_LASX(xvreplgr2vr_h, vr) | ||
102 | INSN_LASX(xvreplgr2vr_w, vr) | ||
103 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/loongarch/vec_helper.c | ||
106 | +++ b/target/loongarch/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ SSRLNS(B, uint16_t, int16_t, uint8_t) | ||
108 | SSRLNS(H, uint32_t, int32_t, uint16_t) | ||
109 | SSRLNS(W, uint64_t, int64_t, uint32_t) | ||
110 | |||
111 | -#define VSSRLN(NAME, BIT, T, E1, E2) \ | ||
112 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
113 | -{ \ | ||
114 | - int i; \ | ||
115 | - VReg *Vd = (VReg *)vd; \ | ||
116 | - VReg *Vj = (VReg *)vj; \ | ||
117 | - VReg *Vk = (VReg *)vk; \ | ||
118 | - \ | ||
119 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
120 | - Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \ | ||
121 | - } \ | ||
122 | - Vd->D(1) = 0; \ | ||
123 | +#define VSSRLN(NAME, BIT, E1, E2, E3) \ | ||
124 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
125 | +{ \ | ||
126 | + int i, j, ofs; \ | ||
127 | + VReg *Vd = (VReg *)vd; \ | ||
128 | + VReg *Vj = (VReg *)vj; \ | ||
129 | + VReg *Vk = (VReg *)vk; \ | ||
130 | + int oprsz = simd_oprsz(desc); \ | ||
131 | + \ | ||
132 | + ofs = LSX_LEN / BIT; \ | ||
133 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
134 | + for (j = 0; j < ofs; j++) { \ | ||
135 | + Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ | ||
136 | + Vk->E3(j + ofs * i) % BIT, \ | ||
137 | + BIT / 2 - 1); \ | ||
138 | + } \ | ||
139 | + Vd->D(2 * i + 1) = 0; \ | ||
140 | + } \ | ||
141 | } | ||
142 | |||
143 | -VSSRLN(vssrln_b_h, 16, uint16_t, B, H) | ||
144 | -VSSRLN(vssrln_h_w, 32, uint32_t, H, W) | ||
145 | -VSSRLN(vssrln_w_d, 64, uint64_t, W, D) | ||
146 | +VSSRLN(vssrln_b_h, 16, B, H, UH) | ||
147 | +VSSRLN(vssrln_h_w, 32, H, W, UW) | ||
148 | +VSSRLN(vssrln_w_d, 64, W, D, UD) | ||
149 | |||
150 | #define SSRANS(E, T1, T2) \ | ||
151 | static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ | ||
152 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ | ||
153 | shft_res = e2 >> sa; \ | ||
154 | } \ | ||
155 | T2 mask; \ | ||
156 | - mask = (1ll << sh) -1; \ | ||
157 | + mask = (1ll << sh) - 1; \ | ||
158 | if (shft_res > mask) { \ | ||
159 | return mask; \ | ||
160 | - } else if (shft_res < -(mask +1)) { \ | ||
161 | + } else if (shft_res < -(mask + 1)) { \ | ||
162 | return ~mask; \ | ||
163 | } else { \ | ||
164 | return shft_res; \ | ||
165 | @@ -XXX,XX +XXX,XX @@ SSRANS(B, int16_t, int8_t) | ||
166 | SSRANS(H, int32_t, int16_t) | ||
167 | SSRANS(W, int64_t, int32_t) | ||
168 | |||
169 | -#define VSSRAN(NAME, BIT, T, E1, E2) \ | ||
170 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
171 | -{ \ | ||
172 | - int i; \ | ||
173 | - VReg *Vd = (VReg *)vd; \ | ||
174 | - VReg *Vj = (VReg *)vj; \ | ||
175 | - VReg *Vk = (VReg *)vk; \ | ||
176 | - \ | ||
177 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
178 | - Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
179 | - } \ | ||
180 | - Vd->D(1) = 0; \ | ||
181 | +#define VSSRAN(NAME, BIT, E1, E2, E3) \ | ||
182 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
183 | +{ \ | ||
184 | + int i, j, ofs; \ | ||
185 | + VReg *Vd = (VReg *)vd; \ | ||
186 | + VReg *Vj = (VReg *)vj; \ | ||
187 | + VReg *Vk = (VReg *)vk; \ | ||
188 | + int oprsz = simd_oprsz(desc); \ | ||
189 | + \ | ||
190 | + ofs = LSX_LEN / BIT; \ | ||
191 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
192 | + for (j = 0; j < ofs; j++) { \ | ||
193 | + Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ | ||
194 | + Vk->E3(j + ofs * i) % BIT, \ | ||
195 | + BIT / 2 - 1); \ | ||
196 | + } \ | ||
197 | + Vd->D(2 * i + 1) = 0; \ | ||
198 | + } \ | ||
199 | } | ||
200 | |||
201 | -VSSRAN(vssran_b_h, 16, uint16_t, B, H) | ||
202 | -VSSRAN(vssran_h_w, 32, uint32_t, H, W) | ||
203 | -VSSRAN(vssran_w_d, 64, uint64_t, W, D) | ||
204 | +VSSRAN(vssran_b_h, 16, B, H, UH) | ||
205 | +VSSRAN(vssran_h_w, 32, H, W, UW) | ||
206 | +VSSRAN(vssran_w_d, 64, W, D, UD) | ||
207 | |||
208 | #define SSRLNU(E, T1, T2, T3) \ | ||
209 | static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ | ||
210 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ | ||
211 | shft_res = (((T1)e2) >> sa); \ | ||
212 | } \ | ||
213 | T2 mask; \ | ||
214 | - mask = (1ull << sh) -1; \ | ||
215 | + mask = (1ull << sh) - 1; \ | ||
216 | if (shft_res > mask) { \ | ||
217 | return mask; \ | ||
218 | } else { \ | ||
219 | @@ -XXX,XX +XXX,XX @@ SSRLNU(B, uint16_t, uint8_t, int16_t) | ||
220 | SSRLNU(H, uint32_t, uint16_t, int32_t) | ||
221 | SSRLNU(W, uint64_t, uint32_t, int64_t) | ||
222 | |||
223 | -#define VSSRLNU(NAME, BIT, T, E1, E2) \ | ||
224 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
225 | -{ \ | ||
226 | - int i; \ | ||
227 | - VReg *Vd = (VReg *)vd; \ | ||
228 | - VReg *Vj = (VReg *)vj; \ | ||
229 | - VReg *Vk = (VReg *)vk; \ | ||
230 | - \ | ||
231 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
232 | - Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
233 | - } \ | ||
234 | - Vd->D(1) = 0; \ | ||
235 | +#define VSSRLNU(NAME, BIT, E1, E2, E3) \ | ||
236 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
237 | +{ \ | ||
238 | + int i, j, ofs; \ | ||
239 | + VReg *Vd = (VReg *)vd; \ | ||
240 | + VReg *Vj = (VReg *)vj; \ | ||
241 | + VReg *Vk = (VReg *)vk; \ | ||
242 | + int oprsz = simd_oprsz(desc); \ | ||
243 | + \ | ||
244 | + ofs = LSX_LEN / BIT; \ | ||
245 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
246 | + for (j = 0; j < ofs; j++) { \ | ||
247 | + Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
248 | + Vk->E3(j + ofs * i) % BIT, \ | ||
249 | + BIT / 2); \ | ||
250 | + } \ | ||
251 | + Vd->D(2 * i + 1) = 0; \ | ||
252 | + } \ | ||
253 | } | ||
254 | |||
255 | -VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H) | ||
256 | -VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W) | ||
257 | -VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D) | ||
258 | +VSSRLNU(vssrln_bu_h, 16, B, H, UH) | ||
259 | +VSSRLNU(vssrln_hu_w, 32, H, W, UW) | ||
260 | +VSSRLNU(vssrln_wu_d, 64, W, D, UD) | ||
261 | |||
262 | #define SSRANU(E, T1, T2, T3) \ | ||
263 | static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ | ||
264 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ | ||
265 | shft_res = 0; \ | ||
266 | } \ | ||
267 | T2 mask; \ | ||
268 | - mask = (1ull << sh) -1; \ | ||
269 | + mask = (1ull << sh) - 1; \ | ||
270 | if (shft_res > mask) { \ | ||
271 | return mask; \ | ||
272 | } else { \ | ||
273 | @@ -XXX,XX +XXX,XX @@ SSRANU(B, uint16_t, uint8_t, int16_t) | ||
274 | SSRANU(H, uint32_t, uint16_t, int32_t) | ||
275 | SSRANU(W, uint64_t, uint32_t, int64_t) | ||
276 | |||
277 | -#define VSSRANU(NAME, BIT, T, E1, E2) \ | ||
278 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
279 | -{ \ | ||
280 | - int i; \ | ||
281 | - VReg *Vd = (VReg *)vd; \ | ||
282 | - VReg *Vj = (VReg *)vj; \ | ||
283 | - VReg *Vk = (VReg *)vk; \ | ||
284 | - \ | ||
285 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
286 | - Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
287 | - } \ | ||
288 | - Vd->D(1) = 0; \ | ||
289 | -} | ||
290 | - | ||
291 | -VSSRANU(vssran_bu_h, 16, uint16_t, B, H) | ||
292 | -VSSRANU(vssran_hu_w, 32, uint32_t, H, W) | ||
293 | -VSSRANU(vssran_wu_d, 64, uint64_t, W, D) | ||
294 | - | ||
295 | -#define VSSRLNI(NAME, BIT, E1, E2) \ | ||
296 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
297 | -{ \ | ||
298 | - int i; \ | ||
299 | - VReg temp; \ | ||
300 | - VReg *Vd = (VReg *)vd; \ | ||
301 | - VReg *Vj = (VReg *)vj; \ | ||
302 | - \ | ||
303 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
304 | - temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
305 | - temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ | ||
306 | - } \ | ||
307 | - *Vd = temp; \ | ||
308 | +#define VSSRANU(NAME, BIT, E1, E2, E3) \ | ||
309 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
310 | +{ \ | ||
311 | + int i, j, ofs; \ | ||
312 | + VReg *Vd = (VReg *)vd; \ | ||
313 | + VReg *Vj = (VReg *)vj; \ | ||
314 | + VReg *Vk = (VReg *)vk; \ | ||
315 | + int oprsz = simd_oprsz(desc); \ | ||
316 | + \ | ||
317 | + ofs = LSX_LEN / BIT; \ | ||
318 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
319 | + for (j = 0; j < ofs; j++) { \ | ||
320 | + Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ | ||
321 | + Vk->E3(j + ofs * i) % BIT, \ | ||
322 | + BIT / 2); \ | ||
323 | + } \ | ||
324 | + Vd->D(2 * i + 1) = 0; \ | ||
325 | + } \ | ||
326 | } | ||
327 | |||
328 | -void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
329 | -{ | ||
330 | - Int128 shft_res1, shft_res2, mask; | ||
331 | - VReg *Vd = (VReg *)vd; | ||
332 | - VReg *Vj = (VReg *)vj; | ||
333 | +VSSRANU(vssran_bu_h, 16, B, H, UH) | ||
334 | +VSSRANU(vssran_hu_w, 32, H, W, UW) | ||
335 | +VSSRANU(vssran_wu_d, 64, W, D, UD) | ||
336 | + | ||
337 | +#define VSSRLNI(NAME, BIT, E1, E2) \ | ||
338 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
339 | +{ \ | ||
340 | + int i, j, ofs; \ | ||
341 | + VReg temp = {}; \ | ||
342 | + VReg *Vd = (VReg *)vd; \ | ||
343 | + VReg *Vj = (VReg *)vj; \ | ||
344 | + int oprsz = simd_oprsz(desc); \ | ||
345 | + \ | ||
346 | + ofs = LSX_LEN / BIT; \ | ||
347 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
348 | + for (j = 0; j < ofs; j++) { \ | ||
349 | + temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ | ||
350 | + imm, BIT / 2 - 1); \ | ||
351 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \ | ||
352 | + imm, BIT / 2 - 1); \ | ||
353 | + } \ | ||
354 | + } \ | ||
355 | + *Vd = temp; \ | ||
356 | +} | ||
357 | + | ||
358 | +static void do_vssrlni_q(VReg *Vd, VReg *Vj, | ||
359 | + uint64_t imm, int idx, Int128 mask) | ||
360 | +{ | ||
361 | + Int128 shft_res1, shft_res2; | ||
362 | |||
363 | if (imm == 0) { | ||
364 | - shft_res1 = Vj->Q(0); | ||
365 | - shft_res2 = Vd->Q(0); | ||
366 | + shft_res1 = Vj->Q(idx); | ||
367 | + shft_res2 = Vd->Q(idx); | ||
368 | } else { | ||
369 | - shft_res1 = int128_urshift(Vj->Q(0), imm); | ||
370 | - shft_res2 = int128_urshift(Vd->Q(0), imm); | ||
371 | + shft_res1 = int128_urshift(Vj->Q(idx), imm); | ||
372 | + shft_res2 = int128_urshift(Vd->Q(idx), imm); | ||
373 | } | ||
374 | - mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
375 | |||
376 | if (int128_ult(mask, shft_res1)) { | ||
377 | - Vd->D(0) = int128_getlo(mask); | ||
378 | + Vd->D(idx * 2) = int128_getlo(mask); | ||
379 | }else { | ||
380 | - Vd->D(0) = int128_getlo(shft_res1); | ||
381 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
382 | } | ||
383 | |||
384 | if (int128_ult(mask, shft_res2)) { | ||
385 | - Vd->D(1) = int128_getlo(mask); | ||
386 | + Vd->D(idx * 2 + 1) = int128_getlo(mask); | ||
387 | }else { | ||
388 | - Vd->D(1) = int128_getlo(shft_res2); | ||
389 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
390 | + } | ||
391 | +} | ||
392 | + | ||
393 | +void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
394 | +{ | ||
395 | + int i; | ||
396 | + Int128 mask; | ||
397 | + VReg *Vd = (VReg *)vd; | ||
398 | + VReg *Vj = (VReg *)vj; | ||
399 | + int oprsz = simd_oprsz(desc); | ||
400 | + | ||
401 | + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
402 | + | ||
403 | + for (i = 0; i < oprsz / 16; i++) { | ||
404 | + do_vssrlni_q(Vd, Vj, imm, i, mask); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | @@ -XXX,XX +XXX,XX @@ VSSRLNI(vssrlni_b_h, 16, B, H) | ||
409 | VSSRLNI(vssrlni_h_w, 32, H, W) | ||
410 | VSSRLNI(vssrlni_w_d, 64, W, D) | ||
411 | |||
412 | -#define VSSRANI(NAME, BIT, E1, E2) \ | ||
413 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
414 | -{ \ | ||
415 | - int i; \ | ||
416 | - VReg temp; \ | ||
417 | - VReg *Vd = (VReg *)vd; \ | ||
418 | - VReg *Vj = (VReg *)vj; \ | ||
419 | - \ | ||
420 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
421 | - temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
422 | - temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ | ||
423 | - } \ | ||
424 | - *Vd = temp; \ | ||
425 | -} | ||
426 | - | ||
427 | -void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
428 | -{ | ||
429 | - Int128 shft_res1, shft_res2, mask, min; | ||
430 | - VReg *Vd = (VReg *)vd; | ||
431 | - VReg *Vj = (VReg *)vj; | ||
432 | +#define VSSRANI(NAME, BIT, E1, E2) \ | ||
433 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
434 | +{ \ | ||
435 | + int i, j, ofs; \ | ||
436 | + VReg temp = {}; \ | ||
437 | + VReg *Vd = (VReg *)vd; \ | ||
438 | + VReg *Vj = (VReg *)vj; \ | ||
439 | + int oprsz = simd_oprsz(desc); \ | ||
440 | + \ | ||
441 | + ofs = LSX_LEN / BIT; \ | ||
442 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
443 | + for (j = 0; j < ofs; j++) { \ | ||
444 | + temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ | ||
445 | + imm, BIT / 2 - 1); \ | ||
446 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \ | ||
447 | + imm, BIT / 2 - 1); \ | ||
448 | + } \ | ||
449 | + } \ | ||
450 | + *Vd = temp; \ | ||
451 | +} | ||
452 | + | ||
453 | +static void do_vssrani_d_q(VReg *Vd, VReg *Vj, | ||
454 | + uint64_t imm, int idx, Int128 mask, Int128 min) | ||
455 | +{ | ||
456 | + Int128 shft_res1, shft_res2; | ||
457 | |||
458 | if (imm == 0) { | ||
459 | - shft_res1 = Vj->Q(0); | ||
460 | - shft_res2 = Vd->Q(0); | ||
461 | + shft_res1 = Vj->Q(idx); | ||
462 | + shft_res2 = Vd->Q(idx); | ||
463 | } else { | ||
464 | - shft_res1 = int128_rshift(Vj->Q(0), imm); | ||
465 | - shft_res2 = int128_rshift(Vd->Q(0), imm); | ||
466 | + shft_res1 = int128_rshift(Vj->Q(idx), imm); | ||
467 | + shft_res2 = int128_rshift(Vd->Q(idx), imm); | ||
468 | } | ||
469 | - mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
470 | - min = int128_lshift(int128_one(), 63); | ||
471 | |||
472 | - if (int128_gt(shft_res1, mask)) { | ||
473 | - Vd->D(0) = int128_getlo(mask); | ||
474 | + if (int128_gt(shft_res1, mask)) { | ||
475 | + Vd->D(idx * 2) = int128_getlo(mask); | ||
476 | } else if (int128_lt(shft_res1, int128_neg(min))) { | ||
477 | - Vd->D(0) = int128_getlo(min); | ||
478 | + Vd->D(idx * 2) = int128_getlo(min); | ||
479 | } else { | ||
480 | - Vd->D(0) = int128_getlo(shft_res1); | ||
481 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
482 | } | ||
483 | |||
484 | if (int128_gt(shft_res2, mask)) { | ||
485 | - Vd->D(1) = int128_getlo(mask); | ||
486 | + Vd->D(idx * 2 + 1) = int128_getlo(mask); | ||
487 | } else if (int128_lt(shft_res2, int128_neg(min))) { | ||
488 | - Vd->D(1) = int128_getlo(min); | ||
489 | + Vd->D(idx * 2 + 1) = int128_getlo(min); | ||
490 | } else { | ||
491 | - Vd->D(1) = int128_getlo(shft_res2); | ||
492 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
493 | + } | ||
494 | +} | ||
495 | + | ||
496 | +void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
497 | +{ | ||
498 | + int i; | ||
499 | + Int128 mask, min; | ||
500 | + VReg *Vd = (VReg *)vd; | ||
501 | + VReg *Vj = (VReg *)vj; | ||
502 | + int oprsz = simd_oprsz(desc); | ||
503 | + | ||
504 | + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
505 | + min = int128_lshift(int128_one(), 63); | ||
506 | + | ||
507 | + for (i = 0; i < oprsz / 16; i++) { | ||
508 | + do_vssrani_d_q(Vd, Vj, imm, i, mask, min); | ||
509 | } | ||
510 | } | ||
511 | |||
512 | + | ||
513 | VSSRANI(vssrani_b_h, 16, B, H) | ||
514 | VSSRANI(vssrani_h_w, 32, H, W) | ||
515 | VSSRANI(vssrani_w_d, 64, W, D) | ||
516 | |||
517 | -#define VSSRLNUI(NAME, BIT, E1, E2) \ | ||
518 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
519 | -{ \ | ||
520 | - int i; \ | ||
521 | - VReg temp; \ | ||
522 | - VReg *Vd = (VReg *)vd; \ | ||
523 | - VReg *Vj = (VReg *)vj; \ | ||
524 | - \ | ||
525 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
526 | - temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
527 | - temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
528 | - } \ | ||
529 | - *Vd = temp; \ | ||
530 | +#define VSSRLNUI(NAME, BIT, E1, E2) \ | ||
531 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
532 | +{ \ | ||
533 | + int i, j, ofs; \ | ||
534 | + VReg temp = {}; \ | ||
535 | + VReg *Vd = (VReg *)vd; \ | ||
536 | + VReg *Vj = (VReg *)vj; \ | ||
537 | + int oprsz = simd_oprsz(desc); \ | ||
538 | + \ | ||
539 | + ofs = LSX_LEN / BIT; \ | ||
540 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
541 | + for (j = 0; j < ofs; j++) { \ | ||
542 | + temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
543 | + imm, BIT / 2); \ | ||
544 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \ | ||
545 | + imm, BIT / 2); \ | ||
546 | + } \ | ||
547 | + } \ | ||
548 | + *Vd = temp; \ | ||
549 | } | ||
550 | |||
551 | void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
552 | { | ||
553 | - Int128 shft_res1, shft_res2, mask; | ||
554 | + int i; | ||
555 | + Int128 mask; | ||
556 | VReg *Vd = (VReg *)vd; | ||
557 | VReg *Vj = (VReg *)vj; | ||
558 | + int oprsz = simd_oprsz(desc); | ||
559 | |||
560 | - if (imm == 0) { | ||
561 | - shft_res1 = Vj->Q(0); | ||
562 | - shft_res2 = Vd->Q(0); | ||
563 | - } else { | ||
564 | - shft_res1 = int128_urshift(Vj->Q(0), imm); | ||
565 | - shft_res2 = int128_urshift(Vd->Q(0), imm); | ||
566 | - } | ||
567 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
568 | |||
569 | - if (int128_ult(mask, shft_res1)) { | ||
570 | - Vd->D(0) = int128_getlo(mask); | ||
571 | - }else { | ||
572 | - Vd->D(0) = int128_getlo(shft_res1); | ||
573 | - } | ||
574 | - | ||
575 | - if (int128_ult(mask, shft_res2)) { | ||
576 | - Vd->D(1) = int128_getlo(mask); | ||
577 | - }else { | ||
578 | - Vd->D(1) = int128_getlo(shft_res2); | ||
579 | + for (i = 0; i < oprsz / 16; i++) { | ||
580 | + do_vssrlni_q(Vd, Vj, imm, i, mask); | ||
581 | } | ||
582 | } | ||
583 | |||
584 | @@ -XXX,XX +XXX,XX @@ VSSRLNUI(vssrlni_bu_h, 16, B, H) | ||
585 | VSSRLNUI(vssrlni_hu_w, 32, H, W) | ||
586 | VSSRLNUI(vssrlni_wu_d, 64, W, D) | ||
587 | |||
588 | -#define VSSRANUI(NAME, BIT, E1, E2) \ | ||
589 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
590 | -{ \ | ||
591 | - int i; \ | ||
592 | - VReg temp; \ | ||
593 | - VReg *Vd = (VReg *)vd; \ | ||
594 | - VReg *Vj = (VReg *)vj; \ | ||
595 | - \ | ||
596 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
597 | - temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
598 | - temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
599 | - } \ | ||
600 | - *Vd = temp; \ | ||
601 | -} | ||
602 | - | ||
603 | -void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
604 | -{ | ||
605 | - Int128 shft_res1, shft_res2, mask; | ||
606 | - VReg *Vd = (VReg *)vd; | ||
607 | - VReg *Vj = (VReg *)vj; | ||
608 | +#define VSSRANUI(NAME, BIT, E1, E2) \ | ||
609 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
610 | +{ \ | ||
611 | + int i, j, ofs; \ | ||
612 | + VReg temp = {}; \ | ||
613 | + VReg *Vd = (VReg *)vd; \ | ||
614 | + VReg *Vj = (VReg *)vj; \ | ||
615 | + int oprsz = simd_oprsz(desc); \ | ||
616 | + \ | ||
617 | + ofs = LSX_LEN / BIT; \ | ||
618 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
619 | + for (j = 0; j < ofs; j++) { \ | ||
620 | + temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ | ||
621 | + imm, BIT / 2); \ | ||
622 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \ | ||
623 | + imm, BIT / 2); \ | ||
624 | + } \ | ||
625 | + } \ | ||
626 | + *Vd = temp; \ | ||
627 | +} | ||
628 | + | ||
629 | +static void do_vssrani_du_q(VReg *Vd, VReg *Vj, | ||
630 | + uint64_t imm, int idx, Int128 mask) | ||
631 | +{ | ||
632 | + Int128 shft_res1, shft_res2; | ||
633 | |||
634 | if (imm == 0) { | ||
635 | - shft_res1 = Vj->Q(0); | ||
636 | - shft_res2 = Vd->Q(0); | ||
637 | + shft_res1 = Vj->Q(idx); | ||
638 | + shft_res2 = Vd->Q(idx); | ||
639 | } else { | ||
640 | - shft_res1 = int128_rshift(Vj->Q(0), imm); | ||
641 | - shft_res2 = int128_rshift(Vd->Q(0), imm); | ||
642 | + shft_res1 = int128_rshift(Vj->Q(idx), imm); | ||
643 | + shft_res2 = int128_rshift(Vd->Q(idx), imm); | ||
644 | } | ||
645 | |||
646 | - if (int128_lt(Vj->Q(0), int128_zero())) { | ||
647 | + if (int128_lt(Vj->Q(idx), int128_zero())) { | ||
648 | shft_res1 = int128_zero(); | ||
649 | } | ||
650 | |||
651 | - if (int128_lt(Vd->Q(0), int128_zero())) { | ||
652 | + if (int128_lt(Vd->Q(idx), int128_zero())) { | ||
653 | shft_res2 = int128_zero(); | ||
654 | } | ||
655 | - | ||
656 | - mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
657 | - | ||
658 | if (int128_ult(mask, shft_res1)) { | ||
659 | - Vd->D(0) = int128_getlo(mask); | ||
660 | + Vd->D(idx * 2) = int128_getlo(mask); | ||
661 | }else { | ||
662 | - Vd->D(0) = int128_getlo(shft_res1); | ||
663 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
664 | } | ||
665 | |||
666 | if (int128_ult(mask, shft_res2)) { | ||
667 | - Vd->D(1) = int128_getlo(mask); | ||
668 | + Vd->D(idx * 2 + 1) = int128_getlo(mask); | ||
669 | }else { | ||
670 | - Vd->D(1) = int128_getlo(shft_res2); | ||
671 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
672 | + } | ||
673 | + | ||
674 | +} | ||
675 | + | ||
676 | +void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
677 | +{ | ||
678 | + int i; | ||
679 | + Int128 mask; | ||
680 | + VReg *Vd = (VReg *)vd; | ||
681 | + VReg *Vj = (VReg *)vj; | ||
682 | + int oprsz = simd_oprsz(desc); | ||
683 | + | ||
684 | + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
685 | + | ||
686 | + for (i = 0; i < oprsz / 16; i++) { | ||
687 | + do_vssrani_du_q(Vd, Vj, imm, i, mask); | ||
688 | } | ||
689 | } | ||
690 | |||
691 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
692 | index XXXXXXX..XXXXXXX 100644 | ||
693 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
694 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
695 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d) | ||
696 | TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h) | ||
697 | TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w) | ||
698 | TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d) | ||
699 | +TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h) | ||
700 | +TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w) | ||
701 | +TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d) | ||
702 | +TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h) | ||
703 | +TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w) | ||
704 | +TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d) | ||
705 | +TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h) | ||
706 | +TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w) | ||
707 | +TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d) | ||
708 | +TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h) | ||
709 | +TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w) | ||
710 | +TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d) | ||
711 | |||
712 | TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h) | ||
713 | TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w) | ||
714 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h) | ||
715 | TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w) | ||
716 | TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d) | ||
717 | TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q) | ||
718 | +TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h) | ||
719 | +TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w) | ||
720 | +TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d) | ||
721 | +TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q) | ||
722 | +TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h) | ||
723 | +TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w) | ||
724 | +TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d) | ||
725 | +TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q) | ||
726 | +TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h) | ||
727 | +TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w) | ||
728 | +TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d) | ||
729 | +TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q) | ||
730 | +TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h) | ||
731 | +TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w) | ||
732 | +TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d) | ||
733 | +TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q) | ||
734 | |||
735 | TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h) | ||
736 | TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w) | ||
737 | -- | ||
738 | 2.39.1 | ||
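
[Editor's note, not part of the patch] All of the helpers rewritten above
share one pattern: the operation size now comes from simd_oprsz(desc) and
the body iterates over oprsz / 16 independent 128-bit lanes, so the same
helper serves a 16-byte LSX register and a 32-byte LASX register. A minimal
sketch of the destination index math, with illustrative names:

    /* Editor's sketch, not QEMU code: for source element width 'bit',
     * ofs = 128 / bit narrowed results are produced per 128-bit lane,
     * and lane i stores them in the low half of destination lane i. */
    static int narrow_dst_index(int bit, int i, int j)
    {
        int ofs = 128 / bit;        /* results per 128-bit lane */
        return j + ofs * 2 * i;     /* low half of destination lane i */
    }

For vssrln_b_h (bit = 16, ofs = 8), lane 1's first result (i = 1, j = 0)
lands at byte index 16, and Vd->D(2 * i + 1) = 0 then clears the unused
high 64 bits of that lane.
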
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSSRLRN.{B.H/H.W/W.D}; | ||
3 | - XVSSRARN.{B.H/H.W/W.D}; | ||
4 | - XVSSRLRN.{BU.H/HU.W/WU.D}; | ||
5 | - XVSSRARN.{BU.H/HU.W/WU.D}; | ||
6 | - XVSSRLRNI.{B.H/H.W/W.D/D.Q}; | ||
7 | - XVSSRARNI.{B.H/H.W/W.D/D.Q}; | ||
8 | - XVSSRLRNI.{BU.H/HU.W/WU.D/DU.Q}; | ||
9 | - XVSSRARNI.{BU.H/HU.W/WU.D/DU.Q}. | ||
1 | 10 | ||
11 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-Id: <20230914022645.1151356-42-gaosong@loongson.cn> | ||
14 | --- | ||
15 | target/loongarch/insns.decode | 30 ++ | ||
16 | target/loongarch/disas.c | 30 ++ | ||
17 | target/loongarch/vec_helper.c | 489 ++++++++++++-------- | ||
18 | target/loongarch/insn_trans/trans_vec.c.inc | 28 ++ | ||
19 | 4 files changed, 378 insertions(+), 199 deletions(-) | ||
20 | |||
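
[Editor's note, not part of the patch] Each of these instructions shifts an
element right, rounds by adding back the last bit shifted out, narrows the
result to half width, and saturates on overflow. A self-contained sketch of
the unsigned rounded step, using made-up names (the patch's do_ssrlrns_*
helpers differ in typing details):

    #include <stdint.h>

    /* Editor's sketch: logical shift right by sa with rounding, then
     * saturate to an unsigned sh-bit result.  The r = bit(sa - 1),
     * res = (e >> sa) + r pattern also appears in do_vssrlrni_q() below. */
    static uint64_t ssrlrn_sketch(uint64_t e, int sa, int sh)
    {
        uint64_t r = sa ? (e >> (sa - 1)) & 1 : 0;    /* rounding bit */
        uint64_t res = (e >> sa) + r;                 /* shift and round */
        uint64_t max = ((uint64_t)1 << sh) - 1;       /* saturation bound */
        return res > max ? max : res;
    }

For example, ssrlrn_sketch(0x1ff, 1, 7) rounds 0x1ff >> 1 up to 0x100 and
then clamps it to 0x7f, which is how vssrlrn_b_h keeps a shifted halfword
inside a byte.
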
21 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/target/loongarch/insns.decode | ||
24 | +++ b/target/loongarch/insns.decode | ||
25 | @@ -XXX,XX +XXX,XX @@ xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5 | ||
26 | xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6 | ||
27 | xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7 | ||
28 | |||
29 | +xvssrlrn_b_h 0111 01010000 00001 ..... ..... ..... @vvv | ||
30 | +xvssrlrn_h_w 0111 01010000 00010 ..... ..... ..... @vvv | ||
31 | +xvssrlrn_w_d 0111 01010000 00011 ..... ..... ..... @vvv | ||
32 | +xvssrarn_b_h 0111 01010000 00101 ..... ..... ..... @vvv | ||
33 | +xvssrarn_h_w 0111 01010000 00110 ..... ..... ..... @vvv | ||
34 | +xvssrarn_w_d 0111 01010000 00111 ..... ..... ..... @vvv | ||
35 | +xvssrlrn_bu_h 0111 01010000 10001 ..... ..... ..... @vvv | ||
36 | +xvssrlrn_hu_w 0111 01010000 10010 ..... ..... ..... @vvv | ||
37 | +xvssrlrn_wu_d 0111 01010000 10011 ..... ..... ..... @vvv | ||
38 | +xvssrarn_bu_h 0111 01010000 10101 ..... ..... ..... @vvv | ||
39 | +xvssrarn_hu_w 0111 01010000 10110 ..... ..... ..... @vvv | ||
40 | +xvssrarn_wu_d 0111 01010000 10111 ..... ..... ..... @vvv | ||
41 | + | ||
42 | +xvssrlrni_b_h 0111 01110101 00000 1 .... ..... ..... @vv_ui4 | ||
43 | +xvssrlrni_h_w 0111 01110101 00001 ..... ..... ..... @vv_ui5 | ||
44 | +xvssrlrni_w_d 0111 01110101 0001 ...... ..... ..... @vv_ui6 | ||
45 | +xvssrlrni_d_q 0111 01110101 001 ....... ..... ..... @vv_ui7 | ||
46 | +xvssrarni_b_h 0111 01110110 10000 1 .... ..... ..... @vv_ui4 | ||
47 | +xvssrarni_h_w 0111 01110110 10001 ..... ..... ..... @vv_ui5 | ||
48 | +xvssrarni_w_d 0111 01110110 1001 ...... ..... ..... @vv_ui6 | ||
49 | +xvssrarni_d_q 0111 01110110 101 ....... ..... ..... @vv_ui7 | ||
50 | +xvssrlrni_bu_h 0111 01110101 01000 1 .... ..... ..... @vv_ui4 | ||
51 | +xvssrlrni_hu_w 0111 01110101 01001 ..... ..... ..... @vv_ui5 | ||
52 | +xvssrlrni_wu_d 0111 01110101 0101 ...... ..... ..... @vv_ui6 | ||
53 | +xvssrlrni_du_q 0111 01110101 011 ....... ..... ..... @vv_ui7 | ||
54 | +xvssrarni_bu_h 0111 01110110 11000 1 .... ..... ..... @vv_ui4 | ||
55 | +xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5 | ||
56 | +xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6 | ||
57 | +xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7 | ||
58 | + | ||
59 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
60 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
61 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
62 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/loongarch/disas.c | ||
65 | +++ b/target/loongarch/disas.c | ||
66 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssrani_hu_w, vv_i) | ||
67 | INSN_LASX(xvssrani_wu_d, vv_i) | ||
68 | INSN_LASX(xvssrani_du_q, vv_i) | ||
69 | |||
70 | +INSN_LASX(xvssrlrn_b_h, vvv) | ||
71 | +INSN_LASX(xvssrlrn_h_w, vvv) | ||
72 | +INSN_LASX(xvssrlrn_w_d, vvv) | ||
73 | +INSN_LASX(xvssrarn_b_h, vvv) | ||
74 | +INSN_LASX(xvssrarn_h_w, vvv) | ||
75 | +INSN_LASX(xvssrarn_w_d, vvv) | ||
76 | +INSN_LASX(xvssrlrn_bu_h, vvv) | ||
77 | +INSN_LASX(xvssrlrn_hu_w, vvv) | ||
78 | +INSN_LASX(xvssrlrn_wu_d, vvv) | ||
79 | +INSN_LASX(xvssrarn_bu_h, vvv) | ||
80 | +INSN_LASX(xvssrarn_hu_w, vvv) | ||
81 | +INSN_LASX(xvssrarn_wu_d, vvv) | ||
82 | + | ||
83 | +INSN_LASX(xvssrlrni_b_h, vv_i) | ||
84 | +INSN_LASX(xvssrlrni_h_w, vv_i) | ||
85 | +INSN_LASX(xvssrlrni_w_d, vv_i) | ||
86 | +INSN_LASX(xvssrlrni_d_q, vv_i) | ||
87 | +INSN_LASX(xvssrlrni_bu_h, vv_i) | ||
88 | +INSN_LASX(xvssrlrni_hu_w, vv_i) | ||
89 | +INSN_LASX(xvssrlrni_wu_d, vv_i) | ||
90 | +INSN_LASX(xvssrlrni_du_q, vv_i) | ||
91 | +INSN_LASX(xvssrarni_b_h, vv_i) | ||
92 | +INSN_LASX(xvssrarni_h_w, vv_i) | ||
93 | +INSN_LASX(xvssrarni_w_d, vv_i) | ||
94 | +INSN_LASX(xvssrarni_d_q, vv_i) | ||
95 | +INSN_LASX(xvssrarni_bu_h, vv_i) | ||
96 | +INSN_LASX(xvssrarni_hu_w, vv_i) | ||
97 | +INSN_LASX(xvssrarni_wu_d, vv_i) | ||
98 | +INSN_LASX(xvssrarni_du_q, vv_i) | ||
99 | + | ||
100 | INSN_LASX(xvreplgr2vr_b, vr) | ||
101 | INSN_LASX(xvreplgr2vr_h, vr) | ||
102 | INSN_LASX(xvreplgr2vr_w, vr) | ||
103 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/target/loongarch/vec_helper.c | ||
106 | +++ b/target/loongarch/vec_helper.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ | ||
108 | \ | ||
109 | shft_res = do_vsrlr_ ## E2(e2, sa); \ | ||
110 | T1 mask; \ | ||
111 | - mask = (1ull << sh) -1; \ | ||
112 | + mask = (1ull << sh) - 1; \ | ||
113 | if (shft_res > mask) { \ | ||
114 | return mask; \ | ||
115 | } else { \ | ||
116 | @@ -XXX,XX +XXX,XX @@ SSRLRNS(B, H, uint16_t, int16_t, uint8_t) | ||
117 | SSRLRNS(H, W, uint32_t, int32_t, uint16_t) | ||
118 | SSRLRNS(W, D, uint64_t, int64_t, uint32_t) | ||
119 | |||
120 | -#define VSSRLRN(NAME, BIT, T, E1, E2) \ | ||
121 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
122 | -{ \ | ||
123 | - int i; \ | ||
124 | - VReg *Vd = (VReg *)vd; \ | ||
125 | - VReg *Vj = (VReg *)vj; \ | ||
126 | - VReg *Vk = (VReg *)vk; \ | ||
127 | - \ | ||
128 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
129 | - Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
130 | - } \ | ||
131 | - Vd->D(1) = 0; \ | ||
132 | +#define VSSRLRN(NAME, BIT, E1, E2, E3) \ | ||
133 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
134 | +{ \ | ||
135 | + int i, j, ofs; \ | ||
136 | + VReg *Vd = (VReg *)vd; \ | ||
137 | + VReg *Vj = (VReg *)vj; \ | ||
138 | + VReg *Vk = (VReg *)vk; \ | ||
139 | + int oprsz = simd_oprsz(desc); \ | ||
140 | + \ | ||
141 | + ofs = LSX_LEN / BIT; \ | ||
142 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
143 | + for (j = 0; j < ofs; j++) { \ | ||
144 | + Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ | ||
145 | + Vk->E3(j + ofs * i) % BIT, \ | ||
146 | + BIT / 2 - 1); \ | ||
147 | + } \ | ||
148 | + Vd->D(2 * i + 1) = 0; \ | ||
149 | + } \ | ||
150 | } | ||
151 | |||
152 | -VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H) | ||
153 | -VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W) | ||
154 | -VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D) | ||
155 | +VSSRLRN(vssrlrn_b_h, 16, B, H, UH) | ||
156 | +VSSRLRN(vssrlrn_h_w, 32, H, W, UW) | ||
157 | +VSSRLRN(vssrlrn_w_d, 64, W, D, UD) | ||
158 | |||
159 | #define SSRARNS(E1, E2, T1, T2) \ | ||
160 | static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ | ||
161 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ | ||
162 | \ | ||
163 | shft_res = do_vsrar_ ## E2(e2, sa); \ | ||
164 | T2 mask; \ | ||
165 | - mask = (1ll << sh) -1; \ | ||
166 | + mask = (1ll << sh) - 1; \ | ||
167 | if (shft_res > mask) { \ | ||
168 | return mask; \ | ||
169 | } else if (shft_res < -(mask +1)) { \ | ||
170 | @@ -XXX,XX +XXX,XX @@ SSRARNS(B, H, int16_t, int8_t) | ||
171 | SSRARNS(H, W, int32_t, int16_t) | ||
172 | SSRARNS(W, D, int64_t, int32_t) | ||
173 | |||
174 | -#define VSSRARN(NAME, BIT, T, E1, E2) \ | ||
175 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
176 | -{ \ | ||
177 | - int i; \ | ||
178 | - VReg *Vd = (VReg *)vd; \ | ||
179 | - VReg *Vj = (VReg *)vj; \ | ||
180 | - VReg *Vk = (VReg *)vk; \ | ||
181 | - \ | ||
182 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
183 | - Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
184 | - } \ | ||
185 | - Vd->D(1) = 0; \ | ||
186 | +#define VSSRARN(NAME, BIT, E1, E2, E3) \ | ||
187 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
188 | +{ \ | ||
189 | + int i, j, ofs; \ | ||
190 | + VReg *Vd = (VReg *)vd; \ | ||
191 | + VReg *Vj = (VReg *)vj; \ | ||
192 | + VReg *Vk = (VReg *)vk; \ | ||
193 | + int oprsz = simd_oprsz(desc); \ | ||
194 | + \ | ||
195 | + ofs = LSX_LEN / BIT; \ | ||
196 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
197 | + for (j = 0; j < ofs; j++) { \ | ||
198 | + Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ | ||
199 | + Vk->E3(j + ofs * i) % BIT, \ | ||
200 | +                                                       BIT / 2 - 1); \ | ||
201 | + } \ | ||
202 | + Vd->D(2 * i + 1) = 0; \ | ||
203 | + } \ | ||
204 | } | ||
205 | |||
206 | -VSSRARN(vssrarn_b_h, 16, uint16_t, B, H) | ||
207 | -VSSRARN(vssrarn_h_w, 32, uint32_t, H, W) | ||
208 | -VSSRARN(vssrarn_w_d, 64, uint64_t, W, D) | ||
209 | +VSSRARN(vssrarn_b_h, 16, B, H, UH) | ||
210 | +VSSRARN(vssrarn_h_w, 32, H, W, UW) | ||
211 | +VSSRARN(vssrarn_w_d, 64, W, D, UD) | ||
212 | |||
213 | #define SSRLRNU(E1, E2, T1, T2, T3) \ | ||
214 | static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ | ||
215 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ | ||
216 | shft_res = do_vsrlr_ ## E2(e2, sa); \ | ||
217 | \ | ||
218 | T2 mask; \ | ||
219 | - mask = (1ull << sh) -1; \ | ||
220 | + mask = (1ull << sh) - 1; \ | ||
221 | if (shft_res > mask) { \ | ||
222 | return mask; \ | ||
223 | } else { \ | ||
224 | @@ -XXX,XX +XXX,XX @@ SSRLRNU(B, H, uint16_t, uint8_t, int16_t) | ||
225 | SSRLRNU(H, W, uint32_t, uint16_t, int32_t) | ||
226 | SSRLRNU(W, D, uint64_t, uint32_t, int64_t) | ||
227 | |||
228 | -#define VSSRLRNU(NAME, BIT, T, E1, E2) \ | ||
229 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
230 | -{ \ | ||
231 | - int i; \ | ||
232 | - VReg *Vd = (VReg *)vd; \ | ||
233 | - VReg *Vj = (VReg *)vj; \ | ||
234 | - VReg *Vk = (VReg *)vk; \ | ||
235 | - \ | ||
236 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
237 | - Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
238 | - } \ | ||
239 | - Vd->D(1) = 0; \ | ||
240 | +#define VSSRLRNU(NAME, BIT, E1, E2, E3) \ | ||
241 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
242 | +{ \ | ||
243 | + int i, j, ofs; \ | ||
244 | + VReg *Vd = (VReg *)vd; \ | ||
245 | + VReg *Vj = (VReg *)vj; \ | ||
246 | + VReg *Vk = (VReg *)vk; \ | ||
247 | + int oprsz = simd_oprsz(desc); \ | ||
248 | + \ | ||
249 | + ofs = LSX_LEN / BIT; \ | ||
250 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
251 | + for (j = 0; j < ofs; j++) { \ | ||
252 | + Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
253 | + Vk->E3(j + ofs * i) % BIT, \ | ||
254 | + BIT / 2); \ | ||
255 | + } \ | ||
256 | + Vd->D(2 * i + 1) = 0; \ | ||
257 | + } \ | ||
258 | } | ||
259 | |||
260 | -VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H) | ||
261 | -VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W) | ||
262 | -VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D) | ||
263 | +VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH) | ||
264 | +VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW) | ||
265 | +VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD) | ||
266 | |||
267 | #define SSRARNU(E1, E2, T1, T2, T3) \ | ||
268 | static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ | ||
269 | @@ -XXX,XX +XXX,XX @@ static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ | ||
270 | shft_res = do_vsrar_ ## E2(e2, sa); \ | ||
271 | } \ | ||
272 | T2 mask; \ | ||
273 | - mask = (1ull << sh) -1; \ | ||
274 | + mask = (1ull << sh) - 1; \ | ||
275 | if (shft_res > mask) { \ | ||
276 | return mask; \ | ||
277 | } else { \ | ||
278 | @@ -XXX,XX +XXX,XX @@ SSRARNU(B, H, uint16_t, uint8_t, int16_t) | ||
279 | SSRARNU(H, W, uint32_t, uint16_t, int32_t) | ||
280 | SSRARNU(W, D, uint64_t, uint32_t, int64_t) | ||
281 | |||
282 | -#define VSSRARNU(NAME, BIT, T, E1, E2) \ | ||
283 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
284 | -{ \ | ||
285 | - int i; \ | ||
286 | - VReg *Vd = (VReg *)vd; \ | ||
287 | - VReg *Vj = (VReg *)vj; \ | ||
288 | - VReg *Vk = (VReg *)vk; \ | ||
289 | - \ | ||
290 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
291 | - Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
292 | - } \ | ||
293 | - Vd->D(1) = 0; \ | ||
294 | +#define VSSRARNU(NAME, BIT, E1, E2, E3) \ | ||
295 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
296 | +{ \ | ||
297 | + int i, j, ofs; \ | ||
298 | + VReg *Vd = (VReg *)vd; \ | ||
299 | + VReg *Vj = (VReg *)vj; \ | ||
300 | + VReg *Vk = (VReg *)vk; \ | ||
301 | + int oprsz = simd_oprsz(desc); \ | ||
302 | + \ | ||
303 | + ofs = LSX_LEN / BIT; \ | ||
304 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
305 | + for (j = 0; j < ofs; j++) { \ | ||
306 | + Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
307 | + Vk->E3(j + ofs * i) % BIT, \ | ||
308 | + BIT / 2); \ | ||
309 | + } \ | ||
310 | + Vd->D(2 * i + 1) = 0; \ | ||
311 | + } \ | ||
312 | } | ||
313 | |||
314 | -VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H) | ||
315 | -VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) | ||
316 | -VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) | ||
317 | +VSSRARNU(vssrarn_bu_h, 16, B, H, UH) | ||
318 | +VSSRARNU(vssrarn_hu_w, 32, H, W, UW) | ||
319 | +VSSRARNU(vssrarn_wu_d, 64, W, D, UD) | ||
320 | + | ||
321 | +#define VSSRLRNI(NAME, BIT, E1, E2) \ | ||
322 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
323 | +{ \ | ||
324 | + int i, j, ofs; \ | ||
325 | + VReg temp = {}; \ | ||
326 | + VReg *Vd = (VReg *)vd; \ | ||
327 | + VReg *Vj = (VReg *)vj; \ | ||
328 | + int oprsz = simd_oprsz(desc); \ | ||
329 | + \ | ||
330 | + ofs = LSX_LEN / BIT; \ | ||
331 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
332 | + for (j = 0; j < ofs; j++) { \ | ||
333 | + temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ | ||
334 | + imm, BIT / 2 - 1); \ | ||
335 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \ | ||
336 | + imm, BIT / 2 - 1); \ | ||
337 | + } \ | ||
338 | + } \ | ||
339 | + *Vd = temp; \ | ||
340 | +} | ||
341 | + | ||
342 | +static void do_vssrlrni_q(VReg *Vd, VReg *Vj, | ||
343 | + uint64_t imm, int idx, Int128 mask) | ||
344 | +{ | ||
345 | + Int128 shft_res1, shft_res2, r1, r2; | ||
346 | + if (imm == 0) { | ||
347 | + shft_res1 = Vj->Q(idx); | ||
348 | + shft_res2 = Vd->Q(idx); | ||
349 | + } else { | ||
350 | + r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one()); | ||
351 | + r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one()); | ||
352 | + shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1)); | ||
353 | + shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2)); | ||
354 | + } | ||
355 | |||
356 | -#define VSSRLRNI(NAME, BIT, E1, E2) \ | ||
357 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
358 | -{ \ | ||
359 | - int i; \ | ||
360 | - VReg temp; \ | ||
361 | - VReg *Vd = (VReg *)vd; \ | ||
362 | - VReg *Vj = (VReg *)vj; \ | ||
363 | - \ | ||
364 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
365 | - temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
366 | - temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ | ||
367 | - } \ | ||
368 | - *Vd = temp; \ | ||
369 | + if (int128_ult(mask, shft_res1)) { | ||
370 | + Vd->D(idx * 2) = int128_getlo(mask); | ||
371 | +    } else { | ||
372 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
373 | + } | ||
374 | + | ||
375 | + if (int128_ult(mask, shft_res2)) { | ||
376 | + Vd->D(idx * 2 + 1) = int128_getlo(mask); | ||
377 | +    } else { | ||
378 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
379 | + } | ||
380 | } | ||
381 | |||
382 | -#define VSSRLRNI_Q(NAME, sh) \ | ||
383 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
384 | -{ \ | ||
385 | - Int128 shft_res1, shft_res2, mask, r1, r2; \ | ||
386 | - VReg *Vd = (VReg *)vd; \ | ||
387 | - VReg *Vj = (VReg *)vj; \ | ||
388 | - \ | ||
389 | - if (imm == 0) { \ | ||
390 | - shft_res1 = Vj->Q(0); \ | ||
391 | - shft_res2 = Vd->Q(0); \ | ||
392 | - } else { \ | ||
393 | - r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \ | ||
394 | - r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \ | ||
395 | - \ | ||
396 | - shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \ | ||
397 | - shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \ | ||
398 | - } \ | ||
399 | - \ | ||
400 | - mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \ | ||
401 | - \ | ||
402 | - if (int128_ult(mask, shft_res1)) { \ | ||
403 | - Vd->D(0) = int128_getlo(mask); \ | ||
404 | - }else { \ | ||
405 | - Vd->D(0) = int128_getlo(shft_res1); \ | ||
406 | - } \ | ||
407 | - \ | ||
408 | - if (int128_ult(mask, shft_res2)) { \ | ||
409 | - Vd->D(1) = int128_getlo(mask); \ | ||
410 | - }else { \ | ||
411 | - Vd->D(1) = int128_getlo(shft_res2); \ | ||
412 | - } \ | ||
413 | +void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
414 | +{ | ||
415 | + int i; | ||
416 | + Int128 mask; | ||
417 | + VReg *Vd = (VReg *)vd; | ||
418 | + VReg *Vj = (VReg *)vj; | ||
419 | + int oprsz = simd_oprsz(desc); | ||
420 | + | ||
421 | + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
422 | + | ||
423 | + for (i = 0; i < oprsz / 16; i++) { | ||
424 | + do_vssrlrni_q(Vd, Vj, imm, i, mask); | ||
425 | + } | ||
426 | } | ||
427 | |||
428 | VSSRLRNI(vssrlrni_b_h, 16, B, H) | ||
429 | VSSRLRNI(vssrlrni_h_w, 32, H, W) | ||
430 | VSSRLRNI(vssrlrni_w_d, 64, W, D) | ||
431 | -VSSRLRNI_Q(vssrlrni_d_q, 63) | ||
432 | - | ||
433 | -#define VSSRARNI(NAME, BIT, E1, E2) \ | ||
434 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
435 | -{ \ | ||
436 | - int i; \ | ||
437 | - VReg temp; \ | ||
438 | - VReg *Vd = (VReg *)vd; \ | ||
439 | - VReg *Vj = (VReg *)vj; \ | ||
440 | - \ | ||
441 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
442 | - temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
443 | - temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ | ||
444 | - } \ | ||
445 | - *Vd = temp; \ | ||
446 | -} | ||
447 | |||
448 | -void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
449 | -{ | ||
450 | - Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
451 | - VReg *Vd = (VReg *)vd; | ||
452 | - VReg *Vj = (VReg *)vj; | ||
453 | +#define VSSRARNI(NAME, BIT, E1, E2) \ | ||
454 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
455 | +{ \ | ||
456 | + int i, j, ofs; \ | ||
457 | + VReg temp = {}; \ | ||
458 | + VReg *Vd = (VReg *)vd; \ | ||
459 | + VReg *Vj = (VReg *)vj; \ | ||
460 | + int oprsz = simd_oprsz(desc); \ | ||
461 | + \ | ||
462 | + ofs = LSX_LEN / BIT; \ | ||
463 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
464 | + for (j = 0; j < ofs; j++) { \ | ||
465 | + temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ | ||
466 | + imm, BIT / 2 - 1); \ | ||
467 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \ | ||
468 | + imm, BIT / 2 - 1); \ | ||
469 | + } \ | ||
470 | + } \ | ||
471 | + *Vd = temp; \ | ||
472 | +} | ||
473 | + | ||
474 | +static void do_vssrarni_d_q(VReg *Vd, VReg *Vj, | ||
475 | + uint64_t imm, int idx, Int128 mask1, Int128 mask2) | ||
476 | +{ | ||
477 | + Int128 shft_res1, shft_res2, r1, r2; | ||
478 | |||
479 | if (imm == 0) { | ||
480 | - shft_res1 = Vj->Q(0); | ||
481 | - shft_res2 = Vd->Q(0); | ||
482 | + shft_res1 = Vj->Q(idx); | ||
483 | + shft_res2 = Vd->Q(idx); | ||
484 | } else { | ||
485 | - r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
486 | - r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
487 | - | ||
488 | - shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); | ||
489 | - shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); | ||
490 | + r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); | ||
491 | + r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); | ||
492 | + shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); | ||
493 | + shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); | ||
494 | } | ||
495 | - | ||
496 | - mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
497 | - mask2 = int128_lshift(int128_one(), 63); | ||
498 | - | ||
499 | - if (int128_gt(shft_res1, mask1)) { | ||
500 | - Vd->D(0) = int128_getlo(mask1); | ||
501 | + if (int128_gt(shft_res1, mask1)) { | ||
502 | + Vd->D(idx * 2) = int128_getlo(mask1); | ||
503 | } else if (int128_lt(shft_res1, int128_neg(mask2))) { | ||
504 | - Vd->D(0) = int128_getlo(mask2); | ||
505 | + Vd->D(idx * 2) = int128_getlo(mask2); | ||
506 | } else { | ||
507 | - Vd->D(0) = int128_getlo(shft_res1); | ||
508 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
509 | } | ||
510 | |||
511 | if (int128_gt(shft_res2, mask1)) { | ||
512 | - Vd->D(1) = int128_getlo(mask1); | ||
513 | + Vd->D(idx * 2 + 1) = int128_getlo(mask1); | ||
514 | } else if (int128_lt(shft_res2, int128_neg(mask2))) { | ||
515 | - Vd->D(1) = int128_getlo(mask2); | ||
516 | + Vd->D(idx * 2 + 1) = int128_getlo(mask2); | ||
517 | } else { | ||
518 | - Vd->D(1) = int128_getlo(shft_res2); | ||
519 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
520 | + } | ||
521 | +} | ||
522 | + | ||
523 | +void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
524 | +{ | ||
525 | + int i; | ||
526 | + Int128 mask1, mask2; | ||
527 | + VReg *Vd = (VReg *)vd; | ||
528 | + VReg *Vj = (VReg *)vj; | ||
529 | + int oprsz = simd_oprsz(desc); | ||
530 | + | ||
531 | + mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
532 | + mask2 = int128_lshift(int128_one(), 63); | ||
533 | + | ||
534 | + for (i = 0; i < oprsz / 16; i++) { | ||
535 | + do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2); | ||
536 | } | ||
537 | } | ||
538 | |||
539 | @@ -XXX,XX +XXX,XX @@ VSSRARNI(vssrarni_b_h, 16, B, H) | ||
540 | VSSRARNI(vssrarni_h_w, 32, H, W) | ||
541 | VSSRARNI(vssrarni_w_d, 64, W, D) | ||
542 | |||
543 | -#define VSSRLRNUI(NAME, BIT, E1, E2) \ | ||
544 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
545 | -{ \ | ||
546 | - int i; \ | ||
547 | - VReg temp; \ | ||
548 | - VReg *Vd = (VReg *)vd; \ | ||
549 | - VReg *Vj = (VReg *)vj; \ | ||
550 | - \ | ||
551 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
552 | - temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
553 | - temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
554 | - } \ | ||
555 | - *Vd = temp; \ | ||
556 | +#define VSSRLRNUI(NAME, BIT, E1, E2) \ | ||
557 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
558 | +{ \ | ||
559 | + int i, j, ofs; \ | ||
560 | + VReg temp = {}; \ | ||
561 | + VReg *Vd = (VReg *)vd; \ | ||
562 | + VReg *Vj = (VReg *)vj; \ | ||
563 | + int oprsz = simd_oprsz(desc); \ | ||
564 | + \ | ||
565 | + ofs = LSX_LEN / BIT; \ | ||
566 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
567 | + for (j = 0; j < ofs; j++) { \ | ||
568 | + temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
569 | + imm, BIT / 2); \ | ||
570 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \ | ||
571 | + imm, BIT / 2); \ | ||
572 | + } \ | ||
573 | + } \ | ||
574 | + *Vd = temp; \ | ||
575 | +} | ||
576 | + | ||
577 | +void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
578 | +{ | ||
579 | + int i; | ||
580 | + Int128 mask; | ||
581 | + VReg *Vd = (VReg *)vd; | ||
582 | + VReg *Vj = (VReg *)vj; | ||
583 | + int oprsz = simd_oprsz(desc); | ||
584 | + | ||
585 | + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
586 | + | ||
587 | + for (i = 0; i < oprsz / 16; i++) { | ||
588 | + do_vssrlrni_q(Vd, Vj, imm, i, mask); | ||
589 | + } | ||
590 | } | ||
591 | |||
592 | VSSRLRNUI(vssrlrni_bu_h, 16, B, H) | ||
593 | VSSRLRNUI(vssrlrni_hu_w, 32, H, W) | ||
594 | VSSRLRNUI(vssrlrni_wu_d, 64, W, D) | ||
595 | -VSSRLRNI_Q(vssrlrni_du_q, 64) | ||
596 | |||
597 | -#define VSSRARNUI(NAME, BIT, E1, E2) \ | ||
598 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
599 | -{ \ | ||
600 | - int i; \ | ||
601 | - VReg temp; \ | ||
602 | - VReg *Vd = (VReg *)vd; \ | ||
603 | - VReg *Vj = (VReg *)vj; \ | ||
604 | - \ | ||
605 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
606 | - temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
607 | - temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
608 | - } \ | ||
609 | - *Vd = temp; \ | ||
610 | -} | ||
611 | - | ||
612 | -void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
613 | -{ | ||
614 | - Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
615 | - VReg *Vd = (VReg *)vd; | ||
616 | - VReg *Vj = (VReg *)vj; | ||
617 | +#define VSSRARNUI(NAME, BIT, E1, E2) \ | ||
618 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
619 | +{ \ | ||
620 | + int i, j, ofs; \ | ||
621 | + VReg temp = {}; \ | ||
622 | + VReg *Vd = (VReg *)vd; \ | ||
623 | + VReg *Vj = (VReg *)vj; \ | ||
624 | + int oprsz = simd_oprsz(desc); \ | ||
625 | + \ | ||
626 | + ofs = LSX_LEN / BIT; \ | ||
627 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
628 | + for (j = 0; j < ofs; j++) { \ | ||
629 | + temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ | ||
630 | + imm, BIT / 2); \ | ||
631 | + temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \ | ||
632 | + imm, BIT / 2); \ | ||
633 | + } \ | ||
634 | + } \ | ||
635 | + *Vd = temp; \ | ||
636 | +} | ||
637 | + | ||
638 | +static void do_vssrarni_du_q(VReg *Vd, VReg *Vj, | ||
639 | + uint64_t imm, int idx, Int128 mask1, Int128 mask2) | ||
640 | +{ | ||
641 | + Int128 shft_res1, shft_res2, r1, r2; | ||
642 | |||
643 | if (imm == 0) { | ||
644 | - shft_res1 = Vj->Q(0); | ||
645 | - shft_res2 = Vd->Q(0); | ||
646 | + shft_res1 = Vj->Q(idx); | ||
647 | + shft_res2 = Vd->Q(idx); | ||
648 | } else { | ||
649 | - r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
650 | - r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
651 | - | ||
652 | - shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); | ||
653 | - shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); | ||
654 | + r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); | ||
655 | + r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); | ||
656 | + shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); | ||
657 | + shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); | ||
658 | } | ||
659 | |||
660 | - if (int128_lt(Vj->Q(0), int128_zero())) { | ||
661 | + if (int128_lt(Vj->Q(idx), int128_zero())) { | ||
662 | shft_res1 = int128_zero(); | ||
663 | } | ||
664 | - if (int128_lt(Vd->Q(0), int128_zero())) { | ||
665 | + if (int128_lt(Vd->Q(idx), int128_zero())) { | ||
666 | shft_res2 = int128_zero(); | ||
667 | } | ||
668 | |||
669 | - mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
670 | - mask2 = int128_lshift(int128_one(), 64); | ||
671 | - | ||
672 | if (int128_gt(shft_res1, mask1)) { | ||
673 | - Vd->D(0) = int128_getlo(mask1); | ||
674 | + Vd->D(idx * 2) = int128_getlo(mask1); | ||
675 | } else if (int128_lt(shft_res1, int128_neg(mask2))) { | ||
676 | - Vd->D(0) = int128_getlo(mask2); | ||
677 | + Vd->D(idx * 2) = int128_getlo(mask2); | ||
678 | } else { | ||
679 | - Vd->D(0) = int128_getlo(shft_res1); | ||
680 | + Vd->D(idx * 2) = int128_getlo(shft_res1); | ||
681 | } | ||
682 | |||
683 | if (int128_gt(shft_res2, mask1)) { | ||
684 | - Vd->D(1) = int128_getlo(mask1); | ||
685 | + Vd->D(idx * 2 + 1) = int128_getlo(mask1); | ||
686 | } else if (int128_lt(shft_res2, int128_neg(mask2))) { | ||
687 | - Vd->D(1) = int128_getlo(mask2); | ||
688 | + Vd->D(idx * 2 + 1) = int128_getlo(mask2); | ||
689 | } else { | ||
690 | - Vd->D(1) = int128_getlo(shft_res2); | ||
691 | + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); | ||
692 | + } | ||
693 | +} | ||
694 | + | ||
695 | +void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
696 | +{ | ||
697 | + int i; | ||
698 | + Int128 mask1, mask2; | ||
699 | + VReg *Vd = (VReg *)vd; | ||
700 | + VReg *Vj = (VReg *)vj; | ||
701 | + int oprsz = simd_oprsz(desc); | ||
702 | + | ||
703 | + mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
704 | + mask2 = int128_lshift(int128_one(), 64); | ||
705 | + | ||
706 | + for (i = 0; i < oprsz / 16; i++) { | ||
707 | + do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2); | ||
708 | } | ||
709 | } | ||
710 | |||
711 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
712 | index XXXXXXX..XXXXXXX 100644 | ||
713 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
714 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
715 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d) | ||
716 | TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h) | ||
717 | TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w) | ||
718 | TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d) | ||
719 | +TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h) | ||
720 | +TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w) | ||
721 | +TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d) | ||
722 | +TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h) | ||
723 | +TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w) | ||
724 | +TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d) | ||
725 | +TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h) | ||
726 | +TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w) | ||
727 | +TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d) | ||
728 | +TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h) | ||
729 | +TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w) | ||
730 | +TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d) | ||
731 | |||
732 | TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h) | ||
733 | TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w) | ||
734 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h) | ||
735 | TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w) | ||
736 | TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d) | ||
737 | TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q) | ||
738 | +TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h) | ||
739 | +TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w) | ||
740 | +TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d) | ||
741 | +TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q) | ||
742 | +TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h) | ||
743 | +TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w) | ||
744 | +TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d) | ||
745 | +TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q) | ||
746 | +TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h) | ||
747 | +TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w) | ||
748 | +TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d) | ||
749 | +TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q) | ||
750 | +TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h) | ||
751 | +TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w) | ||
752 | +TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d) | ||
753 | +TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q) | ||
754 | |||
755 | TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b) | ||
756 | TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h) | ||
757 | -- | ||
758 | 2.39.1 | ||
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVCLO.{B/H/W/D}; | ||
3 | - XVCLZ.{B/H/W/D}. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-43-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 9 +++++++++ | ||
10 | target/loongarch/disas.c | 9 +++++++++ | ||
11 | target/loongarch/vec_helper.c | 3 ++- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++ | ||
13 | 4 files changed, 28 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5 | ||
20 | xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6 | ||
21 | xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7 | ||
22 | |||
23 | +xvclo_b 0111 01101001 11000 00000 ..... ..... @vv | ||
24 | +xvclo_h 0111 01101001 11000 00001 ..... ..... @vv | ||
25 | +xvclo_w 0111 01101001 11000 00010 ..... ..... @vv | ||
26 | +xvclo_d 0111 01101001 11000 00011 ..... ..... @vv | ||
27 | +xvclz_b 0111 01101001 11000 00100 ..... ..... @vv | ||
28 | +xvclz_h 0111 01101001 11000 00101 ..... ..... @vv | ||
29 | +xvclz_w 0111 01101001 11000 00110 ..... ..... @vv | ||
30 | +xvclz_d 0111 01101001 11000 00111 ..... ..... @vv | ||
31 | + | ||
32 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
33 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
34 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
35 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/loongarch/disas.c | ||
38 | +++ b/target/loongarch/disas.c | ||
39 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvssrarni_hu_w, vv_i) | ||
40 | INSN_LASX(xvssrarni_wu_d, vv_i) | ||
41 | INSN_LASX(xvssrarni_du_q, vv_i) | ||
42 | |||
43 | +INSN_LASX(xvclo_b, vv) | ||
44 | +INSN_LASX(xvclo_h, vv) | ||
45 | +INSN_LASX(xvclo_w, vv) | ||
46 | +INSN_LASX(xvclo_d, vv) | ||
47 | +INSN_LASX(xvclz_b, vv) | ||
48 | +INSN_LASX(xvclz_h, vv) | ||
49 | +INSN_LASX(xvclz_w, vv) | ||
50 | +INSN_LASX(xvclz_d, vv) | ||
51 | + | ||
52 | INSN_LASX(xvreplgr2vr_b, vr) | ||
53 | INSN_LASX(xvreplgr2vr_h, vr) | ||
54 | INSN_LASX(xvreplgr2vr_w, vr) | ||
55 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/loongarch/vec_helper.c | ||
58 | +++ b/target/loongarch/vec_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
60 | int i; \ | ||
61 | VReg *Vd = (VReg *)vd; \ | ||
62 | VReg *Vj = (VReg *)vj; \ | ||
63 | + int oprsz = simd_oprsz(desc); \ | ||
64 | \ | ||
65 | - for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
66 | + for (i = 0; i < oprsz / (BIT / 8); i++) \ | ||
67 | { \ | ||
68 | Vd->E(i) = DO_OP(Vj->E(i)); \ | ||
69 | } \ | ||
70 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
73 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b) | ||
75 | TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h) | ||
76 | TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w) | ||
77 | TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d) | ||
78 | +TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b) | ||
79 | +TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h) | ||
80 | +TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w) | ||
81 | +TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d) | ||
82 | +TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b) | ||
83 | +TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h) | ||
84 | +TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w) | ||
85 | +TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d) | ||
86 | |||
87 | TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b) | ||
88 | TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h) | ||
89 | -- | ||
90 | 2.39.1
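The vec_helper.c hunk above is the recurring trick of this series: the fixed loop bound LSX_LEN/BIT (the element count of one 128-bit register) becomes oprsz / (BIT / 8), operand bytes divided by element bytes, so the same helper body covers both vector widths. A tiny self-checking sketch of the arithmetic, assuming LSX_LEN is 128:

/* Self-checking sketch: for oprsz == 16 the new bound equals the old
 * one; for oprsz == 32 (LASX) it is exactly doubled. */
#include <assert.h>

int main(void)
{
    enum { LSX_LEN = 128 };
    for (int bit = 8; bit <= 64; bit *= 2) {
        assert(16 / (bit / 8) == LSX_LEN / bit);        /* oprsz = 16 */
        assert(32 / (bit / 8) == 2 * (LSX_LEN / bit));  /* oprsz = 32 */
    }
    return 0;
}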
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVPCNT.{B/H/W/D}. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-44-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 +++++ | ||
9 | target/loongarch/disas.c | 5 +++++ | ||
10 | target/loongarch/vec_helper.c | 3 ++- | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++ | ||
12 | 4 files changed, 16 insertions(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvclz_h 0111 01101001 11000 00101 ..... ..... @vv | ||
19 | xvclz_w 0111 01101001 11000 00110 ..... ..... @vv | ||
20 | xvclz_d 0111 01101001 11000 00111 ..... ..... @vv | ||
21 | |||
22 | +xvpcnt_b 0111 01101001 11000 01000 ..... ..... @vv | ||
23 | +xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv | ||
24 | +xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv | ||
25 | +xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv | ||
26 | + | ||
27 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
28 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
29 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
30 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/loongarch/disas.c | ||
33 | +++ b/target/loongarch/disas.c | ||
34 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvclz_h, vv) | ||
35 | INSN_LASX(xvclz_w, vv) | ||
36 | INSN_LASX(xvclz_d, vv) | ||
37 | |||
38 | +INSN_LASX(xvpcnt_b, vv) | ||
39 | +INSN_LASX(xvpcnt_h, vv) | ||
40 | +INSN_LASX(xvpcnt_w, vv) | ||
41 | +INSN_LASX(xvpcnt_d, vv) | ||
42 | + | ||
43 | INSN_LASX(xvreplgr2vr_b, vr) | ||
44 | INSN_LASX(xvreplgr2vr_h, vr) | ||
45 | INSN_LASX(xvreplgr2vr_w, vr) | ||
46 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/loongarch/vec_helper.c | ||
49 | +++ b/target/loongarch/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
51 | int i; \ | ||
52 | VReg *Vd = (VReg *)vd; \ | ||
53 | VReg *Vj = (VReg *)vj; \ | ||
54 | + int oprsz = simd_oprsz(desc); \ | ||
55 | \ | ||
56 | - for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
57 | + for (i = 0; i < oprsz / (BIT / 8); i++) \ | ||
58 | { \ | ||
59 | Vd->E(i) = FN(Vj->E(i)); \ | ||
60 | } \ | ||
61 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
64 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
65 | @@ -XXX,XX +XXX,XX @@ TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b) | ||
66 | TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h) | ||
67 | TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w) | ||
68 | TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d) | ||
69 | +TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b) | ||
70 | +TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h) | ||
71 | +TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w) | ||
72 | +TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d) | ||
73 | |||
74 | static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, | ||
75 | void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) | ||
76 | -- | ||
77 | 2.39.1
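The same loop-bound generalization applies to the population-count helper. A standalone sketch of the byte variant under that scheme, using the compiler builtin as a stand-in for QEMU's ctpop helpers:

/* Sketch: per-element popcount, bounded by the descriptor's operand
 * size rather than a fixed 128-bit element count. */
#include <stdint.h>
#include <stdio.h>

static void vpcnt_b_like(uint8_t *vd, const uint8_t *vj, unsigned oprsz)
{
    for (unsigned i = 0; i < oprsz / sizeof(uint8_t); i++) {
        vd[i] = (uint8_t)__builtin_popcount(vj[i]);
    }
}

int main(void)
{
    uint8_t j[32], d[32];
    for (int i = 0; i < 32; i++) j[i] = (uint8_t)i;
    vpcnt_b_like(d, j, 32);                  /* a 256-bit (LASX-sized) call */
    printf("%u %u %u\n", d[3], d[7], d[31]); /* prints: 2 3 5 */
    return 0;
}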
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVBITCLR[I].{B/H/W/D}; | ||
3 | - XVBITSET[I].{B/H/W/D}; | ||
4 | - XVBITREV[I].{B/H/W/D}. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-45-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 27 +++++++++++++ | ||
11 | target/loongarch/disas.c | 25 ++++++++++++ | ||
12 | target/loongarch/vec_helper.c | 44 +++++++++++---------- | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 24 +++++++++++ | ||
14 | 4 files changed, 99 insertions(+), 21 deletions(-) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv | ||
21 | xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv | ||
22 | xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv | ||
23 | |||
24 | +xvbitclr_b 0111 01010000 11000 ..... ..... ..... @vvv | ||
25 | +xvbitclr_h 0111 01010000 11001 ..... ..... ..... @vvv | ||
26 | +xvbitclr_w 0111 01010000 11010 ..... ..... ..... @vvv | ||
27 | +xvbitclr_d 0111 01010000 11011 ..... ..... ..... @vvv | ||
28 | +xvbitclri_b 0111 01110001 00000 01 ... ..... ..... @vv_ui3 | ||
29 | +xvbitclri_h 0111 01110001 00000 1 .... ..... ..... @vv_ui4 | ||
30 | +xvbitclri_w 0111 01110001 00001 ..... ..... ..... @vv_ui5 | ||
31 | +xvbitclri_d 0111 01110001 0001 ...... ..... ..... @vv_ui6 | ||
32 | + | ||
33 | +xvbitset_b 0111 01010000 11100 ..... ..... ..... @vvv | ||
34 | +xvbitset_h 0111 01010000 11101 ..... ..... ..... @vvv | ||
35 | +xvbitset_w 0111 01010000 11110 ..... ..... ..... @vvv | ||
36 | +xvbitset_d 0111 01010000 11111 ..... ..... ..... @vvv | ||
37 | +xvbitseti_b 0111 01110001 01000 01 ... ..... ..... @vv_ui3 | ||
38 | +xvbitseti_h 0111 01110001 01000 1 .... ..... ..... @vv_ui4 | ||
39 | +xvbitseti_w 0111 01110001 01001 ..... ..... ..... @vv_ui5 | ||
40 | +xvbitseti_d 0111 01110001 0101 ...... ..... ..... @vv_ui6 | ||
41 | + | ||
42 | +xvbitrev_b 0111 01010001 00000 ..... ..... ..... @vvv | ||
43 | +xvbitrev_h 0111 01010001 00001 ..... ..... ..... @vvv | ||
44 | +xvbitrev_w 0111 01010001 00010 ..... ..... ..... @vvv | ||
45 | +xvbitrev_d 0111 01010001 00011 ..... ..... ..... @vvv | ||
46 | +xvbitrevi_b 0111 01110001 10000 01 ... ..... ..... @vv_ui3 | ||
47 | +xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4 | ||
48 | +xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5 | ||
49 | +xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6 | ||
50 | + | ||
51 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
52 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
53 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
54 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/loongarch/disas.c | ||
57 | +++ b/target/loongarch/disas.c | ||
58 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvpcnt_h, vv) | ||
59 | INSN_LASX(xvpcnt_w, vv) | ||
60 | INSN_LASX(xvpcnt_d, vv) | ||
61 | |||
62 | +INSN_LASX(xvbitclr_b, vvv) | ||
63 | +INSN_LASX(xvbitclr_h, vvv) | ||
64 | +INSN_LASX(xvbitclr_w, vvv) | ||
65 | +INSN_LASX(xvbitclr_d, vvv) | ||
66 | +INSN_LASX(xvbitclri_b, vv_i) | ||
67 | +INSN_LASX(xvbitclri_h, vv_i) | ||
68 | +INSN_LASX(xvbitclri_w, vv_i) | ||
69 | +INSN_LASX(xvbitclri_d, vv_i) | ||
70 | +INSN_LASX(xvbitset_b, vvv) | ||
71 | +INSN_LASX(xvbitset_h, vvv) | ||
72 | +INSN_LASX(xvbitset_w, vvv) | ||
73 | +INSN_LASX(xvbitset_d, vvv) | ||
74 | +INSN_LASX(xvbitseti_b, vv_i) | ||
75 | +INSN_LASX(xvbitseti_h, vv_i) | ||
76 | +INSN_LASX(xvbitseti_w, vv_i) | ||
77 | +INSN_LASX(xvbitseti_d, vv_i) | ||
78 | +INSN_LASX(xvbitrev_b, vvv) | ||
79 | +INSN_LASX(xvbitrev_h, vvv) | ||
80 | +INSN_LASX(xvbitrev_w, vvv) | ||
81 | +INSN_LASX(xvbitrev_d, vvv) | ||
82 | +INSN_LASX(xvbitrevi_b, vv_i) | ||
83 | +INSN_LASX(xvbitrevi_h, vv_i) | ||
84 | +INSN_LASX(xvbitrevi_w, vv_i) | ||
85 | +INSN_LASX(xvbitrevi_d, vv_i) | ||
86 | + | ||
87 | INSN_LASX(xvreplgr2vr_b, vr) | ||
88 | INSN_LASX(xvreplgr2vr_h, vr) | ||
89 | INSN_LASX(xvreplgr2vr_w, vr) | ||
90 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/target/loongarch/vec_helper.c | ||
93 | +++ b/target/loongarch/vec_helper.c | ||
94 | @@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_d, 64, UD, ctpop64) | ||
95 | #define DO_BITSET(a, bit) (a | 1ull << bit) | ||
96 | #define DO_BITREV(a, bit) (a ^ (1ull << bit)) | ||
97 | |||
98 | -#define DO_BIT(NAME, BIT, E, DO_OP) \ | ||
99 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
100 | -{ \ | ||
101 | - int i; \ | ||
102 | - VReg *Vd = (VReg *)vd; \ | ||
103 | - VReg *Vj = (VReg *)vj; \ | ||
104 | - VReg *Vk = (VReg *)vk; \ | ||
105 | - \ | ||
106 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
107 | - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ | ||
108 | - } \ | ||
109 | +#define DO_BIT(NAME, BIT, E, DO_OP) \ | ||
110 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
111 | +{ \ | ||
112 | + int i; \ | ||
113 | + VReg *Vd = (VReg *)vd; \ | ||
114 | + VReg *Vj = (VReg *)vj; \ | ||
115 | + VReg *Vk = (VReg *)vk; \ | ||
116 | + int oprsz = simd_oprsz(desc); \ | ||
117 | + \ | ||
118 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
119 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ | ||
120 | + } \ | ||
121 | } | ||
122 | |||
123 | DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) | ||
124 | @@ -XXX,XX +XXX,XX @@ DO_BIT(vbitrev_h, 16, UH, DO_BITREV) | ||
125 | DO_BIT(vbitrev_w, 32, UW, DO_BITREV) | ||
126 | DO_BIT(vbitrev_d, 64, UD, DO_BITREV) | ||
127 | |||
128 | -#define DO_BITI(NAME, BIT, E, DO_OP) \ | ||
129 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
130 | -{ \ | ||
131 | - int i; \ | ||
132 | - VReg *Vd = (VReg *)vd; \ | ||
133 | - VReg *Vj = (VReg *)vj; \ | ||
134 | - \ | ||
135 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
136 | - Vd->E(i) = DO_OP(Vj->E(i), imm); \ | ||
137 | - } \ | ||
138 | +#define DO_BITI(NAME, BIT, E, DO_OP) \ | ||
139 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
140 | +{ \ | ||
141 | + int i; \ | ||
142 | + VReg *Vd = (VReg *)vd; \ | ||
143 | + VReg *Vj = (VReg *)vj; \ | ||
144 | + int oprsz = simd_oprsz(desc); \ | ||
145 | + \ | ||
146 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
147 | + Vd->E(i) = DO_OP(Vj->E(i), imm); \ | ||
148 | + } \ | ||
149 | } | ||
150 | |||
151 | DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) | ||
152 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
155 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
156 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr) | ||
157 | TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr) | ||
158 | TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr) | ||
159 | TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr) | ||
160 | +TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr) | ||
161 | +TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr) | ||
162 | +TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr) | ||
163 | +TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr) | ||
164 | |||
165 | static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm, | ||
166 | void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) | ||
167 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri) | ||
168 | TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri) | ||
169 | TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri) | ||
170 | TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri) | ||
171 | +TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri) | ||
172 | +TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri) | ||
173 | +TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri) | ||
174 | +TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri) | ||
175 | |||
176 | static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
177 | uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
178 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset) | ||
179 | TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset) | ||
180 | TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset) | ||
181 | TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset) | ||
182 | +TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset) | ||
183 | +TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset) | ||
184 | +TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset) | ||
185 | +TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset) | ||
186 | |||
187 | static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
188 | int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
189 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti) | ||
190 | TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti) | ||
191 | TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti) | ||
192 | TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti) | ||
193 | +TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti) | ||
194 | +TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti) | ||
195 | +TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti) | ||
196 | +TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti) | ||
197 | |||
198 | static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
199 | uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
200 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev) | ||
201 | TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev) | ||
202 | TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev) | ||
203 | TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev) | ||
204 | +TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev) | ||
205 | +TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev) | ||
206 | +TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev) | ||
207 | +TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev) | ||
208 | |||
209 | static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
210 | int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
211 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi) | ||
212 | TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi) | ||
213 | TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi) | ||
214 | TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi) | ||
215 | +TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi) | ||
216 | +TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi) | ||
217 | +TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi) | ||
218 | +TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi) | ||
219 | |||
220 | TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b) | ||
221 | TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h) | ||
222 | -- | ||
223 | 2.39.1
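For reference, the three per-element operations behind these helpers are one-liners: DO_BITSET and DO_BITREV appear in the hunk above, and DO_BITCLR clears the selected bit; the bit index is reduced modulo the element width (Vk->E(i) % BIT). A small self-checking sketch of the semantics:

/* Sketch of the per-element bit operations on a 64-bit element. */
#include <stdint.h>
#include <assert.h>

static uint64_t do_bitclr(uint64_t a, unsigned bit) { return a & ~(1ull << bit); }
static uint64_t do_bitset(uint64_t a, unsigned bit) { return a |  (1ull << bit); }
static uint64_t do_bitrev(uint64_t a, unsigned bit) { return a ^  (1ull << bit); }

int main(void)
{
    assert(do_bitclr(0xff, 3) == 0xf7);  /* clear bit 3 */
    assert(do_bitset(0x00, 3) == 0x08);  /* set bit 3 */
    assert(do_bitrev(0x08, 3) == 0x00);  /* toggle bit 3 */
    return 0;
}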
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVFRSTP[I].{B/H}. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-46-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/insns.decode | 5 ++++ | ||
9 | target/loongarch/disas.c | 5 ++++ | ||
10 | target/loongarch/vec_helper.c | 32 +++++++++++++-------- | ||
11 | target/loongarch/insn_trans/trans_vec.c.inc | 4 +++ | ||
12 | 4 files changed, 34 insertions(+), 12 deletions(-) | ||
13 | |||
14 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/insns.decode | ||
17 | +++ b/target/loongarch/insns.decode | ||
18 | @@ -XXX,XX +XXX,XX @@ xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4 | ||
19 | xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5 | ||
20 | xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6 | ||
21 | |||
22 | +xvfrstp_b 0111 01010010 10110 ..... ..... ..... @vvv | ||
23 | +xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv | ||
24 | +xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5 | ||
25 | +xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5 | ||
26 | + | ||
27 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
28 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
29 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
30 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/loongarch/disas.c | ||
33 | +++ b/target/loongarch/disas.c | ||
34 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvbitrevi_h, vv_i) | ||
35 | INSN_LASX(xvbitrevi_w, vv_i) | ||
36 | INSN_LASX(xvbitrevi_d, vv_i) | ||
37 | |||
38 | +INSN_LASX(xvfrstp_b, vvv) | ||
39 | +INSN_LASX(xvfrstp_h, vvv) | ||
40 | +INSN_LASX(xvfrstpi_b, vv_i) | ||
41 | +INSN_LASX(xvfrstpi_h, vv_i) | ||
42 | + | ||
43 | INSN_LASX(xvreplgr2vr_b, vr) | ||
44 | INSN_LASX(xvreplgr2vr_h, vr) | ||
45 | INSN_LASX(xvreplgr2vr_w, vr) | ||
46 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/loongarch/vec_helper.c | ||
49 | +++ b/target/loongarch/vec_helper.c | ||
50 | @@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) | ||
51 | #define VFRSTP(NAME, BIT, MASK, E) \ | ||
52 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
53 | { \ | ||
54 | - int i, m; \ | ||
55 | + int i, j, m, ofs; \ | ||
56 | VReg *Vd = (VReg *)vd; \ | ||
57 | VReg *Vj = (VReg *)vj; \ | ||
58 | VReg *Vk = (VReg *)vk; \ | ||
59 | + int oprsz = simd_oprsz(desc); \ | ||
60 | \ | ||
61 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
62 | - if (Vj->E(i) < 0) { \ | ||
63 | - break; \ | ||
64 | + ofs = LSX_LEN / BIT; \ | ||
65 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
66 | + m = Vk->E(i * ofs) & MASK; \ | ||
67 | + for (j = 0; j < ofs; j++) { \ | ||
68 | + if (Vj->E(j + ofs * i) < 0) { \ | ||
69 | + break; \ | ||
70 | + } \ | ||
71 | } \ | ||
72 | + Vd->E(m + i * ofs) = j; \ | ||
73 | } \ | ||
74 | - m = Vk->E(0) & MASK; \ | ||
75 | - Vd->E(m) = i; \ | ||
76 | } | ||
77 | |||
78 | VFRSTP(vfrstp_b, 8, 0xf, B) | ||
79 | @@ -XXX,XX +XXX,XX @@ VFRSTP(vfrstp_h, 16, 0x7, H) | ||
80 | #define VFRSTPI(NAME, BIT, E) \ | ||
81 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
82 | { \ | ||
83 | - int i, m; \ | ||
84 | + int i, j, m, ofs; \ | ||
85 | VReg *Vd = (VReg *)vd; \ | ||
86 | VReg *Vj = (VReg *)vj; \ | ||
87 | + int oprsz = simd_oprsz(desc); \ | ||
88 | \ | ||
89 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
90 | - if (Vj->E(i) < 0) { \ | ||
91 | - break; \ | ||
92 | + ofs = LSX_LEN / BIT; \ | ||
93 | + m = imm % ofs; \ | ||
94 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
95 | + for (j = 0; j < ofs; j++) { \ | ||
96 | + if (Vj->E(j + ofs * i) < 0) { \ | ||
97 | + break; \ | ||
98 | + } \ | ||
99 | } \ | ||
100 | + Vd->E(m + i * ofs) = j; \ | ||
101 | } \ | ||
102 | - m = imm % (LSX_LEN/BIT); \ | ||
103 | - Vd->E(m) = i; \ | ||
104 | } | ||
105 | |||
106 | VFRSTPI(vfrstpi_b, 8, B) | ||
107 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
110 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
111 | @@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b) | ||
112 | TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h) | ||
113 | TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b) | ||
114 | TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h) | ||
115 | +TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b) | ||
116 | +TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h) | ||
117 | +TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b) | ||
118 | +TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h) | ||
119 | |||
120 | TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s) | ||
121 | TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d) | ||
122 | -- | ||
123 | 2.39.1
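The VFRSTP rework is worth spelling out: the LSX semantics (find the index of the first negative element and store it at a masked destination position) now run once per 128-bit lane, which is why the helper gains an outer loop over lanes and an inner j scan. A standalone sketch of the byte variant, assuming the per-lane behaviour shown in the hunk:

/* Sketch: one "first negative element" scan per 16-byte lane. */
#include <stdint.h>
#include <stdio.h>

static void vfrstp_b_like(int8_t *vd, const int8_t *vj, const int8_t *vk,
                          unsigned oprsz)
{
    const unsigned ofs = 16 / sizeof(int8_t);   /* elements per 128-bit lane */
    for (unsigned i = 0; i < oprsz / 16; i++) { /* one scan per lane */
        unsigned j, m = vk[i * ofs] & 0xf;      /* masked destination index */
        for (j = 0; j < ofs; j++) {
            if (vj[j + i * ofs] < 0) {
                break;                          /* first negative element */
            }
        }
        vd[m + i * ofs] = (int8_t)j;            /* its index, or ofs if none */
    }
}

int main(void)
{
    int8_t j[32] = { [5] = -1, [16 + 2] = -1 }, k[32] = { 0 }, d[32] = { 0 };
    vfrstp_b_like(d, j, k, 32);
    printf("%d %d\n", d[0], d[16]);             /* prints: 5 2 */
    return 0;
}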
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVF{ADD/SUB/MUL/DIV}.{S/D}; | ||
3 | - XVF{MADD/MSUB/NMADD/NMSUB}.{S/D}; | ||
4 | - XVF{MAX/MIN}.{S/D}; | ||
5 | - XVF{MAXA/MINA}.{S/D}; | ||
6 | - XVFLOGB.{S/D}; | ||
7 | - XVFCLASS.{S/D}; | ||
8 | - XVF{SQRT/RECIP/RSQRT}.{S/D}. | ||
1 | 9 | ||
10 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-Id: <20230914022645.1151356-47-gaosong@loongson.cn> | ||
13 | --- | ||
14 | target/loongarch/insns.decode | 41 +++++++++++ | ||
15 | target/loongarch/disas.c | 46 +++++++++++++ | ||
16 | target/loongarch/vec_helper.c | 12 ++-- | ||
17 | target/loongarch/insn_trans/trans_vec.c.inc | 75 +++++++++++++++++---- | ||
18 | 4 files changed, 158 insertions(+), 16 deletions(-) | ||
19 | |||
20 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/loongarch/insns.decode | ||
23 | +++ b/target/loongarch/insns.decode | ||
24 | @@ -XXX,XX +XXX,XX @@ xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv | ||
25 | xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5 | ||
26 | xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5 | ||
27 | |||
28 | +xvfadd_s 0111 01010011 00001 ..... ..... ..... @vvv | ||
29 | +xvfadd_d 0111 01010011 00010 ..... ..... ..... @vvv | ||
30 | +xvfsub_s 0111 01010011 00101 ..... ..... ..... @vvv | ||
31 | +xvfsub_d 0111 01010011 00110 ..... ..... ..... @vvv | ||
32 | +xvfmul_s 0111 01010011 10001 ..... ..... ..... @vvv | ||
33 | +xvfmul_d 0111 01010011 10010 ..... ..... ..... @vvv | ||
34 | +xvfdiv_s 0111 01010011 10101 ..... ..... ..... @vvv | ||
35 | +xvfdiv_d 0111 01010011 10110 ..... ..... ..... @vvv | ||
36 | + | ||
37 | +xvfmadd_s 0000 10100001 ..... ..... ..... ..... @vvvv | ||
38 | +xvfmadd_d 0000 10100010 ..... ..... ..... ..... @vvvv | ||
39 | +xvfmsub_s 0000 10100101 ..... ..... ..... ..... @vvvv | ||
40 | +xvfmsub_d 0000 10100110 ..... ..... ..... ..... @vvvv | ||
41 | +xvfnmadd_s 0000 10101001 ..... ..... ..... ..... @vvvv | ||
42 | +xvfnmadd_d 0000 10101010 ..... ..... ..... ..... @vvvv | ||
43 | +xvfnmsub_s 0000 10101101 ..... ..... ..... ..... @vvvv | ||
44 | +xvfnmsub_d 0000 10101110 ..... ..... ..... ..... @vvvv | ||
45 | + | ||
46 | +xvfmax_s 0111 01010011 11001 ..... ..... ..... @vvv | ||
47 | +xvfmax_d 0111 01010011 11010 ..... ..... ..... @vvv | ||
48 | +xvfmin_s 0111 01010011 11101 ..... ..... ..... @vvv | ||
49 | +xvfmin_d 0111 01010011 11110 ..... ..... ..... @vvv | ||
50 | + | ||
51 | +xvfmaxa_s 0111 01010100 00001 ..... ..... ..... @vvv | ||
52 | +xvfmaxa_d 0111 01010100 00010 ..... ..... ..... @vvv | ||
53 | +xvfmina_s 0111 01010100 00101 ..... ..... ..... @vvv | ||
54 | +xvfmina_d 0111 01010100 00110 ..... ..... ..... @vvv | ||
55 | + | ||
56 | +xvflogb_s 0111 01101001 11001 10001 ..... ..... @vv | ||
57 | +xvflogb_d 0111 01101001 11001 10010 ..... ..... @vv | ||
58 | + | ||
59 | +xvfclass_s 0111 01101001 11001 10101 ..... ..... @vv | ||
60 | +xvfclass_d 0111 01101001 11001 10110 ..... ..... @vv | ||
61 | + | ||
62 | +xvfsqrt_s 0111 01101001 11001 11001 ..... ..... @vv | ||
63 | +xvfsqrt_d 0111 01101001 11001 11010 ..... ..... @vv | ||
64 | +xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv | ||
65 | +xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv | ||
66 | +xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv | ||
67 | +xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv | ||
68 | + | ||
69 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
70 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
71 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
72 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/loongarch/disas.c | ||
75 | +++ b/target/loongarch/disas.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic) | ||
77 | output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm); | ||
78 | } | ||
79 | |||
80 | +static void output_vvvv_x(DisasContext *ctx, arg_vvvv *a, const char *mnemonic) | ||
81 | +{ | ||
82 | + output(ctx, mnemonic, "x%d, x%d, x%d, x%d", a->vd, a->vj, a->vk, a->va); | ||
83 | +} | ||
84 | + | ||
85 | static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic) | ||
86 | { | ||
87 | output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk); | ||
88 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvfrstp_h, vvv) | ||
89 | INSN_LASX(xvfrstpi_b, vv_i) | ||
90 | INSN_LASX(xvfrstpi_h, vv_i) | ||
91 | |||
92 | +INSN_LASX(xvfadd_s, vvv) | ||
93 | +INSN_LASX(xvfadd_d, vvv) | ||
94 | +INSN_LASX(xvfsub_s, vvv) | ||
95 | +INSN_LASX(xvfsub_d, vvv) | ||
96 | +INSN_LASX(xvfmul_s, vvv) | ||
97 | +INSN_LASX(xvfmul_d, vvv) | ||
98 | +INSN_LASX(xvfdiv_s, vvv) | ||
99 | +INSN_LASX(xvfdiv_d, vvv) | ||
100 | + | ||
101 | +INSN_LASX(xvfmadd_s, vvvv) | ||
102 | +INSN_LASX(xvfmadd_d, vvvv) | ||
103 | +INSN_LASX(xvfmsub_s, vvvv) | ||
104 | +INSN_LASX(xvfmsub_d, vvvv) | ||
105 | +INSN_LASX(xvfnmadd_s, vvvv) | ||
106 | +INSN_LASX(xvfnmadd_d, vvvv) | ||
107 | +INSN_LASX(xvfnmsub_s, vvvv) | ||
108 | +INSN_LASX(xvfnmsub_d, vvvv) | ||
109 | + | ||
110 | +INSN_LASX(xvfmax_s, vvv) | ||
111 | +INSN_LASX(xvfmax_d, vvv) | ||
112 | +INSN_LASX(xvfmin_s, vvv) | ||
113 | +INSN_LASX(xvfmin_d, vvv) | ||
114 | + | ||
115 | +INSN_LASX(xvfmaxa_s, vvv) | ||
116 | +INSN_LASX(xvfmaxa_d, vvv) | ||
117 | +INSN_LASX(xvfmina_s, vvv) | ||
118 | +INSN_LASX(xvfmina_d, vvv) | ||
119 | + | ||
120 | +INSN_LASX(xvflogb_s, vv) | ||
121 | +INSN_LASX(xvflogb_d, vv) | ||
122 | + | ||
123 | +INSN_LASX(xvfclass_s, vv) | ||
124 | +INSN_LASX(xvfclass_d, vv) | ||
125 | + | ||
126 | +INSN_LASX(xvfsqrt_s, vv) | ||
127 | +INSN_LASX(xvfsqrt_d, vv) | ||
128 | +INSN_LASX(xvfrecip_s, vv) | ||
129 | +INSN_LASX(xvfrecip_d, vv) | ||
130 | +INSN_LASX(xvfrsqrt_s, vv) | ||
131 | +INSN_LASX(xvfrsqrt_d, vv) | ||
132 | + | ||
133 | INSN_LASX(xvreplgr2vr_b, vr) | ||
134 | INSN_LASX(xvreplgr2vr_h, vr) | ||
135 | INSN_LASX(xvreplgr2vr_w, vr) | ||
136 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
137 | index XXXXXXX..XXXXXXX 100644 | ||
138 | --- a/target/loongarch/vec_helper.c | ||
139 | +++ b/target/loongarch/vec_helper.c | ||
140 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, \ | ||
141 | VReg *Vd = (VReg *)vd; \ | ||
142 | VReg *Vj = (VReg *)vj; \ | ||
143 | VReg *Vk = (VReg *)vk; \ | ||
144 | + int oprsz = simd_oprsz(desc); \ | ||
145 | \ | ||
146 | vec_clear_cause(env); \ | ||
147 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
148 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
149 | Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ | ||
150 | vec_update_fcsr0(env, GETPC()); \ | ||
151 | } \ | ||
152 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \ | ||
153 | VReg *Vj = (VReg *)vj; \ | ||
154 | VReg *Vk = (VReg *)vk; \ | ||
155 | VReg *Va = (VReg *)va; \ | ||
156 | + int oprsz = simd_oprsz(desc); \ | ||
157 | \ | ||
158 | vec_clear_cause(env); \ | ||
159 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
160 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
161 | Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ | ||
162 | vec_update_fcsr0(env, GETPC()); \ | ||
163 | } \ | ||
164 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \ | ||
165 | int i; \ | ||
166 | VReg *Vd = (VReg *)vd; \ | ||
167 | VReg *Vj = (VReg *)vj; \ | ||
168 | + int oprsz = simd_oprsz(desc); \ | ||
169 | \ | ||
170 | vec_clear_cause(env); \ | ||
171 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
172 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
173 | Vd->E(i) = FN(env, Vj->E(i)); \ | ||
174 | } \ | ||
175 | } | ||
176 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \ | ||
177 | int i; \ | ||
178 | VReg *Vd = (VReg *)vd; \ | ||
179 | VReg *Vj = (VReg *)vj; \ | ||
180 | + int oprsz = simd_oprsz(desc); \ | ||
181 | \ | ||
182 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
183 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
184 | Vd->E(i) = FN(env, Vj->E(i)); \ | ||
185 | } \ | ||
186 | } | ||
187 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
188 | index XXXXXXX..XXXXXXX 100644 | ||
189 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
190 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
191 | @@ -XXX,XX +XXX,XX @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz) | ||
192 | static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
193 | gen_helper_gvec_4_ptr *fn) | ||
194 | { | ||
195 | + if (!check_vec(ctx, oprsz)) { | ||
196 | + return true; | ||
197 | + } | ||
198 | + | ||
199 | tcg_gen_gvec_4_ptr(vec_full_offset(a->vd), | ||
200 | vec_full_offset(a->vj), | ||
201 | vec_full_offset(a->vk), | ||
202 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
203 | static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a, | ||
204 | gen_helper_gvec_4_ptr *fn) | ||
205 | { | ||
206 | - if (!check_vec(ctx, 16)) { | ||
207 | - return true; | ||
208 | - } | ||
209 | - | ||
210 | return gen_vvvv_ptr_vl(ctx, a, 16, fn); | ||
211 | } | ||
212 | |||
213 | +static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a, | ||
214 | + gen_helper_gvec_4_ptr *fn) | ||
215 | +{ | ||
216 | + return gen_vvvv_ptr_vl(ctx, a, 32, fn); | ||
217 | +} | ||
218 | + | ||
219 | static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
220 | gen_helper_gvec_4 *fn) | ||
221 | { | ||
222 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, | ||
223 | static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
224 | gen_helper_gvec_3_ptr *fn) | ||
225 | { | ||
226 | + if (!check_vec(ctx, oprsz)) { | ||
227 | + return true; | ||
228 | + } | ||
229 | tcg_gen_gvec_3_ptr(vec_full_offset(a->vd), | ||
230 | vec_full_offset(a->vj), | ||
231 | vec_full_offset(a->vk), | ||
232 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
233 | static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, | ||
234 | gen_helper_gvec_3_ptr *fn) | ||
235 | { | ||
236 | - if (!check_vec(ctx, 16)) { | ||
237 | - return true; | ||
238 | - } | ||
239 | - | ||
240 | return gen_vvv_ptr_vl(ctx, a, 16, fn); | ||
241 | } | ||
242 | |||
243 | +static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a, | ||
244 | + gen_helper_gvec_3_ptr *fn) | ||
245 | +{ | ||
246 | + return gen_vvv_ptr_vl(ctx, a, 32, fn); | ||
247 | +} | ||
248 | + | ||
249 | static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
250 | gen_helper_gvec_3 *fn) | ||
251 | { | ||
252 | @@ -XXX,XX +XXX,XX @@ static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) | ||
253 | static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
254 | gen_helper_gvec_2_ptr *fn) | ||
255 | { | ||
256 | + if (!check_vec(ctx, oprsz)) { | ||
257 | + return true; | ||
258 | + } | ||
259 | + | ||
260 | tcg_gen_gvec_2_ptr(vec_full_offset(a->vd), | ||
261 | vec_full_offset(a->vj), | ||
262 | cpu_env, | ||
263 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
264 | static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, | ||
265 | gen_helper_gvec_2_ptr *fn) | ||
266 | { | ||
267 | - if (!check_vec(ctx, 16)) { | ||
268 | - return true; | ||
269 | - } | ||
270 | - | ||
271 | return gen_vv_ptr_vl(ctx, a, 16, fn); | ||
272 | } | ||
273 | |||
274 | +static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a, | ||
275 | + gen_helper_gvec_2_ptr *fn) | ||
276 | +{ | ||
277 | + return gen_vv_ptr_vl(ctx, a, 32, fn); | ||
278 | +} | ||
279 | + | ||
280 | static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, | ||
281 | gen_helper_gvec_2 *fn) | ||
282 | { | ||
283 | @@ -XXX,XX +XXX,XX @@ TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s) | ||
284 | TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d) | ||
285 | TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s) | ||
286 | TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d) | ||
287 | +TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s) | ||
288 | +TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d) | ||
289 | +TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s) | ||
290 | +TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d) | ||
291 | +TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s) | ||
292 | +TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d) | ||
293 | +TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s) | ||
294 | +TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d) | ||
295 | |||
296 | TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s) | ||
297 | TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d) | ||
298 | @@ -XXX,XX +XXX,XX @@ TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s) | ||
299 | TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d) | ||
300 | TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s) | ||
301 | TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d) | ||
302 | +TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s) | ||
303 | +TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d) | ||
304 | +TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s) | ||
305 | +TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d) | ||
306 | +TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s) | ||
307 | +TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d) | ||
308 | +TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s) | ||
309 | +TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d) | ||
310 | |||
311 | TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s) | ||
312 | TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d) | ||
313 | TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s) | ||
314 | TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d) | ||
315 | +TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s) | ||
316 | +TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d) | ||
317 | +TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s) | ||
318 | +TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d) | ||
319 | |||
320 | TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s) | ||
321 | TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d) | ||
322 | TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s) | ||
323 | TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d) | ||
324 | +TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s) | ||
325 | +TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d) | ||
326 | +TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s) | ||
327 | +TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d) | ||
328 | |||
329 | TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s) | ||
330 | TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d) | ||
331 | +TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s) | ||
332 | +TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d) | ||
333 | |||
334 | TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s) | ||
335 | TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d) | ||
336 | +TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s) | ||
337 | +TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d) | ||
338 | |||
339 | TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s) | ||
340 | TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d) | ||
341 | @@ -XXX,XX +XXX,XX @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s) | ||
342 | TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d) | ||
343 | TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s) | ||
344 | TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d) | ||
345 | +TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s) | ||
346 | +TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d) | ||
347 | +TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s) | ||
348 | +TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d) | ||
349 | +TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s) | ||
350 | +TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d) | ||
351 | |||
352 | TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h) | ||
353 | TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h) | ||
354 | -- | ||
355 | 2.39.1
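Besides the new xv* entries, the trans_vec.c.inc hunks above move the check_vec() call out of the fixed-width wrappers and into the shared *_vl generators, so the 16-byte (LSX) and 32-byte (LASX) entry points no longer each duplicate it. A minimal sketch of the resulting shape, with check_ok() standing in for check_vec() and the gvec emission elided:

/* Sketch: the availability check lives in the shared core, keyed on
 * the operand size; the width-specific wrappers just pick a size. */
#include <stdbool.h>

static bool check_ok(unsigned oprsz) { return oprsz == 16 || oprsz == 32; }

static bool gen_vl(unsigned oprsz)
{
    if (!check_ok(oprsz)) {
        return true;            /* as in the patch: checked once, centrally */
    }
    /* ... emit the gvec op over oprsz bytes ... */
    return true;
}

static bool gen_v(void) { return gen_vl(16); }   /* LSX wrapper */
static bool gen_x(void) { return gen_vl(32); }   /* LASX wrapper */

int main(void) { return gen_v() && gen_x() ? 0 : 1; }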
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVFCVT{L/H}.{S.H/D.S}; | ||
3 | - XVFCVT.{H.S/S.D}; | ||
4 | - XVFRINT[{RNE/RZ/RP/RM}].{S/D}; | ||
5 | - XVFTINT[{RNE/RZ/RP/RM}].{W.S/L.D}; | ||
6 | - XVFTINT[RZ].{WU.S/LU.D}; | ||
7 | - XVFTINT[{RNE/RZ/RP/RM}].W.D; | ||
8 | - XVFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S; | ||
9 | - XVFFINT.{S.W/D.L}[U]; | ||
10 | - XVFFINT.S.L, XVFFINT{L/H}.D.W. | ||
1 | 11 | ||
12 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-Id: <20230914022645.1151356-48-gaosong@loongson.cn> | ||
15 | --- | ||
16 | target/loongarch/insns.decode | 58 +++++ | ||
17 | target/loongarch/disas.c | 56 +++++ | ||
18 | target/loongarch/vec_helper.c | 235 +++++++++++++------- | ||
19 | target/loongarch/insn_trans/trans_vec.c.inc | 52 +++++ | ||
20 | 4 files changed, 315 insertions(+), 86 deletions(-) | ||
21 | |||
22 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/target/loongarch/insns.decode | ||
25 | +++ b/target/loongarch/insns.decode | ||
26 | @@ -XXX,XX +XXX,XX @@ xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv | ||
27 | xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv | ||
28 | xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv | ||
29 | |||
30 | +xvfcvtl_s_h 0111 01101001 11011 11010 ..... ..... @vv | ||
31 | +xvfcvth_s_h 0111 01101001 11011 11011 ..... ..... @vv | ||
32 | +xvfcvtl_d_s 0111 01101001 11011 11100 ..... ..... @vv | ||
33 | +xvfcvth_d_s 0111 01101001 11011 11101 ..... ..... @vv | ||
34 | +xvfcvt_h_s 0111 01010100 01100 ..... ..... ..... @vvv | ||
35 | +xvfcvt_s_d 0111 01010100 01101 ..... ..... ..... @vvv | ||
36 | + | ||
37 | +xvfrintrne_s 0111 01101001 11010 11101 ..... ..... @vv | ||
38 | +xvfrintrne_d 0111 01101001 11010 11110 ..... ..... @vv | ||
39 | +xvfrintrz_s 0111 01101001 11010 11001 ..... ..... @vv | ||
40 | +xvfrintrz_d 0111 01101001 11010 11010 ..... ..... @vv | ||
41 | +xvfrintrp_s 0111 01101001 11010 10101 ..... ..... @vv | ||
42 | +xvfrintrp_d 0111 01101001 11010 10110 ..... ..... @vv | ||
43 | +xvfrintrm_s 0111 01101001 11010 10001 ..... ..... @vv | ||
44 | +xvfrintrm_d 0111 01101001 11010 10010 ..... ..... @vv | ||
45 | +xvfrint_s 0111 01101001 11010 01101 ..... ..... @vv | ||
46 | +xvfrint_d 0111 01101001 11010 01110 ..... ..... @vv | ||
47 | + | ||
48 | +xvftintrne_w_s 0111 01101001 11100 10100 ..... ..... @vv | ||
49 | +xvftintrne_l_d 0111 01101001 11100 10101 ..... ..... @vv | ||
50 | +xvftintrz_w_s 0111 01101001 11100 10010 ..... ..... @vv | ||
51 | +xvftintrz_l_d 0111 01101001 11100 10011 ..... ..... @vv | ||
52 | +xvftintrp_w_s 0111 01101001 11100 10000 ..... ..... @vv | ||
53 | +xvftintrp_l_d 0111 01101001 11100 10001 ..... ..... @vv | ||
54 | +xvftintrm_w_s 0111 01101001 11100 01110 ..... ..... @vv | ||
55 | +xvftintrm_l_d 0111 01101001 11100 01111 ..... ..... @vv | ||
56 | +xvftint_w_s 0111 01101001 11100 01100 ..... ..... @vv | ||
57 | +xvftint_l_d 0111 01101001 11100 01101 ..... ..... @vv | ||
58 | +xvftintrz_wu_s 0111 01101001 11100 11100 ..... ..... @vv | ||
59 | +xvftintrz_lu_d 0111 01101001 11100 11101 ..... ..... @vv | ||
60 | +xvftint_wu_s 0111 01101001 11100 10110 ..... ..... @vv | ||
61 | +xvftint_lu_d 0111 01101001 11100 10111 ..... ..... @vv | ||
62 | + | ||
63 | +xvftintrne_w_d 0111 01010100 10111 ..... ..... ..... @vvv | ||
64 | +xvftintrz_w_d 0111 01010100 10110 ..... ..... ..... @vvv | ||
65 | +xvftintrp_w_d 0111 01010100 10101 ..... ..... ..... @vvv | ||
66 | +xvftintrm_w_d 0111 01010100 10100 ..... ..... ..... @vvv | ||
67 | +xvftint_w_d 0111 01010100 10011 ..... ..... ..... @vvv | ||
68 | + | ||
69 | +xvftintrnel_l_s 0111 01101001 11101 01000 ..... ..... @vv | ||
70 | +xvftintrneh_l_s 0111 01101001 11101 01001 ..... ..... @vv | ||
71 | +xvftintrzl_l_s 0111 01101001 11101 00110 ..... ..... @vv | ||
72 | +xvftintrzh_l_s 0111 01101001 11101 00111 ..... ..... @vv | ||
73 | +xvftintrpl_l_s 0111 01101001 11101 00100 ..... ..... @vv | ||
74 | +xvftintrph_l_s 0111 01101001 11101 00101 ..... ..... @vv | ||
75 | +xvftintrml_l_s 0111 01101001 11101 00010 ..... ..... @vv | ||
76 | +xvftintrmh_l_s 0111 01101001 11101 00011 ..... ..... @vv | ||
77 | +xvftintl_l_s 0111 01101001 11101 00000 ..... ..... @vv | ||
78 | +xvftinth_l_s 0111 01101001 11101 00001 ..... ..... @vv | ||
79 | + | ||
80 | +xvffint_s_w 0111 01101001 11100 00000 ..... ..... @vv | ||
81 | +xvffint_d_l 0111 01101001 11100 00010 ..... ..... @vv | ||
82 | +xvffint_s_wu 0111 01101001 11100 00001 ..... ..... @vv | ||
83 | +xvffint_d_lu 0111 01101001 11100 00011 ..... ..... @vv | ||
84 | +xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv | ||
85 | +xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv | ||
86 | +xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv | ||
87 | + | ||
88 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
89 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
90 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
91 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
92 | index XXXXXXX..XXXXXXX 100644 | ||
93 | --- a/target/loongarch/disas.c | ||
94 | +++ b/target/loongarch/disas.c | ||
95 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvfrecip_d, vv) | ||
96 | INSN_LASX(xvfrsqrt_s, vv) | ||
97 | INSN_LASX(xvfrsqrt_d, vv) | ||
98 | |||
99 | +INSN_LASX(xvfcvtl_s_h, vv) | ||
100 | +INSN_LASX(xvfcvth_s_h, vv) | ||
101 | +INSN_LASX(xvfcvtl_d_s, vv) | ||
102 | +INSN_LASX(xvfcvth_d_s, vv) | ||
103 | +INSN_LASX(xvfcvt_h_s, vvv) | ||
104 | +INSN_LASX(xvfcvt_s_d, vvv) | ||
105 | + | ||
106 | +INSN_LASX(xvfrint_s, vv) | ||
107 | +INSN_LASX(xvfrint_d, vv) | ||
108 | +INSN_LASX(xvfrintrm_s, vv) | ||
109 | +INSN_LASX(xvfrintrm_d, vv) | ||
110 | +INSN_LASX(xvfrintrp_s, vv) | ||
111 | +INSN_LASX(xvfrintrp_d, vv) | ||
112 | +INSN_LASX(xvfrintrz_s, vv) | ||
113 | +INSN_LASX(xvfrintrz_d, vv) | ||
114 | +INSN_LASX(xvfrintrne_s, vv) | ||
115 | +INSN_LASX(xvfrintrne_d, vv) | ||
116 | + | ||
117 | +INSN_LASX(xvftint_w_s, vv) | ||
118 | +INSN_LASX(xvftint_l_d, vv) | ||
119 | +INSN_LASX(xvftintrm_w_s, vv) | ||
120 | +INSN_LASX(xvftintrm_l_d, vv) | ||
121 | +INSN_LASX(xvftintrp_w_s, vv) | ||
122 | +INSN_LASX(xvftintrp_l_d, vv) | ||
123 | +INSN_LASX(xvftintrz_w_s, vv) | ||
124 | +INSN_LASX(xvftintrz_l_d, vv) | ||
125 | +INSN_LASX(xvftintrne_w_s, vv) | ||
126 | +INSN_LASX(xvftintrne_l_d, vv) | ||
127 | +INSN_LASX(xvftint_wu_s, vv) | ||
128 | +INSN_LASX(xvftint_lu_d, vv) | ||
129 | +INSN_LASX(xvftintrz_wu_s, vv) | ||
130 | +INSN_LASX(xvftintrz_lu_d, vv) | ||
131 | +INSN_LASX(xvftint_w_d, vvv) | ||
132 | +INSN_LASX(xvftintrm_w_d, vvv) | ||
133 | +INSN_LASX(xvftintrp_w_d, vvv) | ||
134 | +INSN_LASX(xvftintrz_w_d, vvv) | ||
135 | +INSN_LASX(xvftintrne_w_d, vvv) | ||
136 | +INSN_LASX(xvftintl_l_s, vv) | ||
137 | +INSN_LASX(xvftinth_l_s, vv) | ||
138 | +INSN_LASX(xvftintrml_l_s, vv) | ||
139 | +INSN_LASX(xvftintrmh_l_s, vv) | ||
140 | +INSN_LASX(xvftintrpl_l_s, vv) | ||
141 | +INSN_LASX(xvftintrph_l_s, vv) | ||
142 | +INSN_LASX(xvftintrzl_l_s, vv) | ||
143 | +INSN_LASX(xvftintrzh_l_s, vv) | ||
144 | +INSN_LASX(xvftintrnel_l_s, vv) | ||
145 | +INSN_LASX(xvftintrneh_l_s, vv) | ||
146 | + | ||
147 | +INSN_LASX(xvffint_s_w, vv) | ||
148 | +INSN_LASX(xvffint_s_wu, vv) | ||
149 | +INSN_LASX(xvffint_d_l, vv) | ||
150 | +INSN_LASX(xvffint_d_lu, vv) | ||
151 | +INSN_LASX(xvffintl_d_w, vv) | ||
152 | +INSN_LASX(xvffinth_d_w, vv) | ||
153 | +INSN_LASX(xvffint_s_l, vvv) | ||
154 | + | ||
155 | INSN_LASX(xvreplgr2vr_b, vr) | ||
156 | INSN_LASX(xvreplgr2vr_h, vr) | ||
157 | INSN_LASX(xvreplgr2vr_w, vr) | ||
158 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
159 | index XXXXXXX..XXXXXXX 100644 | ||
160 | --- a/target/loongarch/vec_helper.c | ||
161 | +++ b/target/loongarch/vec_helper.c | ||
162 | @@ -XXX,XX +XXX,XX @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status) | ||
163 | void HELPER(vfcvtl_s_h)(void *vd, void *vj, | ||
164 | CPULoongArchState *env, uint32_t desc) | ||
165 | { | ||
166 | - int i; | ||
167 | - VReg temp; | ||
168 | + int i, j, ofs; | ||
169 | + VReg temp = {}; | ||
170 | VReg *Vd = (VReg *)vd; | ||
171 | VReg *Vj = (VReg *)vj; | ||
172 | + int oprsz = simd_oprsz(desc); | ||
173 | |||
174 | + ofs = LSX_LEN / 32; | ||
175 | vec_clear_cause(env); | ||
176 | - for (i = 0; i < LSX_LEN/32; i++) { | ||
177 | - temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status); | ||
178 | + for (i = 0; i < oprsz / 16; i++) { | ||
179 | + for (j = 0; j < ofs; j++) { | ||
180 | + temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * 2 * i), | ||
181 | + &env->fp_status); | ||
182 | + } | ||
183 | vec_update_fcsr0(env, GETPC()); | ||
184 | } | ||
185 | *Vd = temp; | ||
186 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_s_h)(void *vd, void *vj, | ||
187 | void HELPER(vfcvtl_d_s)(void *vd, void *vj, | ||
188 | CPULoongArchState *env, uint32_t desc) | ||
189 | { | ||
190 | - int i; | ||
191 | - VReg temp; | ||
192 | + int i, j, ofs; | ||
193 | + VReg temp = {}; | ||
194 | VReg *Vd = (VReg *)vd; | ||
195 | VReg *Vj = (VReg *)vj; | ||
196 | + int oprsz = simd_oprsz(desc); | ||
197 | |||
198 | + ofs = LSX_LEN / 64; | ||
199 | vec_clear_cause(env); | ||
200 | - for (i = 0; i < LSX_LEN/64; i++) { | ||
201 | - temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status); | ||
202 | + for (i = 0; i < oprsz / 16; i++) { | ||
203 | + for (j = 0; j < ofs; j++) { | ||
204 | + temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i), | ||
205 | + &env->fp_status); | ||
206 | + } | ||
207 | vec_update_fcsr0(env, GETPC()); | ||
208 | } | ||
209 | *Vd = temp; | ||
210 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvtl_d_s)(void *vd, void *vj, | ||
211 | void HELPER(vfcvth_s_h)(void *vd, void *vj, | ||
212 | CPULoongArchState *env, uint32_t desc) | ||
213 | { | ||
214 | - int i; | ||
215 | - VReg temp; | ||
216 | + int i, j, ofs; | ||
217 | + VReg temp = {}; | ||
218 | VReg *Vd = (VReg *)vd; | ||
219 | VReg *Vj = (VReg *)vj; | ||
220 | + int oprsz = simd_oprsz(desc); | ||
221 | |||
222 | + ofs = LSX_LEN / 32; | ||
223 | vec_clear_cause(env); | ||
224 | - for (i = 0; i < LSX_LEN/32; i++) { | ||
225 | - temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status); | ||
226 | + for (i = 0; i < oprsz / 16; i++) { | ||
227 | + for (j = 0; j < ofs; j++) { | ||
228 | + temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)), | ||
229 | + &env->fp_status); | ||
230 | + } | ||
231 | vec_update_fcsr0(env, GETPC()); | ||
232 | } | ||
233 | *Vd = temp; | ||
234 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_s_h)(void *vd, void *vj, | ||
235 | void HELPER(vfcvth_d_s)(void *vd, void *vj, | ||
236 | CPULoongArchState *env, uint32_t desc) | ||
237 | { | ||
238 | - int i; | ||
239 | - VReg temp; | ||
240 | + int i, j, ofs; | ||
241 | + VReg temp = {}; | ||
242 | VReg *Vd = (VReg *)vd; | ||
243 | VReg *Vj = (VReg *)vj; | ||
244 | + int oprsz = simd_oprsz(desc); | ||
245 | |||
246 | + ofs = LSX_LEN / 64; | ||
247 | vec_clear_cause(env); | ||
248 | - for (i = 0; i < LSX_LEN/64; i++) { | ||
249 | - temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status); | ||
250 | + for (i = 0; i < oprsz / 16; i++) { | ||
251 | + for (j = 0; j < ofs; j++) { | ||
252 | + temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)), | ||
253 | + &env->fp_status); | ||
254 | + } | ||
255 | vec_update_fcsr0(env, GETPC()); | ||
256 | } | ||
257 | *Vd = temp; | ||
258 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvth_d_s)(void *vd, void *vj, | ||
259 | void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, | ||
260 | CPULoongArchState *env, uint32_t desc) | ||
261 | { | ||
262 | - int i; | ||
263 | - VReg temp; | ||
264 | + int i, j, ofs; | ||
265 | + VReg temp = {}; | ||
266 | VReg *Vd = (VReg *)vd; | ||
267 | VReg *Vj = (VReg *)vj; | ||
268 | VReg *Vk = (VReg *)vk; | ||
269 | + int oprsz = simd_oprsz(desc); | ||
270 | |||
271 | + ofs = LSX_LEN / 32; | ||
272 | vec_clear_cause(env); | ||
273 | - for(i = 0; i < LSX_LEN/32; i++) { | ||
274 | - temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status); | ||
275 | - temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status); | ||
276 | + for(i = 0; i < oprsz / 16; i++) { | ||
277 | + for (j = 0; j < ofs; j++) { | ||
278 | + temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i), | ||
279 | + &env->fp_status); | ||
280 | + temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i), | ||
281 | + &env->fp_status); | ||
282 | + } | ||
283 | vec_update_fcsr0(env, GETPC()); | ||
284 | } | ||
285 | *Vd = temp; | ||
286 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, | ||
287 | void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, | ||
288 | CPULoongArchState *env, uint32_t desc) | ||
289 | { | ||
290 | - int i; | ||
291 | - VReg temp; | ||
292 | + int i, j, ofs; | ||
293 | + VReg temp = {}; | ||
294 | VReg *Vd = (VReg *)vd; | ||
295 | VReg *Vj = (VReg *)vj; | ||
296 | VReg *Vk = (VReg *)vk; | ||
297 | + int oprsz = simd_oprsz(desc); | ||
298 | |||
299 | + ofs = LSX_LEN / 64; | ||
300 | vec_clear_cause(env); | ||
301 | - for(i = 0; i < LSX_LEN/64; i++) { | ||
302 | - temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status); | ||
303 | - temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status); | ||
304 | + for(i = 0; i < oprsz / 16; i++) { | ||
305 | + for (j = 0; j < ofs; j++) { | ||
306 | + temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i), | ||
307 | + &env->fp_status); | ||
308 | + temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i), | ||
309 | + &env->fp_status); | ||
310 | + } | ||
311 | vec_update_fcsr0(env, GETPC()); | ||
312 | } | ||
313 | *Vd = temp; | ||
314 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_s)(void *vd, void *vj, | ||
315 | int i; | ||
316 | VReg *Vd = (VReg *)vd; | ||
317 | VReg *Vj = (VReg *)vj; | ||
318 | + int oprsz = simd_oprsz(desc); | ||
319 | |||
320 | vec_clear_cause(env); | ||
321 | - for (i = 0; i < 4; i++) { | ||
322 | + for (i = 0; i < oprsz / 4; i++) { | ||
323 | Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); | ||
324 | vec_update_fcsr0(env, GETPC()); | ||
325 | } | ||
326 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfrint_d)(void *vd, void *vj, | ||
327 | int i; | ||
328 | VReg *Vd = (VReg *)vd; | ||
329 | VReg *Vj = (VReg *)vj; | ||
330 | + int oprsz = simd_oprsz(desc); | ||
331 | |||
332 | vec_clear_cause(env); | ||
333 | - for (i = 0; i < 2; i++) { | ||
334 | + for (i = 0; i < oprsz / 8; i++) { | ||
335 | Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); | ||
336 | vec_update_fcsr0(env, GETPC()); | ||
337 | } | ||
338 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, \ | ||
339 | int i; \ | ||
340 | VReg *Vd = (VReg *)vd; \ | ||
341 | VReg *Vj = (VReg *)vj; \ | ||
342 | + int oprsz = simd_oprsz(desc); \ | ||
343 | \ | ||
344 | vec_clear_cause(env); \ | ||
345 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
346 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
347 | FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ | ||
348 | set_float_rounding_mode(MODE, &env->fp_status); \ | ||
349 | Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ | ||
350 | @@ -XXX,XX +XXX,XX @@ FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) | ||
351 | FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) | ||
352 | FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) | ||
353 | |||
354 | -#define FTINT_W_D(NAME, FN) \ | ||
355 | -void HELPER(NAME)(void *vd, void *vj, void *vk, \ | ||
356 | - CPULoongArchState *env, uint32_t desc) \ | ||
357 | -{ \ | ||
358 | - int i; \ | ||
359 | - VReg temp; \ | ||
360 | - VReg *Vd = (VReg *)vd; \ | ||
361 | - VReg *Vj = (VReg *)vj; \ | ||
362 | - VReg *Vk = (VReg *)vk; \ | ||
363 | - \ | ||
364 | - vec_clear_cause(env); \ | ||
365 | - for (i = 0; i < 2; i++) { \ | ||
366 | - temp.W(i + 2) = FN(env, Vj->UD(i)); \ | ||
367 | - temp.W(i) = FN(env, Vk->UD(i)); \ | ||
368 | - } \ | ||
369 | - *Vd = temp; \ | ||
370 | +#define FTINT_W_D(NAME, FN) \ | ||
371 | +void HELPER(NAME)(void *vd, void *vj, void *vk, \ | ||
372 | + CPULoongArchState *env, uint32_t desc) \ | ||
373 | +{ \ | ||
374 | + int i, j, ofs; \ | ||
375 | + VReg temp = {}; \ | ||
376 | + VReg *Vd = (VReg *)vd; \ | ||
377 | + VReg *Vj = (VReg *)vj; \ | ||
378 | + VReg *Vk = (VReg *)vk; \ | ||
379 | + int oprsz = simd_oprsz(desc); \ | ||
380 | + \ | ||
381 | + ofs = LSX_LEN / 64; \ | ||
382 | + vec_clear_cause(env); \ | ||
383 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
384 | + for (j = 0; j < ofs; j++) { \ | ||
385 | + temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \ | ||
386 | + temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \ | ||
387 | + } \ | ||
388 | + } \ | ||
389 | + *Vd = temp; \ | ||
390 | } | ||
391 | |||
392 | FTINT_W_D(vftint_w_d, do_float64_to_int32) | ||
393 | @@ -XXX,XX +XXX,XX @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) | ||
394 | FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) | ||
395 | FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) | ||
396 | |||
397 | -#define FTINTL_L_S(NAME, FN) \ | ||
398 | -void HELPER(NAME)(void *vd, void *vj, \ | ||
399 | - CPULoongArchState *env, uint32_t desc) \ | ||
400 | -{ \ | ||
401 | - int i; \ | ||
402 | - VReg temp; \ | ||
403 | - VReg *Vd = (VReg *)vd; \ | ||
404 | - VReg *Vj = (VReg *)vj; \ | ||
405 | - \ | ||
406 | - vec_clear_cause(env); \ | ||
407 | - for (i = 0; i < 2; i++) { \ | ||
408 | - temp.D(i) = FN(env, Vj->UW(i)); \ | ||
409 | - } \ | ||
410 | - *Vd = temp; \ | ||
411 | +#define FTINTL_L_S(NAME, FN) \ | ||
412 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
413 | + CPULoongArchState *env, uint32_t desc) \ | ||
414 | +{ \ | ||
415 | + int i, j, ofs; \ | ||
416 | + VReg temp = {}; \ | ||
417 | + VReg *Vd = (VReg *)vd; \ | ||
418 | + VReg *Vj = (VReg *)vj; \ | ||
419 | + int oprsz = simd_oprsz(desc); \ | ||
420 | + \ | ||
421 | + ofs = LSX_LEN / 64; \ | ||
422 | + vec_clear_cause(env); \ | ||
423 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
424 | + for (j = 0; j < ofs; j++) { \ | ||
425 | + temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \ | ||
426 | + } \ | ||
427 | + } \ | ||
428 | + *Vd = temp; \ | ||
429 | } | ||
430 | |||
431 | FTINTL_L_S(vftintl_l_s, do_float32_to_int64) | ||
432 | @@ -XXX,XX +XXX,XX @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) | ||
433 | FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) | ||
434 | FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) | ||
435 | |||
436 | -#define FTINTH_L_S(NAME, FN) \ | ||
437 | -void HELPER(NAME)(void *vd, void *vj, \ | ||
438 | - CPULoongArchState *env, uint32_t desc) \ | ||
439 | -{ \ | ||
440 | - int i; \ | ||
441 | - VReg temp; \ | ||
442 | - VReg *Vd = (VReg *)vd; \ | ||
443 | - VReg *Vj = (VReg *)vj; \ | ||
444 | - \ | ||
445 | - vec_clear_cause(env); \ | ||
446 | - for (i = 0; i < 2; i++) { \ | ||
447 | - temp.D(i) = FN(env, Vj->UW(i + 2)); \ | ||
448 | - } \ | ||
449 | - *Vd = temp; \ | ||
450 | +#define FTINTH_L_S(NAME, FN) \ | ||
451 | +void HELPER(NAME)(void *vd, void *vj, \ | ||
452 | + CPULoongArchState *env, uint32_t desc) \ | ||
453 | +{ \ | ||
454 | + int i, j, ofs; \ | ||
455 | + VReg temp = {}; \ | ||
456 | + VReg *Vd = (VReg *)vd; \ | ||
457 | + VReg *Vj = (VReg *)vj; \ | ||
458 | + int oprsz = simd_oprsz(desc); \ | ||
459 | + \ | ||
460 | + ofs = LSX_LEN / 64; \ | ||
461 | + vec_clear_cause(env); \ | ||
462 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
463 | + for (j = 0; j < ofs; j++) { \ | ||
464 | + temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \ | ||
465 | + } \ | ||
466 | + } \ | ||
467 | + *Vd = temp; \ | ||
468 | } | ||
469 | |||
470 | FTINTH_L_S(vftinth_l_s, do_float32_to_int64) | ||
471 | @@ -XXX,XX +XXX,XX @@ DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) | ||
472 | void HELPER(vffintl_d_w)(void *vd, void *vj, | ||
473 | CPULoongArchState *env, uint32_t desc) | ||
474 | { | ||
475 | - int i; | ||
476 | - VReg temp; | ||
477 | + int i, j, ofs; | ||
478 | + VReg temp = {}; | ||
479 | VReg *Vd = (VReg *)vd; | ||
480 | VReg *Vj = (VReg *)vj; | ||
481 | + int oprsz = simd_oprsz(desc); | ||
482 | |||
483 | + ofs = LSX_LEN / 64; | ||
484 | vec_clear_cause(env); | ||
485 | - for (i = 0; i < 2; i++) { | ||
486 | - temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status); | ||
487 | + for (i = 0; i < oprsz / 16; i++) { | ||
488 | + for (j = 0; j < ofs; j++) { | ||
489 | + temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i), | ||
490 | + &env->fp_status); | ||
491 | + } | ||
492 | vec_update_fcsr0(env, GETPC()); | ||
493 | } | ||
494 | *Vd = temp; | ||
495 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffintl_d_w)(void *vd, void *vj, | ||
496 | void HELPER(vffinth_d_w)(void *vd, void *vj, | ||
497 | CPULoongArchState *env, uint32_t desc) | ||
498 | { | ||
499 | - int i; | ||
500 | - VReg temp; | ||
501 | + int i, j, ofs; | ||
502 | + VReg temp = {}; | ||
503 | VReg *Vd = (VReg *)vd; | ||
504 | VReg *Vj = (VReg *)vj; | ||
505 | + int oprsz = simd_oprsz(desc); | ||
506 | |||
507 | + ofs = LSX_LEN / 64; | ||
508 | vec_clear_cause(env); | ||
509 | - for (i = 0; i < 2; i++) { | ||
510 | - temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status); | ||
511 | + for (i = 0; i < oprsz / 16; i++) { | ||
512 | + for (j = 0; j < ofs; j++) { | ||
513 | + temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)), | ||
514 | + &env->fp_status); | ||
515 | + } | ||
516 | vec_update_fcsr0(env, GETPC()); | ||
517 | } | ||
518 | *Vd = temp; | ||
519 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffinth_d_w)(void *vd, void *vj, | ||
520 | void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, | ||
521 | CPULoongArchState *env, uint32_t desc) | ||
522 | { | ||
523 | - int i; | ||
524 | - VReg temp; | ||
525 | + int i, j, ofs; | ||
526 | + VReg temp = {}; | ||
527 | VReg *Vd = (VReg *)vd; | ||
528 | VReg *Vj = (VReg *)vj; | ||
529 | VReg *Vk = (VReg *)vk; | ||
530 | + int oprsz = simd_oprsz(desc); | ||
531 | |||
532 | + ofs = LSX_LEN / 64; | ||
533 | vec_clear_cause(env); | ||
534 | - for (i = 0; i < 2; i++) { | ||
535 | - temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status); | ||
536 | - temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status); | ||
537 | + for (i = 0; i < oprsz / 16; i++) { | ||
538 | + for (j = 0; j < ofs; j++) { | ||
539 | + temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i), | ||
540 | + &env->fp_status); | ||
541 | + temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i), | ||
542 | + &env->fp_status); | ||
543 | + } | ||
544 | vec_update_fcsr0(env, GETPC()); | ||
545 | } | ||
546 | *Vd = temp; | ||
547 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
548 | index XXXXXXX..XXXXXXX 100644 | ||
549 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
550 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
551 | @@ -XXX,XX +XXX,XX @@ TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s) | ||
552 | TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s) | ||
553 | TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s) | ||
554 | TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d) | ||
555 | +TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h) | ||
556 | +TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h) | ||
557 | +TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s) | ||
558 | +TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s) | ||
559 | +TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s) | ||
560 | +TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d) | ||
561 | |||
562 | TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s) | ||
563 | TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d) | ||
564 | @@ -XXX,XX +XXX,XX @@ TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s) | ||
565 | TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d) | ||
566 | TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s) | ||
567 | TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d) | ||
568 | +TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s) | ||
569 | +TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d) | ||
570 | +TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s) | ||
571 | +TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d) | ||
572 | +TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s) | ||
573 | +TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d) | ||
574 | +TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s) | ||
575 | +TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d) | ||
576 | +TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s) | ||
577 | +TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d) | ||
578 | |||
579 | TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s) | ||
580 | TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d) | ||
581 | @@ -XXX,XX +XXX,XX @@ TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s) | ||
582 | TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s) | ||
583 | TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s) | ||
584 | TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s) | ||
585 | +TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s) | ||
586 | +TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d) | ||
587 | +TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s) | ||
588 | +TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d) | ||
589 | +TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s) | ||
590 | +TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d) | ||
591 | +TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s) | ||
592 | +TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d) | ||
593 | +TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s) | ||
594 | +TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d) | ||
595 | +TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s) | ||
596 | +TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d) | ||
597 | +TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s) | ||
598 | +TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d) | ||
599 | +TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d) | ||
600 | +TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d) | ||
601 | +TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d) | ||
602 | +TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d) | ||
603 | +TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d) | ||
604 | +TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s) | ||
605 | +TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s) | ||
606 | +TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s) | ||
607 | +TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s) | ||
608 | +TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s) | ||
609 | +TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s) | ||
610 | +TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s) | ||
611 | +TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s) | ||
612 | +TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s) | ||
613 | +TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s) | ||
614 | |||
615 | TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w) | ||
616 | TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l) | ||
617 | @@ -XXX,XX +XXX,XX @@ TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu) | ||
618 | TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w) | ||
619 | TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w) | ||
620 | TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l) | ||
621 | +TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w) | ||
622 | +TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l) | ||
623 | +TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu) | ||
624 | +TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu) | ||
625 | +TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w) | ||
626 | +TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w) | ||
627 | +TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l) | ||
628 | |||
629 | static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
630 | { | ||
631 | -- | ||
632 | 2.39.1 | ||
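
The index arithmetic these helpers now share treats a LASX register as independent 128-bit slices: within slice i, the elements converted from Vk fill the even half of the destination slice and those from Vj fill the odd half, exactly as the LSX forms did for the single slice. Below is a minimal standalone sketch of the mapping, assuming LSX_LEN is 128 as in QEMU (illustrative C only, not the helper itself):

```c
/* Print the lane mapping used by the narrowing helpers such as
 * vfcvt_s_d: ofs source elements per 128-bit slice, Vk feeding the even
 * half of each destination slice and Vj the odd half.
 */
#include <stdio.h>

int main(void)
{
    int oprsz = 32;        /* 32 bytes: a 256-bit LASX operation */
    int ofs = 128 / 64;    /* 64-bit source elements per 128-bit slice */

    for (int i = 0; i < oprsz / 16; i++) {
        for (int j = 0; j < ofs; j++) {
            printf("dst.W(%d) <- Vk.D(%d)\n", j + ofs * 2 * i, j + ofs * i);
            printf("dst.W(%d) <- Vj.D(%d)\n", j + ofs * (2 * i + 1), j + ofs * i);
        }
    }
    return 0;
}
```

With oprsz = 16 the outer loop runs once and the mapping degenerates to the original LSX behaviour, which is why one helper now serves both the v* and xv* forms.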
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSEQ[I].{B/H/W/D}; | ||
3 | - XVSLE[I].{B/H/W/D}[U]; | ||
4 | - XVSLT[I].{B/H/W/D}[U]. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-49-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 43 ++++ | ||
11 | target/loongarch/disas.c | 43 ++++ | ||
12 | target/loongarch/vec_helper.c | 23 +- | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 257 ++++++++------------ | ||
14 | 4 files changed, 201 insertions(+), 165 deletions(-) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv | ||
21 | xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv | ||
22 | xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv | ||
23 | |||
24 | +xvseq_b 0111 01000000 00000 ..... ..... ..... @vvv | ||
25 | +xvseq_h 0111 01000000 00001 ..... ..... ..... @vvv | ||
26 | +xvseq_w 0111 01000000 00010 ..... ..... ..... @vvv | ||
27 | +xvseq_d 0111 01000000 00011 ..... ..... ..... @vvv | ||
28 | +xvseqi_b 0111 01101000 00000 ..... ..... ..... @vv_i5 | ||
29 | +xvseqi_h 0111 01101000 00001 ..... ..... ..... @vv_i5 | ||
30 | +xvseqi_w 0111 01101000 00010 ..... ..... ..... @vv_i5 | ||
31 | +xvseqi_d 0111 01101000 00011 ..... ..... ..... @vv_i5 | ||
32 | + | ||
33 | +xvsle_b 0111 01000000 00100 ..... ..... ..... @vvv | ||
34 | +xvsle_h 0111 01000000 00101 ..... ..... ..... @vvv | ||
35 | +xvsle_w 0111 01000000 00110 ..... ..... ..... @vvv | ||
36 | +xvsle_d 0111 01000000 00111 ..... ..... ..... @vvv | ||
37 | +xvslei_b 0111 01101000 00100 ..... ..... ..... @vv_i5 | ||
38 | +xvslei_h 0111 01101000 00101 ..... ..... ..... @vv_i5 | ||
39 | +xvslei_w 0111 01101000 00110 ..... ..... ..... @vv_i5 | ||
40 | +xvslei_d 0111 01101000 00111 ..... ..... ..... @vv_i5 | ||
41 | +xvsle_bu 0111 01000000 01000 ..... ..... ..... @vvv | ||
42 | +xvsle_hu 0111 01000000 01001 ..... ..... ..... @vvv | ||
43 | +xvsle_wu 0111 01000000 01010 ..... ..... ..... @vvv | ||
44 | +xvsle_du 0111 01000000 01011 ..... ..... ..... @vvv | ||
45 | +xvslei_bu 0111 01101000 01000 ..... ..... ..... @vv_ui5 | ||
46 | +xvslei_hu 0111 01101000 01001 ..... ..... ..... @vv_ui5 | ||
47 | +xvslei_wu 0111 01101000 01010 ..... ..... ..... @vv_ui5 | ||
48 | +xvslei_du 0111 01101000 01011 ..... ..... ..... @vv_ui5 | ||
49 | + | ||
50 | +xvslt_b 0111 01000000 01100 ..... ..... ..... @vvv | ||
51 | +xvslt_h 0111 01000000 01101 ..... ..... ..... @vvv | ||
52 | +xvslt_w 0111 01000000 01110 ..... ..... ..... @vvv | ||
53 | +xvslt_d 0111 01000000 01111 ..... ..... ..... @vvv | ||
54 | +xvslti_b 0111 01101000 01100 ..... ..... ..... @vv_i5 | ||
55 | +xvslti_h 0111 01101000 01101 ..... ..... ..... @vv_i5 | ||
56 | +xvslti_w 0111 01101000 01110 ..... ..... ..... @vv_i5 | ||
57 | +xvslti_d 0111 01101000 01111 ..... ..... ..... @vv_i5 | ||
58 | +xvslt_bu 0111 01000000 10000 ..... ..... ..... @vvv | ||
59 | +xvslt_hu 0111 01000000 10001 ..... ..... ..... @vvv | ||
60 | +xvslt_wu 0111 01000000 10010 ..... ..... ..... @vvv | ||
61 | +xvslt_du 0111 01000000 10011 ..... ..... ..... @vvv | ||
62 | +xvslti_bu 0111 01101000 10000 ..... ..... ..... @vv_ui5 | ||
63 | +xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5 | ||
64 | +xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5 | ||
65 | +xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5 | ||
66 | + | ||
67 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
68 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
69 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
70 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/target/loongarch/disas.c | ||
73 | +++ b/target/loongarch/disas.c | ||
74 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvffintl_d_w, vv) | ||
75 | INSN_LASX(xvffinth_d_w, vv) | ||
76 | INSN_LASX(xvffint_s_l, vvv) | ||
77 | |||
78 | +INSN_LASX(xvseq_b, vvv) | ||
79 | +INSN_LASX(xvseq_h, vvv) | ||
80 | +INSN_LASX(xvseq_w, vvv) | ||
81 | +INSN_LASX(xvseq_d, vvv) | ||
82 | +INSN_LASX(xvseqi_b, vv_i) | ||
83 | +INSN_LASX(xvseqi_h, vv_i) | ||
84 | +INSN_LASX(xvseqi_w, vv_i) | ||
85 | +INSN_LASX(xvseqi_d, vv_i) | ||
86 | + | ||
87 | +INSN_LASX(xvsle_b, vvv) | ||
88 | +INSN_LASX(xvsle_h, vvv) | ||
89 | +INSN_LASX(xvsle_w, vvv) | ||
90 | +INSN_LASX(xvsle_d, vvv) | ||
91 | +INSN_LASX(xvslei_b, vv_i) | ||
92 | +INSN_LASX(xvslei_h, vv_i) | ||
93 | +INSN_LASX(xvslei_w, vv_i) | ||
94 | +INSN_LASX(xvslei_d, vv_i) | ||
95 | +INSN_LASX(xvsle_bu, vvv) | ||
96 | +INSN_LASX(xvsle_hu, vvv) | ||
97 | +INSN_LASX(xvsle_wu, vvv) | ||
98 | +INSN_LASX(xvsle_du, vvv) | ||
99 | +INSN_LASX(xvslei_bu, vv_i) | ||
100 | +INSN_LASX(xvslei_hu, vv_i) | ||
101 | +INSN_LASX(xvslei_wu, vv_i) | ||
102 | +INSN_LASX(xvslei_du, vv_i) | ||
103 | + | ||
104 | +INSN_LASX(xvslt_b, vvv) | ||
105 | +INSN_LASX(xvslt_h, vvv) | ||
106 | +INSN_LASX(xvslt_w, vvv) | ||
107 | +INSN_LASX(xvslt_d, vvv) | ||
108 | +INSN_LASX(xvslti_b, vv_i) | ||
109 | +INSN_LASX(xvslti_h, vv_i) | ||
110 | +INSN_LASX(xvslti_w, vv_i) | ||
111 | +INSN_LASX(xvslti_d, vv_i) | ||
112 | +INSN_LASX(xvslt_bu, vvv) | ||
113 | +INSN_LASX(xvslt_hu, vvv) | ||
114 | +INSN_LASX(xvslt_wu, vvv) | ||
115 | +INSN_LASX(xvslt_du, vvv) | ||
116 | +INSN_LASX(xvslti_bu, vv_i) | ||
117 | +INSN_LASX(xvslti_hu, vv_i) | ||
118 | +INSN_LASX(xvslti_wu, vv_i) | ||
119 | +INSN_LASX(xvslti_du, vv_i) | ||
120 | + | ||
121 | INSN_LASX(xvreplgr2vr_b, vr) | ||
122 | INSN_LASX(xvreplgr2vr_h, vr) | ||
123 | INSN_LASX(xvreplgr2vr_w, vr) | ||
124 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/target/loongarch/vec_helper.c | ||
127 | +++ b/target/loongarch/vec_helper.c | ||
128 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, | ||
129 | #define VSLE(a, b) (a <= b ? -1 : 0) | ||
130 | #define VSLT(a, b) (a < b ? -1 : 0) | ||
131 | |||
132 | -#define VCMPI(NAME, BIT, E, DO_OP) \ | ||
133 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
134 | -{ \ | ||
135 | - int i; \ | ||
136 | - VReg *Vd = (VReg *)vd; \ | ||
137 | - VReg *Vj = (VReg *)vj; \ | ||
138 | - typedef __typeof(Vd->E(0)) TD; \ | ||
139 | - \ | ||
140 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
141 | - Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
142 | - } \ | ||
143 | +#define VCMPI(NAME, BIT, E, DO_OP) \ | ||
144 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
145 | +{ \ | ||
146 | + int i; \ | ||
147 | + VReg *Vd = (VReg *)vd; \ | ||
148 | + VReg *Vj = (VReg *)vj; \ | ||
149 | + typedef __typeof(Vd->E(0)) TD; \ | ||
150 | + int oprsz = simd_oprsz(desc); \ | ||
151 | + \ | ||
152 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
153 | + Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
154 | + } \ | ||
155 | } | ||
156 | |||
157 | VCMPI(vseqi_b, 8, B, VSEQ) | ||
158 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
159 | index XXXXXXX..XXXXXXX 100644 | ||
160 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
161 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
162 | @@ -XXX,XX +XXX,XX @@ TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w) | ||
163 | TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w) | ||
164 | TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l) | ||
165 | |||
166 | -static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
167 | +static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a, | ||
168 | + uint32_t oprsz, MemOp mop, TCGCond cond) | ||
169 | { | ||
170 | uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
171 | |||
172 | - if (!check_vec(ctx, 16)) { | ||
173 | + if (!check_vec(ctx, oprsz)) { | ||
174 | return true; | ||
175 | } | ||
176 | |||
177 | @@ -XXX,XX +XXX,XX @@ static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
178 | vj_ofs = vec_full_offset(a->vj); | ||
179 | vk_ofs = vec_full_offset(a->vk); | ||
180 | |||
181 | - tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8); | ||
182 | + tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); | ||
183 | return true; | ||
184 | } | ||
185 | |||
186 | -static void do_cmpi_vec(TCGCond cond, | ||
187 | - unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
188 | -{ | ||
189 | - tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
190 | -} | ||
191 | - | ||
192 | -static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
193 | -{ | ||
194 | - do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm); | ||
195 | -} | ||
196 | - | ||
197 | -static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
198 | -{ | ||
199 | - do_cmpi_vec(TCG_COND_LE, vece, t, a, imm); | ||
200 | -} | ||
201 | - | ||
202 | -static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
203 | -{ | ||
204 | - do_cmpi_vec(TCG_COND_LT, vece, t, a, imm); | ||
205 | -} | ||
206 | - | ||
207 | -static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
208 | -{ | ||
209 | - do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm); | ||
210 | -} | ||
211 | - | ||
212 | -static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
213 | -{ | ||
214 | - do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm); | ||
215 | -} | ||
216 | - | ||
217 | -#define DO_CMPI_S(NAME) \ | ||
218 | -static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
219 | -{ \ | ||
220 | - uint32_t vd_ofs, vj_ofs; \ | ||
221 | - \ | ||
222 | - if (!check_vec(ctx, 16)) { \ | ||
223 | - return true; \ | ||
224 | - } \ | ||
225 | - \ | ||
226 | - static const TCGOpcode vecop_list[] = { \ | ||
227 | - INDEX_op_cmp_vec, 0 \ | ||
228 | - }; \ | ||
229 | - static const GVecGen2i op[4] = { \ | ||
230 | - { \ | ||
231 | - .fniv = gen_## NAME ##_s_vec, \ | ||
232 | - .fnoi = gen_helper_## NAME ##_b, \ | ||
233 | - .opt_opc = vecop_list, \ | ||
234 | - .vece = MO_8 \ | ||
235 | - }, \ | ||
236 | - { \ | ||
237 | - .fniv = gen_## NAME ##_s_vec, \ | ||
238 | - .fnoi = gen_helper_## NAME ##_h, \ | ||
239 | - .opt_opc = vecop_list, \ | ||
240 | - .vece = MO_16 \ | ||
241 | - }, \ | ||
242 | - { \ | ||
243 | - .fniv = gen_## NAME ##_s_vec, \ | ||
244 | - .fnoi = gen_helper_## NAME ##_w, \ | ||
245 | - .opt_opc = vecop_list, \ | ||
246 | - .vece = MO_32 \ | ||
247 | - }, \ | ||
248 | - { \ | ||
249 | - .fniv = gen_## NAME ##_s_vec, \ | ||
250 | - .fnoi = gen_helper_## NAME ##_d, \ | ||
251 | - .opt_opc = vecop_list, \ | ||
252 | - .vece = MO_64 \ | ||
253 | - } \ | ||
254 | - }; \ | ||
255 | - \ | ||
256 | - vd_ofs = vec_full_offset(a->vd); \ | ||
257 | - vj_ofs = vec_full_offset(a->vj); \ | ||
258 | - \ | ||
259 | - tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \ | ||
260 | - \ | ||
261 | - return true; \ | ||
262 | -} | ||
263 | - | ||
264 | -DO_CMPI_S(vseqi) | ||
265 | -DO_CMPI_S(vslei) | ||
266 | -DO_CMPI_S(vslti) | ||
267 | - | ||
268 | -#define DO_CMPI_U(NAME) \ | ||
269 | -static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
270 | -{ \ | ||
271 | - uint32_t vd_ofs, vj_ofs; \ | ||
272 | - \ | ||
273 | - if (!check_vec(ctx, 16)) { \ | ||
274 | - return true; \ | ||
275 | - } \ | ||
276 | - \ | ||
277 | - static const TCGOpcode vecop_list[] = { \ | ||
278 | - INDEX_op_cmp_vec, 0 \ | ||
279 | - }; \ | ||
280 | - static const GVecGen2i op[4] = { \ | ||
281 | - { \ | ||
282 | - .fniv = gen_## NAME ##_u_vec, \ | ||
283 | - .fnoi = gen_helper_## NAME ##_bu, \ | ||
284 | - .opt_opc = vecop_list, \ | ||
285 | - .vece = MO_8 \ | ||
286 | - }, \ | ||
287 | - { \ | ||
288 | - .fniv = gen_## NAME ##_u_vec, \ | ||
289 | - .fnoi = gen_helper_## NAME ##_hu, \ | ||
290 | - .opt_opc = vecop_list, \ | ||
291 | - .vece = MO_16 \ | ||
292 | - }, \ | ||
293 | - { \ | ||
294 | - .fniv = gen_## NAME ##_u_vec, \ | ||
295 | - .fnoi = gen_helper_## NAME ##_wu, \ | ||
296 | - .opt_opc = vecop_list, \ | ||
297 | - .vece = MO_32 \ | ||
298 | - }, \ | ||
299 | - { \ | ||
300 | - .fniv = gen_## NAME ##_u_vec, \ | ||
301 | - .fnoi = gen_helper_## NAME ##_du, \ | ||
302 | - .opt_opc = vecop_list, \ | ||
303 | - .vece = MO_64 \ | ||
304 | - } \ | ||
305 | - }; \ | ||
306 | - \ | ||
307 | - vd_ofs = vec_full_offset(a->vd); \ | ||
308 | - vj_ofs = vec_full_offset(a->vj); \ | ||
309 | - \ | ||
310 | - tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \ | ||
311 | - \ | ||
312 | - return true; \ | ||
313 | -} | ||
314 | - | ||
315 | -DO_CMPI_U(vslei) | ||
316 | -DO_CMPI_U(vslti) | ||
317 | +static bool do_cmp(DisasContext *ctx, arg_vvv *a, | ||
318 | + MemOp mop, TCGCond cond) | ||
319 | +{ | ||
320 | + return do_cmp_vl(ctx, a, 16, mop, cond); | ||
321 | +} | ||
322 | + | ||
323 | +static bool do_xcmp(DisasContext *ctx, arg_vvv *a, | ||
324 | + MemOp mop, TCGCond cond) | ||
325 | +{ | ||
326 | + return do_cmp_vl(ctx, a, 32, mop, cond); | ||
327 | +} | ||
328 | + | ||
329 | +static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a, | ||
330 | + uint32_t oprsz, MemOp mop, TCGCond cond) | ||
331 | +{ | ||
332 | + uint32_t vd_ofs, vj_ofs; | ||
333 | + | ||
334 | + if (!check_vec(ctx, oprsz)) { | ||
335 | + return true; | ||
336 | + } | ||
337 | + | ||
338 | + vd_ofs = vec_full_offset(a->vd); | ||
339 | + vj_ofs = vec_full_offset(a->vj); | ||
340 | + | ||
341 | + tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); | ||
342 | + return true; | ||
343 | +} | ||
344 | + | ||
345 | +static bool do_cmpi(DisasContext *ctx, arg_vv_i *a, | ||
346 | + MemOp mop, TCGCond cond) | ||
347 | +{ | ||
348 | + return do_cmpi_vl(ctx, a, 16, mop, cond); | ||
349 | +} | ||
350 | + | ||
351 | +static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a, | ||
352 | + MemOp mop, TCGCond cond) | ||
353 | +{ | ||
354 | + return do_cmpi_vl(ctx, a, 32, mop, cond); | ||
355 | +} | ||
356 | |||
357 | TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ) | ||
358 | TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ) | ||
359 | TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ) | ||
360 | TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ) | ||
361 | -TRANS(vseqi_b, LSX, do_vseqi_s, MO_8) | ||
362 | -TRANS(vseqi_h, LSX, do_vseqi_s, MO_16) | ||
363 | -TRANS(vseqi_w, LSX, do_vseqi_s, MO_32) | ||
364 | -TRANS(vseqi_d, LSX, do_vseqi_s, MO_64) | ||
365 | +TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ) | ||
366 | +TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ) | ||
367 | +TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ) | ||
368 | +TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ) | ||
369 | +TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ) | ||
370 | +TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ) | ||
371 | +TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ) | ||
372 | +TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ) | ||
373 | +TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ) | ||
374 | +TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ) | ||
375 | +TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ) | ||
376 | +TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ) | ||
377 | |||
378 | TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE) | ||
379 | TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE) | ||
380 | TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE) | ||
381 | TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE) | ||
382 | -TRANS(vslei_b, LSX, do_vslei_s, MO_8) | ||
383 | -TRANS(vslei_h, LSX, do_vslei_s, MO_16) | ||
384 | -TRANS(vslei_w, LSX, do_vslei_s, MO_32) | ||
385 | -TRANS(vslei_d, LSX, do_vslei_s, MO_64) | ||
386 | +TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE) | ||
387 | +TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE) | ||
388 | +TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE) | ||
389 | +TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE) | ||
390 | TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU) | ||
391 | TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU) | ||
392 | TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU) | ||
393 | TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU) | ||
394 | -TRANS(vslei_bu, LSX, do_vslei_u, MO_8) | ||
395 | -TRANS(vslei_hu, LSX, do_vslei_u, MO_16) | ||
396 | -TRANS(vslei_wu, LSX, do_vslei_u, MO_32) | ||
397 | -TRANS(vslei_du, LSX, do_vslei_u, MO_64) | ||
398 | +TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU) | ||
399 | +TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU) | ||
400 | +TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU) | ||
401 | +TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU) | ||
402 | +TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE) | ||
403 | +TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE) | ||
404 | +TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE) | ||
405 | +TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE) | ||
406 | +TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE) | ||
407 | +TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE) | ||
408 | +TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE) | ||
409 | +TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE) | ||
410 | +TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU) | ||
411 | +TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU) | ||
412 | +TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU) | ||
413 | +TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU) | ||
414 | +TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU) | ||
415 | +TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU) | ||
416 | +TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU) | ||
417 | +TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU) | ||
418 | |||
419 | TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT) | ||
420 | TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT) | ||
421 | TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT) | ||
422 | TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT) | ||
423 | -TRANS(vslti_b, LSX, do_vslti_s, MO_8) | ||
424 | -TRANS(vslti_h, LSX, do_vslti_s, MO_16) | ||
425 | -TRANS(vslti_w, LSX, do_vslti_s, MO_32) | ||
426 | -TRANS(vslti_d, LSX, do_vslti_s, MO_64) | ||
427 | +TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT) | ||
428 | +TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT) | ||
429 | +TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT) | ||
430 | +TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT) | ||
431 | TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU) | ||
432 | TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU) | ||
433 | TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU) | ||
434 | TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU) | ||
435 | -TRANS(vslti_bu, LSX, do_vslti_u, MO_8) | ||
436 | -TRANS(vslti_hu, LSX, do_vslti_u, MO_16) | ||
437 | -TRANS(vslti_wu, LSX, do_vslti_u, MO_32) | ||
438 | -TRANS(vslti_du, LSX, do_vslti_u, MO_64) | ||
439 | +TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU) | ||
440 | +TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU) | ||
441 | +TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU) | ||
442 | +TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU) | ||
443 | +TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT) | ||
444 | +TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT) | ||
445 | +TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT) | ||
446 | +TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT) | ||
447 | +TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT) | ||
448 | +TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT) | ||
449 | +TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT) | ||
450 | +TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT) | ||
451 | +TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU) | ||
452 | +TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU) | ||
453 | +TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU) | ||
454 | +TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU) | ||
455 | +TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU) | ||
456 | +TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU) | ||
457 | +TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU) | ||
458 | +TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) | ||
459 | |||
460 | static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a) | ||
461 | { | ||
462 | -- | ||
463 | 2.39.1 | ||
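
Replacing the hand-rolled GVecGen2i tables with tcg_gen_gvec_cmpi() lets a single do_cmpi_vl() serve every condition, signedness, and vector length; only the oprsz argument differs between the LSX and LASX wrappers. The per-lane semantics are unchanged: a lane becomes all-ones when the comparison holds and zero otherwise, with the immediate narrowed to the element type. A standalone sketch of that semantics for the byte-element, signed less-than case (illustrative only; vslti_b_ref is a made-up name, not a QEMU function):

```c
/* Reference semantics of VSLTI.B / XVSLTI.B: lane = (vj[i] < imm) ? -1 : 0,
 * with the immediate truncated to the element type as the (TD)imm cast does.
 */
#include <stdint.h>
#include <stdio.h>

static void vslti_b_ref(int8_t *vd, const int8_t *vj, int64_t imm, int oprsz)
{
    for (int i = 0; i < oprsz; i++) {      /* BIT = 8, so oprsz lanes */
        vd[i] = vj[i] < (int8_t)imm ? -1 : 0;
    }
}

int main(void)
{
    int8_t vj[32] = { -3, 0, 5, 7 };       /* a 256-bit LASX register */
    int8_t vd[32];

    vslti_b_ref(vd, vj, 4, 32);
    printf("%d %d %d %d\n", vd[0], vd[1], vd[2], vd[3]);   /* -1 -1 0 0 */
    return 0;
}
```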
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVFCMP.cond.{S/D}. | ||
1 | 3 | ||
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-Id: <20230914022645.1151356-50-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/helper.h | 8 +- | ||
9 | target/loongarch/insns.decode | 3 + | ||
10 | target/loongarch/disas.c | 93 +++++++++++++++++++++ | ||
11 | target/loongarch/vec_helper.c | 4 +- | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 31 ++++--- | ||
13 | 5 files changed, 117 insertions(+), 22 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/helper.h | ||
18 | +++ b/target/loongarch/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
20 | DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
21 | DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
22 | |||
23 | -DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32) | ||
24 | -DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32) | ||
25 | -DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32) | ||
26 | -DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32) | ||
27 | +DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32) | ||
28 | +DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32) | ||
29 | +DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32) | ||
30 | +DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32) | ||
31 | |||
32 | DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
33 | |||
34 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/loongarch/insns.decode | ||
37 | +++ b/target/loongarch/insns.decode | ||
38 | @@ -XXX,XX +XXX,XX @@ xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5 | ||
39 | xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5 | ||
40 | xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5 | ||
41 | |||
42 | +xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond | ||
43 | +xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond | ||
44 | + | ||
45 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
46 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
47 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
48 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/loongarch/disas.c | ||
51 | +++ b/target/loongarch/disas.c | ||
52 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvslti_hu, vv_i) | ||
53 | INSN_LASX(xvslti_wu, vv_i) | ||
54 | INSN_LASX(xvslti_du, vv_i) | ||
55 | |||
56 | +#define output_xvfcmp(C, PREFIX, SUFFIX) \ | ||
57 | +{ \ | ||
58 | + (C)->info->fprintf_func((C)->info->stream, "%08x %s%s\tx%d, x%d, x%d", \ | ||
59 | + (C)->insn, PREFIX, SUFFIX, a->vd, \ | ||
60 | + a->vj, a->vk); \ | ||
61 | +} | ||
62 | +static bool output_xxx_fcond(DisasContext *ctx, arg_vvv_fcond *a, | ||
63 | + const char *suffix) | ||
64 | +{ | ||
65 | + bool ret = true; | ||
66 | + switch (a->fcond) { | ||
67 | + case 0x0: | ||
68 | + output_xvfcmp(ctx, "xvfcmp_caf_", suffix); | ||
69 | + break; | ||
70 | + case 0x1: | ||
71 | + output_xvfcmp(ctx, "xvfcmp_saf_", suffix); | ||
72 | + break; | ||
73 | + case 0x2: | ||
74 | + output_xvfcmp(ctx, "xvfcmp_clt_", suffix); | ||
75 | + break; | ||
76 | + case 0x3: | ||
77 | + output_xvfcmp(ctx, "xvfcmp_slt_", suffix); | ||
78 | + break; | ||
79 | + case 0x4: | ||
80 | + output_xvfcmp(ctx, "xvfcmp_ceq_", suffix); | ||
81 | + break; | ||
82 | + case 0x5: | ||
83 | + output_xvfcmp(ctx, "xvfcmp_seq_", suffix); | ||
84 | + break; | ||
85 | + case 0x6: | ||
86 | + output_xvfcmp(ctx, "xvfcmp_cle_", suffix); | ||
87 | + break; | ||
88 | + case 0x7: | ||
89 | + output_xvfcmp(ctx, "xvfcmp_sle_", suffix); | ||
90 | + break; | ||
91 | + case 0x8: | ||
92 | + output_xvfcmp(ctx, "xvfcmp_cun_", suffix); | ||
93 | + break; | ||
94 | + case 0x9: | ||
95 | + output_xvfcmp(ctx, "xvfcmp_sun_", suffix); | ||
96 | + break; | ||
97 | + case 0xA: | ||
98 | + output_xvfcmp(ctx, "xvfcmp_cult_", suffix); | ||
99 | + break; | ||
100 | + case 0xB: | ||
101 | + output_xvfcmp(ctx, "xvfcmp_sult_", suffix); | ||
102 | + break; | ||
103 | + case 0xC: | ||
104 | + output_xvfcmp(ctx, "xvfcmp_cueq_", suffix); | ||
105 | + break; | ||
106 | + case 0xD: | ||
107 | + output_xvfcmp(ctx, "xvfcmp_sueq_", suffix); | ||
108 | + break; | ||
109 | + case 0xE: | ||
110 | + output_xvfcmp(ctx, "xvfcmp_cule_", suffix); | ||
111 | + break; | ||
112 | + case 0xF: | ||
113 | + output_xvfcmp(ctx, "xvfcmp_sule_", suffix); | ||
114 | + break; | ||
115 | + case 0x10: | ||
116 | + output_xvfcmp(ctx, "xvfcmp_cne_", suffix); | ||
117 | + break; | ||
118 | + case 0x11: | ||
119 | + output_xvfcmp(ctx, "xvfcmp_sne_", suffix); | ||
120 | + break; | ||
121 | + case 0x14: | ||
122 | + output_xvfcmp(ctx, "xvfcmp_cor_", suffix); | ||
123 | + break; | ||
124 | + case 0x15: | ||
125 | + output_xvfcmp(ctx, "xvfcmp_sor_", suffix); | ||
126 | + break; | ||
127 | + case 0x18: | ||
128 | + output_xvfcmp(ctx, "xvfcmp_cune_", suffix); | ||
129 | + break; | ||
130 | + case 0x19: | ||
131 | + output_xvfcmp(ctx, "xvfcmp_sune_", suffix); | ||
132 | + break; | ||
133 | + default: | ||
134 | + ret = false; | ||
135 | + } | ||
136 | + return ret; | ||
137 | +} | ||
138 | + | ||
139 | +#define LASX_FCMP_INSN(suffix) \ | ||
140 | +static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \ | ||
141 | + arg_vvv_fcond *a) \ | ||
142 | +{ \ | ||
143 | + return output_xxx_fcond(ctx, a, #suffix); \ | ||
144 | +} | ||
145 | + | ||
146 | +LASX_FCMP_INSN(s) | ||
147 | +LASX_FCMP_INSN(d) | ||
148 | + | ||
149 | INSN_LASX(xvreplgr2vr_b, vr) | ||
150 | INSN_LASX(xvreplgr2vr_h, vr) | ||
151 | INSN_LASX(xvreplgr2vr_w, vr) | ||
152 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/target/loongarch/vec_helper.c | ||
155 | +++ b/target/loongarch/vec_helper.c | ||
156 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfcmp_common(CPULoongArchState *env, | ||
157 | } | ||
158 | |||
159 | #define VFCMP(NAME, BIT, E, FN) \ | ||
160 | -void HELPER(NAME)(CPULoongArchState *env, \ | ||
161 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \ | ||
162 | uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ | ||
163 | { \ | ||
164 | int i; \ | ||
165 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | ||
166 | VReg *Vk = &(env->fpr[vk].vreg); \ | ||
167 | \ | ||
168 | vec_clear_cause(env); \ | ||
169 | - for (i = 0; i < LSX_LEN/BIT ; i++) { \ | ||
170 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
171 | FloatRelation cmp; \ | ||
172 | cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ | ||
173 | t.E(i) = vfcmp_common(env, cmp, flags); \ | ||
174 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
177 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
178 | @@ -XXX,XX +XXX,XX @@ TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU) | ||
179 | TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU) | ||
180 | TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) | ||
181 | |||
182 | -static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a) | ||
183 | +static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) | ||
184 | { | ||
185 | uint32_t flags; | ||
186 | - void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
187 | + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
188 | TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
189 | TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
190 | TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
191 | + TCGv_i32 oprsz = tcg_constant_i32(sz); | ||
192 | |||
193 | - if (!avail_LSX(ctx)) { | ||
194 | - return false; | ||
195 | - } | ||
196 | - | ||
197 | - if (!check_vec(ctx, 16)) { | ||
198 | + if (!check_vec(ctx, sz)) { | ||
199 | return true; | ||
200 | } | ||
201 | |||
202 | fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); | ||
203 | flags = get_fcmp_flags(a->fcond >> 1); | ||
204 | - fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags)); | ||
205 | + fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); | ||
206 | |||
207 | return true; | ||
208 | } | ||
209 | |||
210 | -static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a) | ||
211 | +static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) | ||
212 | { | ||
213 | uint32_t flags; | ||
214 | - void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
215 | + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
216 | TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
217 | TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
218 | TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
219 | + TCGv_i32 oprsz = tcg_constant_i32(sz); | ||
220 | |||
221 | - if (!avail_LSX(ctx)) { | ||
222 | - return false; | ||
223 | - } | ||
224 | - | ||
225 | - if (!check_vec(ctx, 16)) { | ||
226 | + if (!check_vec(ctx, sz)) { | ||
227 | return true; | ||
228 | } | ||
229 | |||
230 | fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); | ||
231 | flags = get_fcmp_flags(a->fcond >> 1); | ||
232 | - fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags)); | ||
233 | + fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); | ||
234 | |||
235 | return true; | ||
236 | } | ||
237 | |||
238 | +TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16) | ||
239 | +TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16) | ||
240 | +TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32) | ||
241 | +TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32) | ||
242 | + | ||
243 | static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a) | ||
244 | { | ||
245 | if (!avail_LSX(ctx)) { | ||
246 | -- | ||
247 | 2.39.1 | ||
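
As the translator change makes explicit, fcond encodes two independent choices: bit 0 picks the signaling helper (vfcmp_s_*) over the quiet one (vfcmp_c_*), and fcond >> 1 names the condition whose flags get_fcmp_flags() produces. A standalone sketch of that decoding, reconstructed from the disassembler switch above (illustrative C; the table indices follow the encodings shown there, and the gaps are reserved encodings):

```c
/* Decode an xvfcmp fcond field into its mnemonic suffix. */
#include <stdio.h>

static const char *const cond_name[13] = {
    "af", "lt", "eq", "le", "un", "ult", "ueq",
    "ule", "ne", NULL, "or", NULL, "une",
};

static void print_fcond(unsigned fcond)
{
    unsigned idx = fcond >> 1;
    const char *c = idx < 13 ? cond_name[idx] : NULL;

    if (c) {
        printf("xvfcmp_%c%s\n", (fcond & 1) ? 's' : 'c', c);  /* s = signaling */
    } else {
        printf("reserved\n");
    }
}

int main(void)
{
    print_fcond(0x2);   /* xvfcmp_clt */
    print_fcond(0x11);  /* xvfcmp_sne */
    print_fcond(0x13);  /* reserved   */
    return 0;
}
```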
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVBITSEL.V; | ||
3 | - XVBITSELI.B; | ||
4 | - XVSET{EQZ/NEZ}.V; | ||
5 | - XVSETANYEQZ.{B/H/W/D}; | ||
6 | - XVSETALLNEZ.{B/H/W/D}. | ||
1 | 7 | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-Id: <20230914022645.1151356-51-gaosong@loongson.cn> | ||
11 | --- | ||
12 | target/loongarch/helper.h | 16 ++-- | ||
13 | target/loongarch/insns.decode | 15 ++++ | ||
14 | target/loongarch/disas.c | 19 ++++ | ||
15 | target/loongarch/vec_helper.c | 42 +++++---- | ||
16 | target/loongarch/insn_trans/trans_vec.c.inc | 99 ++++++++++++++++----- | ||
17 | 5 files changed, 147 insertions(+), 44 deletions(-) | ||
18 | |||
19 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/loongarch/helper.h | ||
22 | +++ b/target/loongarch/helper.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32) | ||
24 | |||
25 | DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
26 | |||
27 | -DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32) | ||
28 | -DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32) | ||
29 | -DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32) | ||
30 | -DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32) | ||
31 | -DEF_HELPER_3(vsetallnez_b, void, env, i32, i32) | ||
32 | -DEF_HELPER_3(vsetallnez_h, void, env, i32, i32) | ||
33 | -DEF_HELPER_3(vsetallnez_w, void, env, i32, i32) | ||
34 | -DEF_HELPER_3(vsetallnez_d, void, env, i32, i32) | ||
35 | +DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32) | ||
36 | +DEF_HELPER_4(vsetanyeqz_h, void, env, i32, i32, i32) | ||
37 | +DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32) | ||
38 | +DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32) | ||
39 | +DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32) | ||
40 | +DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32) | ||
41 | +DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32) | ||
42 | +DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32) | ||
43 | |||
44 | DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
45 | DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
46 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/loongarch/insns.decode | ||
49 | +++ b/target/loongarch/insns.decode | ||
50 | @@ -XXX,XX +XXX,XX @@ xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5 | ||
51 | xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond | ||
52 | xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond | ||
53 | |||
54 | +xvbitsel_v 0000 11010010 ..... ..... ..... ..... @vvvv | ||
55 | + | ||
56 | +xvbitseli_b 0111 01111100 01 ........ ..... ..... @vv_ui8 | ||
57 | + | ||
58 | +xvseteqz_v 0111 01101001 11001 00110 ..... 00 ... @cv | ||
59 | +xvsetnez_v 0111 01101001 11001 00111 ..... 00 ... @cv | ||
60 | +xvsetanyeqz_b 0111 01101001 11001 01000 ..... 00 ... @cv | ||
61 | +xvsetanyeqz_h 0111 01101001 11001 01001 ..... 00 ... @cv | ||
62 | +xvsetanyeqz_w 0111 01101001 11001 01010 ..... 00 ... @cv | ||
63 | +xvsetanyeqz_d 0111 01101001 11001 01011 ..... 00 ... @cv | ||
64 | +xvsetallnez_b 0111 01101001 11001 01100 ..... 00 ... @cv | ||
65 | +xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv | ||
66 | +xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv | ||
67 | +xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv | ||
68 | + | ||
69 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
70 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
71 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
72 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/loongarch/disas.c | ||
75 | +++ b/target/loongarch/disas.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \ | ||
77 | return true; \ | ||
78 | } | ||
79 | |||
80 | +static void output_cv_x(DisasContext *ctx, arg_cv *a, const char *mnemonic) | ||
81 | +{ | ||
82 | + output(ctx, mnemonic, "fcc%d, x%d", a->cd, a->vj); | ||
83 | +} | ||
84 | + | ||
85 | static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic) | ||
86 | { | ||
87 | output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm); | ||
88 | @@ -XXX,XX +XXX,XX @@ static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \ | ||
89 | LASX_FCMP_INSN(s) | ||
90 | LASX_FCMP_INSN(d) | ||
91 | |||
92 | +INSN_LASX(xvbitsel_v, vvvv) | ||
93 | +INSN_LASX(xvbitseli_b, vv_i) | ||
94 | + | ||
95 | +INSN_LASX(xvseteqz_v, cv) | ||
96 | +INSN_LASX(xvsetnez_v, cv) | ||
97 | +INSN_LASX(xvsetanyeqz_b, cv) | ||
98 | +INSN_LASX(xvsetanyeqz_h, cv) | ||
99 | +INSN_LASX(xvsetanyeqz_w, cv) | ||
100 | +INSN_LASX(xvsetanyeqz_d, cv) | ||
101 | +INSN_LASX(xvsetallnez_b, cv) | ||
102 | +INSN_LASX(xvsetallnez_h, cv) | ||
103 | +INSN_LASX(xvsetallnez_w, cv) | ||
104 | +INSN_LASX(xvsetallnez_d, cv) | ||
105 | + | ||
106 | INSN_LASX(xvreplgr2vr_b, vr) | ||
107 | INSN_LASX(xvreplgr2vr_h, vr) | ||
108 | INSN_LASX(xvreplgr2vr_w, vr) | ||
109 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
110 | index XXXXXXX..XXXXXXX 100644 | ||
111 | --- a/target/loongarch/vec_helper.c | ||
112 | +++ b/target/loongarch/vec_helper.c | ||
113 | @@ -XXX,XX +XXX,XX @@ VFCMP(vfcmp_s_s, 32, UW, float32_compare) | ||
114 | VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) | ||
115 | VFCMP(vfcmp_s_d, 64, UD, float64_compare) | ||
116 | |||
117 | -void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
118 | +void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
119 | { | ||
120 | int i; | ||
121 | VReg *Vd = (VReg *)vd; | ||
122 | VReg *Vj = (VReg *)vj; | ||
123 | |||
124 | - for (i = 0; i < 16; i++) { | ||
125 | + for (i = 0; i < simd_oprsz(desc); i++) { | ||
126 | Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); | ||
127 | } | ||
128 | } | ||
129 | @@ -XXX,XX +XXX,XX @@ void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
130 | /* Copy from target/arm/tcg/sve_helper.c */ | ||
131 | static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) | ||
132 | { | ||
133 | - uint64_t bits = 8 << esz; | ||
134 | + int bits = 8 << esz; | ||
135 | uint64_t ones = dup_const(esz, 1); | ||
136 | uint64_t signs = ones << (bits - 1); | ||
137 | uint64_t cmp0, cmp1; | ||
138 | @@ -XXX,XX +XXX,XX @@ static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) | ||
139 | return (cmp0 | cmp1) & signs; | ||
140 | } | ||
141 | |||
142 | -#define SETANYEQZ(NAME, MO) \ | ||
143 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ | ||
144 | -{ \ | ||
145 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
146 | - \ | ||
147 | - env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ | ||
148 | +#define SETANYEQZ(NAME, MO) \ | ||
149 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
150 | + uint32_t oprsz, uint32_t cd, uint32_t vj) \ | ||
151 | +{ \ | ||
152 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
153 | + \ | ||
154 | + env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ | ||
155 | + if (oprsz == 32) { \ | ||
156 | + env->cf[cd & 0x7] = env->cf[cd & 0x7] || \ | ||
157 | + do_match2(0, Vj->D(2), Vj->D(3), MO); \ | ||
158 | + } \ | ||
159 | } | ||
160 | + | ||
161 | SETANYEQZ(vsetanyeqz_b, MO_8) | ||
162 | SETANYEQZ(vsetanyeqz_h, MO_16) | ||
163 | SETANYEQZ(vsetanyeqz_w, MO_32) | ||
164 | SETANYEQZ(vsetanyeqz_d, MO_64) | ||
165 | |||
166 | -#define SETALLNEZ(NAME, MO) \ | ||
167 | -void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ | ||
168 | -{ \ | ||
169 | - VReg *Vj = &(env->fpr[vj].vreg); \ | ||
170 | - \ | ||
171 | - env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ | ||
172 | +#define SETALLNEZ(NAME, MO) \ | ||
173 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
174 | + uint32_t oprsz, uint32_t cd, uint32_t vj) \ | ||
175 | +{ \ | ||
176 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
177 | + \ | ||
178 | + env->cf[cd & 0x7] = !do_match2(0, Vj->D(0), Vj->D(1), MO); \ | ||
179 | + if (oprsz == 32) { \ | ||
180 | + env->cf[cd & 0x7] = env->cf[cd & 0x7] && \ | ||
181 | + !do_match2(0, Vj->D(2), Vj->D(3), MO); \ | ||
182 | + } \ | ||
183 | } | ||
184 | + | ||
185 | SETALLNEZ(vsetallnez_b, MO_8) | ||
186 | SETALLNEZ(vsetallnez_h, MO_16) | ||
187 | SETALLNEZ(vsetallnez_w, MO_32) | ||
188 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
189 | index XXXXXXX..XXXXXXX 100644 | ||
190 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
191 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
192 | @@ -XXX,XX +XXX,XX @@ static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) | ||
193 | return gen_vv_i_vl(ctx, a, 32, fn); | ||
194 | } | ||
195 | |||
196 | -static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
197 | - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
198 | +static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz, | ||
199 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
200 | { | ||
201 | - TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
202 | - TCGv_i32 cd = tcg_constant_i32(a->cd); | ||
203 | - | ||
204 | - if (!check_vec(ctx, 16)) { | ||
205 | + if (!check_vec(ctx, sz)) { | ||
206 | return true; | ||
207 | } | ||
208 | |||
209 | - func(cpu_env, cd, vj); | ||
210 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
211 | + TCGv_i32 cd = tcg_constant_i32(a->cd); | ||
212 | + TCGv_i32 oprsz = tcg_constant_i32(sz); | ||
213 | + | ||
214 | + func(cpu_env, oprsz, cd, vj); | ||
215 | return true; | ||
216 | } | ||
217 | |||
218 | +static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
219 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
220 | +{ | ||
221 | + return gen_cv_vl(ctx, a, 16, func); | ||
222 | +} | ||
223 | + | ||
224 | +static bool gen_cx(DisasContext *ctx, arg_cv *a, | ||
225 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
226 | +{ | ||
227 | + return gen_cv_vl(ctx, a, 32, func); | ||
228 | +} | ||
229 | + | ||
230 | static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a, | ||
231 | uint32_t oprsz, MemOp mop, | ||
232 | void (*func)(unsigned, uint32_t, uint32_t, | ||
233 | @@ -XXX,XX +XXX,XX @@ TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16) | ||
234 | TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32) | ||
235 | TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32) | ||
236 | |||
237 | -static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a) | ||
238 | +static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz) | ||
239 | { | ||
240 | - if (!avail_LSX(ctx)) { | ||
241 | - return false; | ||
242 | - } | ||
243 | - | ||
244 | - if (!check_vec(ctx, 16)) { | ||
245 | + if (!check_vec(ctx, oprsz)) { | ||
246 | return true; | ||
247 | } | ||
248 | |||
249 | tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va), | ||
250 | vec_full_offset(a->vk), vec_full_offset(a->vj), | ||
251 | - 16, ctx->vl/8); | ||
252 | + oprsz, ctx->vl / 8); | ||
253 | return true; | ||
254 | } | ||
255 | |||
256 | +TRANS(vbitsel_v, LSX, do_vbitsel_v, 16) | ||
257 | +TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32) | ||
258 | + | ||
259 | static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm) | ||
260 | { | ||
261 | tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b); | ||
262 | } | ||
263 | |||
264 | -static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a) | ||
265 | +static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) | ||
266 | { | ||
267 | static const GVecGen2i op = { | ||
268 | .fniv = gen_vbitseli, | ||
269 | @@ -XXX,XX +XXX,XX @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a) | ||
270 | .load_dest = true | ||
271 | }; | ||
272 | |||
273 | - if (!avail_LSX(ctx)) { | ||
274 | - return false; | ||
275 | - } | ||
276 | - | ||
277 | - if (!check_vec(ctx, 16)) { | ||
278 | + if (!check_vec(ctx, oprsz)) { | ||
279 | return true; | ||
280 | } | ||
281 | |||
282 | tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj), | ||
283 | - 16, ctx->vl/8, a->imm, &op); | ||
284 | + oprsz, ctx->vl / 8, a->imm, &op); | ||
285 | return true; | ||
286 | } | ||
287 | |||
288 | +TRANS(vbitseli_b, LSX, do_vbitseli_b, 16) | ||
289 | +TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32) | ||
290 | + | ||
291 | #define VSET(NAME, COND) \ | ||
292 | static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \ | ||
293 | { \ | ||
294 | @@ -XXX,XX +XXX,XX @@ TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h) | ||
295 | TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w) | ||
296 | TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d) | ||
297 | |||
298 | +#define XVSET(NAME, COND) \ | ||
299 | +static bool trans_## NAME(DisasContext *ctx, arg_cv *a) \ | ||
300 | +{ \ | ||
301 | + TCGv_i64 t1, t2, d[4]; \ | ||
302 | + \ | ||
303 | + if (!avail_LASX(ctx)) { \ | ||
304 | + return false; \ | ||
305 | + } \ | ||
306 | + \ | ||
307 | + if (!check_vec(ctx, 32)) { \ | ||
308 | + return true; \ | ||
309 | + } \ | ||
310 | + \ | ||
311 | + d[0] = tcg_temp_new_i64(); \ | ||
312 | + d[1] = tcg_temp_new_i64(); \ | ||
313 | + d[2] = tcg_temp_new_i64(); \ | ||
314 | + d[3] = tcg_temp_new_i64(); \ | ||
315 | + t1 = tcg_temp_new_i64(); \ | ||
316 | + t2 = tcg_temp_new_i64(); \ | ||
317 | + \ | ||
318 | + get_vreg64(d[0], a->vj, 0); \ | ||
319 | + get_vreg64(d[1], a->vj, 1); \ | ||
320 | + get_vreg64(d[2], a->vj, 2); \ | ||
321 | + get_vreg64(d[3], a->vj, 3); \ | ||
322 | + \ | ||
323 | + tcg_gen_or_i64(t1, d[0], d[1]); \ | ||
324 | + tcg_gen_or_i64(t2, d[2], d[3]); \ | ||
325 | + tcg_gen_or_i64(t1, t2, t1); \ | ||
326 | + tcg_gen_setcondi_i64(COND, t1, t1, 0); \ | ||
327 | + tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ | ||
328 | + \ | ||
329 | + return true; \ | ||
330 | +} | ||
331 | + | ||
332 | +XVSET(xvseteqz_v, TCG_COND_EQ) | ||
333 | +XVSET(xvsetnez_v, TCG_COND_NE) | ||
334 | + | ||
335 | +TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b) | ||
336 | +TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h) | ||
337 | +TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w) | ||
338 | +TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d) | ||
339 | +TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b) | ||
340 | +TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h) | ||
341 | +TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w) | ||
342 | +TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d) | ||
343 | + | ||
344 | static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a) | ||
345 | { | ||
346 | TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
347 | -- | ||
348 | 2.39.1 |
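
A note on the XVSET macro above: it folds all 256 bits of vj together with three 64-bit ORs, applies COND against zero, and stores the result into cf[cd & 0x7]. A minimal standalone C sketch of the same scalar semantics (function names are made up here; this is not QEMU code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical model, not QEMU code: xvseteqz.v sets the flag when
 * all 256 bits of vj are zero. */
static bool xvseteqz_v(const uint64_t vj[4])
{
    uint64_t t = (vj[0] | vj[1]) | (vj[2] | vj[3]);
    return t == 0;
}

/* xvsetnez.v sets the flag when at least one bit of vj is set. */
static bool xvsetnez_v(const uint64_t vj[4])
{
    uint64_t t = (vj[0] | vj[1]) | (vj[2] | vj[3]);
    return t != 0;
}

int main(void)
{
    uint64_t zero[4] = {0, 0, 0, 0};
    uint64_t some[4] = {0, 0, 1, 0};

    printf("xvseteqz.v(zero) = %d\n", xvseteqz_v(zero)); /* 1 */
    printf("xvsetnez.v(some) = %d\n", xvsetnez_v(some)); /* 1 */
    return 0;
}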
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVINSGR2VR.{W/D}; | ||
3 | - XVPICKVE2GR.{W/D}[U]. | ||
1 | 4 | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20230914022645.1151356-52-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/insns.decode | 7 + | ||
10 | target/loongarch/disas.c | 17 ++ | ||
11 | target/loongarch/translate.c | 12 ++ | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 208 ++++---------------- | ||
13 | 4 files changed, 74 insertions(+), 170 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv | ||
20 | xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv | ||
21 | xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv | ||
22 | |||
23 | +xvinsgr2vr_w 0111 01101110 10111 10 ... ..... ..... @vr_ui3 | ||
24 | +xvinsgr2vr_d 0111 01101110 10111 110 .. ..... ..... @vr_ui2 | ||
25 | +xvpickve2gr_w 0111 01101110 11111 10 ... ..... ..... @rv_ui3 | ||
26 | +xvpickve2gr_d 0111 01101110 11111 110 .. ..... ..... @rv_ui2 | ||
27 | +xvpickve2gr_wu 0111 01101111 00111 10 ... ..... ..... @rv_ui3 | ||
28 | +xvpickve2gr_du 0111 01101111 00111 110 .. ..... ..... @rv_ui2 | ||
29 | + | ||
30 | xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
31 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
32 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
33 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/loongarch/disas.c | ||
36 | +++ b/target/loongarch/disas.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic) | ||
38 | output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj); | ||
39 | } | ||
40 | |||
41 | +static void output_vr_i_x(DisasContext *ctx, arg_vr_i *a, const char *mnemonic) | ||
42 | +{ | ||
43 | + output(ctx, mnemonic, "x%d, r%d, 0x%x", a->vd, a->rj, a->imm); | ||
44 | +} | ||
45 | + | ||
46 | +static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic) | ||
47 | +{ | ||
48 | + output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm); | ||
49 | +} | ||
50 | + | ||
51 | INSN_LASX(xvadd_b, vvv) | ||
52 | INSN_LASX(xvadd_h, vvv) | ||
53 | INSN_LASX(xvadd_w, vvv) | ||
54 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvsetallnez_h, cv) | ||
55 | INSN_LASX(xvsetallnez_w, cv) | ||
56 | INSN_LASX(xvsetallnez_d, cv) | ||
57 | |||
58 | +INSN_LASX(xvinsgr2vr_w, vr_i) | ||
59 | +INSN_LASX(xvinsgr2vr_d, vr_i) | ||
60 | +INSN_LASX(xvpickve2gr_w, rv_i) | ||
61 | +INSN_LASX(xvpickve2gr_d, rv_i) | ||
62 | +INSN_LASX(xvpickve2gr_wu, rv_i) | ||
63 | +INSN_LASX(xvpickve2gr_du, rv_i) | ||
64 | + | ||
65 | INSN_LASX(xvreplgr2vr_b, vr) | ||
66 | INSN_LASX(xvreplgr2vr_h, vr) | ||
67 | INSN_LASX(xvreplgr2vr_w, vr) | ||
68 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/translate.c | ||
71 | +++ b/target/loongarch/translate.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static inline int vec_full_offset(int regno) | ||
73 | return offsetof(CPULoongArchState, fpr[regno]); | ||
74 | } | ||
75 | |||
76 | +static inline int vec_reg_offset(int regno, int index, MemOp mop) | ||
77 | +{ | ||
78 | + const uint8_t size = 1 << mop; | ||
79 | + int offs = index * size; | ||
80 | + | ||
81 | + if (HOST_BIG_ENDIAN && size < 8) { | ||
82 | + offs ^= (8 - size); | ||
83 | + } | ||
84 | + | ||
85 | + return offs + vec_full_offset(regno); | ||
86 | +} | ||
87 | + | ||
88 | static inline void get_vreg64(TCGv_i64 dest, int regno, int index) | ||
89 | { | ||
90 | tcg_gen_ld_i64(dest, cpu_env, | ||
91 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
92 | index XXXXXXX..XXXXXXX 100644 | ||
93 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
94 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
95 | @@ -XXX,XX +XXX,XX @@ TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h) | ||
96 | TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w) | ||
97 | TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d) | ||
98 | |||
99 | -static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a) | ||
100 | +static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop, | ||
101 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
102 | { | ||
103 | TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
104 | |||
105 | - if (!avail_LSX(ctx)) { | ||
106 | - return false; | ||
107 | - } | ||
108 | - | ||
109 | - if (!check_vec(ctx, 16)) { | ||
110 | - return true; | ||
111 | - } | ||
112 | - | ||
113 | - tcg_gen_st8_i64(src, cpu_env, | ||
114 | - offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm))); | ||
115 | - return true; | ||
116 | -} | ||
117 | - | ||
118 | -static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a) | ||
119 | -{ | ||
120 | - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
121 | - | ||
122 | - if (!avail_LSX(ctx)) { | ||
123 | - return false; | ||
124 | - } | ||
125 | - | ||
126 | - if (!check_vec(ctx, 16)) { | ||
127 | - return true; | ||
128 | - } | ||
129 | - | ||
130 | - tcg_gen_st16_i64(src, cpu_env, | ||
131 | - offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm))); | ||
132 | - return true; | ||
133 | -} | ||
134 | - | ||
135 | -static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a) | ||
136 | -{ | ||
137 | - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
138 | - | ||
139 | - if (!avail_LSX(ctx)) { | ||
140 | - return false; | ||
141 | - } | ||
142 | - | ||
143 | - if (!check_vec(ctx, 16)) { | ||
144 | + if (!check_vec(ctx, oprsz)) { | ||
145 | return true; | ||
146 | } | ||
147 | |||
148 | - tcg_gen_st32_i64(src, cpu_env, | ||
149 | - offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm))); | ||
150 | - return true; | ||
151 | -} | ||
152 | - | ||
153 | -static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a) | ||
154 | -{ | ||
155 | - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
156 | - | ||
157 | - if (!avail_LSX(ctx)) { | ||
158 | - return false; | ||
159 | - } | ||
160 | + func(src, cpu_env, vec_reg_offset(a->vd, a->imm, mop)); | ||
161 | |||
162 | - if (!check_vec(ctx, 16)) { | ||
163 | - return true; | ||
164 | - } | ||
165 | - | ||
166 | - tcg_gen_st_i64(src, cpu_env, | ||
167 | - offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm))); | ||
168 | return true; | ||
169 | } | ||
170 | |||
171 | -static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a) | ||
172 | +static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop, | ||
173 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
174 | { | ||
175 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
176 | - | ||
177 | - if (!avail_LSX(ctx)) { | ||
178 | - return false; | ||
179 | - } | ||
180 | - | ||
181 | - if (!check_vec(ctx, 16)) { | ||
182 | - return true; | ||
183 | - } | ||
184 | - | ||
185 | - tcg_gen_ld8s_i64(dst, cpu_env, | ||
186 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
187 | - return true; | ||
188 | + return gen_g2v_vl(ctx, a, 16, mop, func); | ||
189 | } | ||
190 | |||
191 | -static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a) | ||
192 | +static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop, | ||
193 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
194 | { | ||
195 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
196 | - | ||
197 | - if (!avail_LSX(ctx)) { | ||
198 | - return false; | ||
199 | - } | ||
200 | - | ||
201 | - if (!check_vec(ctx, 16)) { | ||
202 | - return true; | ||
203 | - } | ||
204 | - | ||
205 | - tcg_gen_ld16s_i64(dst, cpu_env, | ||
206 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
207 | - return true; | ||
208 | + return gen_g2v_vl(ctx, a, 32, mop, func); | ||
209 | } | ||
210 | |||
211 | -static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a) | ||
212 | -{ | ||
213 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
214 | +TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64) | ||
215 | +TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64) | ||
216 | +TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64) | ||
217 | +TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64) | ||
218 | +TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64) | ||
219 | +TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64) | ||
220 | |||
221 | - if (!avail_LSX(ctx)) { | ||
222 | - return false; | ||
223 | - } | ||
224 | - | ||
225 | - if (!check_vec(ctx, 16)) { | ||
226 | - return true; | ||
227 | - } | ||
228 | - | ||
229 | - tcg_gen_ld32s_i64(dst, cpu_env, | ||
230 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
231 | - return true; | ||
232 | -} | ||
233 | - | ||
234 | -static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a) | ||
235 | +static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop, | ||
236 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
237 | { | ||
238 | TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
239 | |||
240 | - if (!avail_LSX(ctx)) { | ||
241 | - return false; | ||
242 | - } | ||
243 | - | ||
244 | - if (!check_vec(ctx, 16)) { | ||
245 | + if (!check_vec(ctx, oprsz)) { | ||
246 | return true; | ||
247 | } | ||
248 | |||
249 | - tcg_gen_ld_i64(dst, cpu_env, | ||
250 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
251 | - return true; | ||
252 | -} | ||
253 | + func(dst, cpu_env, vec_reg_offset(a->vj, a->imm, mop)); | ||
254 | |||
255 | -static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a) | ||
256 | -{ | ||
257 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
258 | - | ||
259 | - if (!avail_LSX(ctx)) { | ||
260 | - return false; | ||
261 | - } | ||
262 | - | ||
263 | - if (!check_vec(ctx, 16)) { | ||
264 | - return true; | ||
265 | - } | ||
266 | - | ||
267 | - tcg_gen_ld8u_i64(dst, cpu_env, | ||
268 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
269 | return true; | ||
270 | } | ||
271 | |||
272 | -static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a) | ||
273 | +static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, | ||
274 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
275 | { | ||
276 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
277 | - | ||
278 | - if (!avail_LSX(ctx)) { | ||
279 | - return false; | ||
280 | - } | ||
281 | - | ||
282 | - if (!check_vec(ctx, 16)) { | ||
283 | - return true; | ||
284 | - } | ||
285 | - | ||
286 | - tcg_gen_ld16u_i64(dst, cpu_env, | ||
287 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
288 | - return true; | ||
289 | + return gen_v2g_vl(ctx, a, 16, mop, func); | ||
290 | } | ||
291 | |||
292 | -static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a) | ||
293 | +static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, | ||
294 | + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) | ||
295 | { | ||
296 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
297 | - | ||
298 | - if (!avail_LSX(ctx)) { | ||
299 | - return false; | ||
300 | - } | ||
301 | - | ||
302 | - if (!check_vec(ctx, 16)) { | ||
303 | - return true; | ||
304 | - } | ||
305 | - | ||
306 | - tcg_gen_ld32u_i64(dst, cpu_env, | ||
307 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
308 | - return true; | ||
309 | + return gen_v2g_vl(ctx, a, 32, mop, func); | ||
310 | } | ||
311 | |||
312 | -static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a) | ||
313 | -{ | ||
314 | - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); | ||
315 | - | ||
316 | - if (!avail_LSX(ctx)) { | ||
317 | - return false; | ||
318 | - } | ||
319 | - | ||
320 | - if (!check_vec(ctx, 16)) { | ||
321 | - return true; | ||
322 | - } | ||
323 | - | ||
324 | - tcg_gen_ld_i64(dst, cpu_env, | ||
325 | - offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
326 | - return true; | ||
327 | -} | ||
328 | +TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64) | ||
329 | +TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64) | ||
330 | +TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64) | ||
331 | +TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) | ||
332 | +TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64) | ||
333 | +TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64) | ||
334 | +TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64) | ||
335 | +TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) | ||
336 | +TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64) | ||
337 | +TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) | ||
338 | +TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64) | ||
339 | +TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) | ||
340 | |||
341 | static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a, | ||
342 | uint32_t oprsz, MemOp mop) | ||
343 | -- | ||
344 | 2.39.1 |
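
The interesting piece of this patch is the new vec_reg_offset() helper: it turns an element index into a byte offset, and on big-endian hosts it XOR-flips the low bits of the offset because sub-doubleword elements sit in reverse order within each 64-bit chunk. A small host-side sketch of that arithmetic (element_offset is a made-up name; the real helper also adds vec_full_offset(regno) on top):

#include <stdio.h>

/* Hypothetical sketch of the vec_reg_offset() arithmetic, not QEMU code. */
static int element_offset(int index, int size_log2, int host_big_endian)
{
    const int size = 1 << size_log2;   /* element size in bytes */
    int offs = index * size;           /* little-endian layout */

    /* On big-endian hosts the elements within each 64-bit chunk are
     * stored in the opposite order, so flip the low offset bits. */
    if (host_big_endian && size < 8) {
        offs ^= 8 - size;
    }
    return offs;
}

int main(void)
{
    /* 32-bit element 1: byte offset 4 on LE hosts, 0 on BE hosts. */
    printf("LE: %d\n", element_offset(1, 2, 0)); /* 4 */
    printf("BE: %d\n", element_offset(1, 2, 1)); /* 0 */
    return 0;
}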
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVREPLVE.{B/H/W/D}; | ||
3 | - XVREPL128VEI.{B/H/W/D}; | ||
4 | - XVREPLVE0.{B/H/W/D/Q}; | ||
5 | - XVINSVE0.{W/D}; | ||
6 | - XVPICKVE.{W/D}; | ||
7 | - XVBSLL.V, XVBSRL.V. | ||
1 | 8 | ||
9 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-Id: <20230914022645.1151356-53-gaosong@loongson.cn> | ||
12 | --- | ||
13 | target/loongarch/helper.h | 5 + | ||
14 | target/loongarch/insns.decode | 25 +++ | ||
15 | target/loongarch/disas.c | 29 ++++ | ||
16 | target/loongarch/vec_helper.c | 28 ++++ | ||
17 | target/loongarch/insn_trans/trans_vec.c.inc | 171 +++++++++++++------- | ||
18 | 5 files changed, 201 insertions(+), 57 deletions(-) | ||
19 | |||
20 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/loongarch/helper.h | ||
23 | +++ b/target/loongarch/helper.h | ||
24 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32) | ||
25 | DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32) | ||
26 | DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32) | ||
27 | |||
28 | +DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
29 | +DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
30 | +DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
31 | +DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
32 | + | ||
33 | DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
34 | DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
35 | DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
36 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/loongarch/insns.decode | ||
39 | +++ b/target/loongarch/insns.decode | ||
40 | @@ -XXX,XX +XXX,XX @@ xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr | ||
41 | xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr | ||
42 | xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr | ||
43 | xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr | ||
44 | + | ||
45 | +xvreplve_b 0111 01010010 00100 ..... ..... ..... @vvr | ||
46 | +xvreplve_h 0111 01010010 00101 ..... ..... ..... @vvr | ||
47 | +xvreplve_w 0111 01010010 00110 ..... ..... ..... @vvr | ||
48 | +xvreplve_d 0111 01010010 00111 ..... ..... ..... @vvr | ||
49 | + | ||
50 | +xvrepl128vei_b 0111 01101111 01111 0 .... ..... ..... @vv_ui4 | ||
51 | +xvrepl128vei_h 0111 01101111 01111 10 ... ..... ..... @vv_ui3 | ||
52 | +xvrepl128vei_w 0111 01101111 01111 110 .. ..... ..... @vv_ui2 | ||
53 | +xvrepl128vei_d 0111 01101111 01111 1110 . ..... ..... @vv_ui1 | ||
54 | + | ||
55 | +xvreplve0_b 0111 01110000 01110 00000 ..... ..... @vv | ||
56 | +xvreplve0_h 0111 01110000 01111 00000 ..... ..... @vv | ||
57 | +xvreplve0_w 0111 01110000 01111 10000 ..... ..... @vv | ||
58 | +xvreplve0_d 0111 01110000 01111 11000 ..... ..... @vv | ||
59 | +xvreplve0_q 0111 01110000 01111 11100 ..... ..... @vv | ||
60 | + | ||
61 | +xvinsve0_w 0111 01101111 11111 10 ... ..... ..... @vv_ui3 | ||
62 | +xvinsve0_d 0111 01101111 11111 110 .. ..... ..... @vv_ui2 | ||
63 | + | ||
64 | +xvpickve_w 0111 01110000 00111 10 ... ..... ..... @vv_ui3 | ||
65 | +xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2 | ||
66 | + | ||
67 | +xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5 | ||
68 | +xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5 | ||
69 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/loongarch/disas.c | ||
72 | +++ b/target/loongarch/disas.c | ||
73 | @@ -XXX,XX +XXX,XX @@ static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic) | ||
74 | output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm); | ||
75 | } | ||
76 | |||
77 | +static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic) | ||
78 | +{ | ||
79 | + output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk); | ||
80 | +} | ||
81 | + | ||
82 | INSN_LASX(xvadd_b, vvv) | ||
83 | INSN_LASX(xvadd_h, vvv) | ||
84 | INSN_LASX(xvadd_w, vvv) | ||
85 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvreplgr2vr_b, vr) | ||
86 | INSN_LASX(xvreplgr2vr_h, vr) | ||
87 | INSN_LASX(xvreplgr2vr_w, vr) | ||
88 | INSN_LASX(xvreplgr2vr_d, vr) | ||
89 | + | ||
90 | +INSN_LASX(xvreplve_b, vvr) | ||
91 | +INSN_LASX(xvreplve_h, vvr) | ||
92 | +INSN_LASX(xvreplve_w, vvr) | ||
93 | +INSN_LASX(xvreplve_d, vvr) | ||
94 | +INSN_LASX(xvrepl128vei_b, vv_i) | ||
95 | +INSN_LASX(xvrepl128vei_h, vv_i) | ||
96 | +INSN_LASX(xvrepl128vei_w, vv_i) | ||
97 | +INSN_LASX(xvrepl128vei_d, vv_i) | ||
98 | + | ||
99 | +INSN_LASX(xvreplve0_b, vv) | ||
100 | +INSN_LASX(xvreplve0_h, vv) | ||
101 | +INSN_LASX(xvreplve0_w, vv) | ||
102 | +INSN_LASX(xvreplve0_d, vv) | ||
103 | +INSN_LASX(xvreplve0_q, vv) | ||
104 | + | ||
105 | +INSN_LASX(xvinsve0_w, vv_i) | ||
106 | +INSN_LASX(xvinsve0_d, vv_i) | ||
107 | + | ||
108 | +INSN_LASX(xvpickve_w, vv_i) | ||
109 | +INSN_LASX(xvpickve_d, vv_i) | ||
110 | + | ||
111 | +INSN_LASX(xvbsll_v, vv_i) | ||
112 | +INSN_LASX(xvbsrl_v, vv_i) | ||
113 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
114 | index XXXXXXX..XXXXXXX 100644 | ||
115 | --- a/target/loongarch/vec_helper.c | ||
116 | +++ b/target/loongarch/vec_helper.c | ||
117 | @@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_h, MO_16) | ||
118 | SETALLNEZ(vsetallnez_w, MO_32) | ||
119 | SETALLNEZ(vsetallnez_d, MO_64) | ||
120 | |||
121 | +#define XVINSVE0(NAME, E, MASK) \ | ||
122 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
123 | +{ \ | ||
124 | + VReg *Vd = (VReg *)vd; \ | ||
125 | + VReg *Vj = (VReg *)vj; \ | ||
126 | + Vd->E(imm & MASK) = Vj->E(0); \ | ||
127 | +} | ||
128 | + | ||
129 | +XVINSVE0(xvinsve0_w, W, 0x7) | ||
130 | +XVINSVE0(xvinsve0_d, D, 0x3) | ||
131 | + | ||
132 | +#define XVPICKVE(NAME, E, BIT, MASK) \ | ||
133 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
134 | +{ \ | ||
135 | + int i; \ | ||
136 | + VReg *Vd = (VReg *)vd; \ | ||
137 | + VReg *Vj = (VReg *)vj; \ | ||
138 | + int oprsz = simd_oprsz(desc); \ | ||
139 | + \ | ||
140 | + Vd->E(0) = Vj->E(imm & MASK); \ | ||
141 | + for (i = 1; i < oprsz / (BIT / 8); i++) { \ | ||
142 | + Vd->E(i) = 0; \ | ||
143 | + } \ | ||
144 | +} | ||
145 | + | ||
146 | +XVPICKVE(xvpickve_w, W, 32, 0x7) | ||
147 | +XVPICKVE(xvpickve_d, D, 64, 0x3) | ||
148 | + | ||
149 | #define VPACKEV(NAME, BIT, E) \ | ||
150 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
151 | { \ | ||
152 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
155 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
156 | @@ -XXX,XX +XXX,XX @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a) | ||
157 | return true; | ||
158 | } | ||
159 | |||
160 | -static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, | ||
161 | - void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) | ||
162 | +static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a, | ||
163 | + uint32_t oprsz, int vece, int bit, | ||
164 | + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) | ||
165 | { | ||
166 | + int i; | ||
167 | TCGv_i64 t0 = tcg_temp_new_i64(); | ||
168 | TCGv_ptr t1 = tcg_temp_new_ptr(); | ||
169 | TCGv_i64 t2 = tcg_temp_new_i64(); | ||
170 | |||
171 | - if (!avail_LSX(ctx)) { | ||
172 | - return false; | ||
173 | - } | ||
174 | - | ||
175 | - if (!check_vec(ctx, 16)) { | ||
176 | + if (!check_vec(ctx, oprsz)) { | ||
177 | return true; | ||
178 | } | ||
179 | |||
180 | - tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1); | ||
181 | + tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1); | ||
182 | tcg_gen_shli_i64(t0, t0, vece); | ||
183 | if (HOST_BIG_ENDIAN) { | ||
184 | - tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1)); | ||
185 | + tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1)); | ||
186 | } | ||
187 | |||
188 | tcg_gen_trunc_i64_ptr(t1, t0); | ||
189 | tcg_gen_add_ptr(t1, t1, cpu_env); | ||
190 | - func(t2, t1, vec_full_offset(a->vj)); | ||
191 | - tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2); | ||
192 | + | ||
193 | + for (i = 0; i < oprsz; i += 16) { | ||
194 | + func(t2, t1, vec_full_offset(a->vj) + i); | ||
195 | + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2); | ||
196 | + } | ||
197 | |||
198 | return true; | ||
199 | } | ||
200 | |||
201 | +static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, | ||
202 | + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) | ||
203 | +{ | ||
204 | + return gen_vreplve_vl(ctx, a, 16, vece, bit, func); | ||
205 | +} | ||
206 | + | ||
207 | +static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, | ||
208 | + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) | ||
209 | +{ | ||
210 | + return gen_vreplve_vl(ctx, a, 32, vece, bit, func); | ||
211 | +} | ||
212 | + | ||
213 | TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64) | ||
214 | TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64) | ||
215 | TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64) | ||
216 | TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64) | ||
217 | +TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64) | ||
218 | +TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64) | ||
219 | +TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64) | ||
220 | +TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64) | ||
221 | |||
222 | -static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a) | ||
223 | +static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop) | ||
224 | { | ||
225 | - int ofs; | ||
226 | - TCGv_i64 desthigh, destlow, high, low; | ||
227 | - | ||
228 | - if (!avail_LSX(ctx)) { | ||
229 | - return false; | ||
230 | - } | ||
231 | + int i; | ||
232 | |||
233 | - if (!check_vec(ctx, 16)) { | ||
234 | + if (!check_vec(ctx, 32)) { | ||
235 | return true; | ||
236 | } | ||
237 | |||
238 | - desthigh = tcg_temp_new_i64(); | ||
239 | - destlow = tcg_temp_new_i64(); | ||
240 | - high = tcg_temp_new_i64(); | ||
241 | - low = tcg_temp_new_i64(); | ||
242 | - | ||
243 | - get_vreg64(low, a->vj, 0); | ||
244 | + for (i = 0; i < 32; i += 16) { | ||
245 | + tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i, | ||
246 | + vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16); | ||
247 | |||
248 | - ofs = ((a->imm) & 0xf) * 8; | ||
249 | - if (ofs < 64) { | ||
250 | - get_vreg64(high, a->vj, 1); | ||
251 | - tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs); | ||
252 | - tcg_gen_shli_i64(destlow, low, ofs); | ||
253 | - } else { | ||
254 | - tcg_gen_shli_i64(desthigh, low, ofs - 64); | ||
255 | - destlow = tcg_constant_i64(0); | ||
256 | } | ||
257 | + return true; | ||
258 | +} | ||
259 | |||
260 | - set_vreg64(desthigh, a->vd, 1); | ||
261 | - set_vreg64(destlow, a->vd, 0); | ||
262 | +TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8) | ||
263 | +TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16) | ||
264 | +TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32) | ||
265 | +TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64) | ||
266 | |||
267 | +static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop) | ||
268 | +{ | ||
269 | + if (!check_vec(ctx, 32)) { | ||
270 | + return true; | ||
271 | + } | ||
272 | + | ||
273 | + tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd), | ||
274 | + vec_full_offset(a->vj), 32, 32); | ||
275 | return true; | ||
276 | } | ||
277 | |||
278 | -static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a) | ||
279 | +TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8) | ||
280 | +TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16) | ||
281 | +TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32) | ||
282 | +TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64) | ||
283 | +TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128) | ||
284 | + | ||
285 | +TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w) | ||
286 | +TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d) | ||
287 | + | ||
288 | +TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w) | ||
289 | +TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d) | ||
290 | + | ||
291 | +static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) | ||
292 | { | ||
293 | - TCGv_i64 desthigh, destlow, high, low; | ||
294 | - int ofs; | ||
295 | + int i, ofs; | ||
296 | |||
297 | - if (!avail_LSX(ctx)) { | ||
298 | - return false; | ||
299 | + if (!check_vec(ctx, oprsz)) { | ||
300 | + return true; | ||
301 | } | ||
302 | |||
303 | - if (!check_vec(ctx, 16)) { | ||
304 | - return true; | ||
305 | + for (i = 0; i < oprsz / 16; i++) { | ||
306 | + TCGv desthigh = tcg_temp_new_i64(); | ||
307 | + TCGv destlow = tcg_temp_new_i64(); | ||
308 | + TCGv high = tcg_temp_new_i64(); | ||
309 | + TCGv low = tcg_temp_new_i64(); | ||
310 | + | ||
311 | + get_vreg64(low, a->vj, 2 * i); | ||
312 | + | ||
313 | + ofs = ((a->imm) & 0xf) * 8; | ||
314 | + if (ofs < 64) { | ||
315 | + get_vreg64(high, a->vj, 2 * i + 1); | ||
316 | + tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs); | ||
317 | + tcg_gen_shli_i64(destlow, low, ofs); | ||
318 | + } else { | ||
319 | + tcg_gen_shli_i64(desthigh, low, ofs - 64); | ||
320 | + destlow = tcg_constant_i64(0); | ||
321 | + } | ||
322 | + set_vreg64(desthigh, a->vd, 2 * i + 1); | ||
323 | + set_vreg64(destlow, a->vd, 2 * i); | ||
324 | } | ||
325 | |||
326 | - desthigh = tcg_temp_new_i64(); | ||
327 | - destlow = tcg_temp_new_i64(); | ||
328 | - high = tcg_temp_new_i64(); | ||
329 | - low = tcg_temp_new_i64(); | ||
330 | + return true; | ||
331 | +} | ||
332 | |||
333 | - get_vreg64(high, a->vj, 1); | ||
334 | +static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) | ||
335 | +{ | ||
336 | + int i, ofs; | ||
337 | |||
338 | - ofs = ((a->imm) & 0xf) * 8; | ||
339 | - if (ofs < 64) { | ||
340 | - get_vreg64(low, a->vj, 0); | ||
341 | - tcg_gen_extract2_i64(destlow, low, high, ofs); | ||
342 | - tcg_gen_shri_i64(desthigh, high, ofs); | ||
343 | - } else { | ||
344 | - tcg_gen_shri_i64(destlow, high, ofs - 64); | ||
345 | - desthigh = tcg_constant_i64(0); | ||
346 | + if (!check_vec(ctx, oprsz)) { | ||
347 | + return true; | ||
348 | } | ||
349 | |||
350 | - set_vreg64(desthigh, a->vd, 1); | ||
351 | - set_vreg64(destlow, a->vd, 0); | ||
352 | + for (i = 0; i < oprsz / 16; i++) { | ||
353 | + TCGv desthigh = tcg_temp_new_i64(); | ||
354 | + TCGv destlow = tcg_temp_new_i64(); | ||
355 | + TCGv high = tcg_temp_new_i64(); | ||
356 | + TCGv low = tcg_temp_new_i64(); | ||
357 | + get_vreg64(high, a->vj, 2 * i + 1); | ||
358 | + | ||
359 | + ofs = ((a->imm) & 0xf) * 8; | ||
360 | + if (ofs < 64) { | ||
361 | + get_vreg64(low, a->vj, 2 * i); | ||
362 | + tcg_gen_extract2_i64(destlow, low, high, ofs); | ||
363 | + tcg_gen_shri_i64(desthigh, high, ofs); | ||
364 | + } else { | ||
365 | + tcg_gen_shri_i64(destlow, high, ofs - 64); | ||
366 | + desthigh = tcg_constant_i64(0); | ||
367 | + } | ||
368 | + set_vreg64(desthigh, a->vd, 2 * i + 1); | ||
369 | + set_vreg64(destlow, a->vd, 2 * i); | ||
370 | + } | ||
371 | |||
372 | return true; | ||
373 | } | ||
374 | |||
375 | +TRANS(vbsll_v, LSX, do_vbsll_v, 16) | ||
376 | +TRANS(vbsrl_v, LSX, do_vbsrl_v, 16) | ||
377 | +TRANS(xvbsll_v, LASX, do_vbsll_v, 32) | ||
378 | +TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32) | ||
379 | + | ||
380 | TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b) | ||
381 | TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h) | ||
382 | TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w) | ||
383 | -- | ||
384 | 2.39.1 |
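
For reference, gen_xvrepl128() above issues one tcg_gen_gvec_dup_mem() per 128-bit half, so element imm of each half of vj is broadcast across that half only. A hedged C model of xvrepl128vei.w built on that reading (not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of xvrepl128vei.w, not QEMU code: within each
 * 128-bit half, element imm of vj is copied to every 32-bit lane. */
static void xvrepl128vei_w(uint32_t vd[8], const uint32_t vj[8], int imm)
{
    for (int half = 0; half < 2; half++) {
        uint32_t v = vj[half * 4 + (imm & 0x3)];  /* source element */
        for (int i = 0; i < 4; i++) {
            vd[half * 4 + i] = v;
        }
    }
}

int main(void)
{
    uint32_t vj[8] = {0, 1, 2, 3, 4, 5, 6, 7}, vd[8];

    xvrepl128vei_w(vd, vj, 2);
    for (int i = 0; i < 8; i++) {
        printf("%u ", vd[i]);   /* 2 2 2 2 6 6 6 6 */
    }
    printf("\n");
    return 0;
}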
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVPACK{EV/OD}.{B/H/W/D}; | ||
3 | - XVPICK{EV/OD}.{B/H/W/D}; | ||
4 | - XVILV{L/H}.{B/H/W/D}. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-54-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 27 ++++ | ||
11 | target/loongarch/disas.c | 27 ++++ | ||
12 | target/loongarch/vec_helper.c | 138 +++++++++++--------- | ||
13 | target/loongarch/insn_trans/trans_vec.c.inc | 24 ++++ | ||
14 | 4 files changed, 156 insertions(+), 60 deletions(-) | ||
15 | |||
16 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/insns.decode | ||
19 | +++ b/target/loongarch/insns.decode | ||
20 | @@ -XXX,XX +XXX,XX @@ xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2 | ||
21 | |||
22 | xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5 | ||
23 | xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5 | ||
24 | + | ||
25 | +xvpackev_b 0111 01010001 01100 ..... ..... ..... @vvv | ||
26 | +xvpackev_h 0111 01010001 01101 ..... ..... ..... @vvv | ||
27 | +xvpackev_w 0111 01010001 01110 ..... ..... ..... @vvv | ||
28 | +xvpackev_d 0111 01010001 01111 ..... ..... ..... @vvv | ||
29 | +xvpackod_b 0111 01010001 10000 ..... ..... ..... @vvv | ||
30 | +xvpackod_h 0111 01010001 10001 ..... ..... ..... @vvv | ||
31 | +xvpackod_w 0111 01010001 10010 ..... ..... ..... @vvv | ||
32 | +xvpackod_d 0111 01010001 10011 ..... ..... ..... @vvv | ||
33 | + | ||
34 | +xvpickev_b 0111 01010001 11100 ..... ..... ..... @vvv | ||
35 | +xvpickev_h 0111 01010001 11101 ..... ..... ..... @vvv | ||
36 | +xvpickev_w 0111 01010001 11110 ..... ..... ..... @vvv | ||
37 | +xvpickev_d 0111 01010001 11111 ..... ..... ..... @vvv | ||
38 | +xvpickod_b 0111 01010010 00000 ..... ..... ..... @vvv | ||
39 | +xvpickod_h 0111 01010010 00001 ..... ..... ..... @vvv | ||
40 | +xvpickod_w 0111 01010010 00010 ..... ..... ..... @vvv | ||
41 | +xvpickod_d 0111 01010010 00011 ..... ..... ..... @vvv | ||
42 | + | ||
43 | +xvilvl_b 0111 01010001 10100 ..... ..... ..... @vvv | ||
44 | +xvilvl_h 0111 01010001 10101 ..... ..... ..... @vvv | ||
45 | +xvilvl_w 0111 01010001 10110 ..... ..... ..... @vvv | ||
46 | +xvilvl_d 0111 01010001 10111 ..... ..... ..... @vvv | ||
47 | +xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv | ||
48 | +xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv | ||
49 | +xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv | ||
50 | +xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv | ||
51 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/loongarch/disas.c | ||
54 | +++ b/target/loongarch/disas.c | ||
55 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvpickve_d, vv_i) | ||
56 | |||
57 | INSN_LASX(xvbsll_v, vv_i) | ||
58 | INSN_LASX(xvbsrl_v, vv_i) | ||
59 | + | ||
60 | +INSN_LASX(xvpackev_b, vvv) | ||
61 | +INSN_LASX(xvpackev_h, vvv) | ||
62 | +INSN_LASX(xvpackev_w, vvv) | ||
63 | +INSN_LASX(xvpackev_d, vvv) | ||
64 | +INSN_LASX(xvpackod_b, vvv) | ||
65 | +INSN_LASX(xvpackod_h, vvv) | ||
66 | +INSN_LASX(xvpackod_w, vvv) | ||
67 | +INSN_LASX(xvpackod_d, vvv) | ||
68 | + | ||
69 | +INSN_LASX(xvpickev_b, vvv) | ||
70 | +INSN_LASX(xvpickev_h, vvv) | ||
71 | +INSN_LASX(xvpickev_w, vvv) | ||
72 | +INSN_LASX(xvpickev_d, vvv) | ||
73 | +INSN_LASX(xvpickod_b, vvv) | ||
74 | +INSN_LASX(xvpickod_h, vvv) | ||
75 | +INSN_LASX(xvpickod_w, vvv) | ||
76 | +INSN_LASX(xvpickod_d, vvv) | ||
77 | + | ||
78 | +INSN_LASX(xvilvl_b, vvv) | ||
79 | +INSN_LASX(xvilvl_h, vvv) | ||
80 | +INSN_LASX(xvilvl_w, vvv) | ||
81 | +INSN_LASX(xvilvl_d, vvv) | ||
82 | +INSN_LASX(xvilvh_b, vvv) | ||
83 | +INSN_LASX(xvilvh_h, vvv) | ||
84 | +INSN_LASX(xvilvh_w, vvv) | ||
85 | +INSN_LASX(xvilvh_d, vvv) | ||
86 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/target/loongarch/vec_helper.c | ||
89 | +++ b/target/loongarch/vec_helper.c | ||
90 | @@ -XXX,XX +XXX,XX @@ XVPICKVE(xvpickve_d, D, 64, 0x3) | ||
91 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
92 | { \ | ||
93 | int i; \ | ||
94 | - VReg temp; \ | ||
95 | + VReg temp = {}; \ | ||
96 | VReg *Vd = (VReg *)vd; \ | ||
97 | VReg *Vj = (VReg *)vj; \ | ||
98 | VReg *Vk = (VReg *)vk; \ | ||
99 | + int oprsz = simd_oprsz(desc); \ | ||
100 | \ | ||
101 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
102 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
103 | temp.E(2 * i + 1) = Vj->E(2 * i); \ | ||
104 | temp.E(2 * i) = Vk->E(2 * i); \ | ||
105 | } \ | ||
106 | @@ -XXX,XX +XXX,XX @@ VPACKEV(vpackev_d, 128, D) | ||
107 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
108 | { \ | ||
109 | int i; \ | ||
110 | - VReg temp; \ | ||
111 | + VReg temp = {}; \ | ||
112 | VReg *Vd = (VReg *)vd; \ | ||
113 | VReg *Vj = (VReg *)vj; \ | ||
114 | VReg *Vk = (VReg *)vk; \ | ||
115 | + int oprsz = simd_oprsz(desc); \ | ||
116 | \ | ||
117 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
118 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
119 | temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ | ||
120 | temp.E(2 * i) = Vk->E(2 * i + 1); \ | ||
121 | } \ | ||
122 | @@ -XXX,XX +XXX,XX @@ VPACKOD(vpackod_h, 32, H) | ||
123 | VPACKOD(vpackod_w, 64, W) | ||
124 | VPACKOD(vpackod_d, 128, D) | ||
125 | |||
126 | -#define VPICKEV(NAME, BIT, E) \ | ||
127 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
128 | -{ \ | ||
129 | - int i; \ | ||
130 | - VReg temp; \ | ||
131 | - VReg *Vd = (VReg *)vd; \ | ||
132 | - VReg *Vj = (VReg *)vj; \ | ||
133 | - VReg *Vk = (VReg *)vk; \ | ||
134 | - \ | ||
135 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
136 | - temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ | ||
137 | - temp.E(i) = Vk->E(2 * i); \ | ||
138 | - } \ | ||
139 | - *Vd = temp; \ | ||
140 | +#define VPICKEV(NAME, BIT, E) \ | ||
141 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
142 | +{ \ | ||
143 | + int i, j, ofs; \ | ||
144 | + VReg temp = {}; \ | ||
145 | + VReg *Vd = (VReg *)vd; \ | ||
146 | + VReg *Vj = (VReg *)vj; \ | ||
147 | + VReg *Vk = (VReg *)vk; \ | ||
148 | + int oprsz = simd_oprsz(desc); \ | ||
149 | + \ | ||
150 | + ofs = LSX_LEN / BIT; \ | ||
151 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
152 | + for (j = 0; j < ofs; j++) { \ | ||
153 | + temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \ | ||
154 | + temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \ | ||
155 | + } \ | ||
156 | + } \ | ||
157 | + *Vd = temp; \ | ||
158 | } | ||
159 | |||
160 | VPICKEV(vpickev_b, 16, B) | ||
161 | @@ -XXX,XX +XXX,XX @@ VPICKEV(vpickev_h, 32, H) | ||
162 | VPICKEV(vpickev_w, 64, W) | ||
163 | VPICKEV(vpickev_d, 128, D) | ||
164 | |||
165 | -#define VPICKOD(NAME, BIT, E) \ | ||
166 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
167 | -{ \ | ||
168 | - int i; \ | ||
169 | - VReg temp; \ | ||
170 | - VReg *Vd = (VReg *)vd; \ | ||
171 | - VReg *Vj = (VReg *)vj; \ | ||
172 | - VReg *Vk = (VReg *)vk; \ | ||
173 | - \ | ||
174 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
175 | - temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ | ||
176 | - temp.E(i) = Vk->E(2 * i + 1); \ | ||
177 | - } \ | ||
178 | - *Vd = temp; \ | ||
179 | +#define VPICKOD(NAME, BIT, E) \ | ||
180 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
181 | +{ \ | ||
182 | + int i, j, ofs; \ | ||
183 | + VReg temp = {}; \ | ||
184 | + VReg *Vd = (VReg *)vd; \ | ||
185 | + VReg *Vj = (VReg *)vj; \ | ||
186 | + VReg *Vk = (VReg *)vk; \ | ||
187 | + int oprsz = simd_oprsz(desc); \ | ||
188 | + \ | ||
189 | + ofs = LSX_LEN / BIT; \ | ||
190 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
191 | + for (j = 0; j < ofs; j++) { \ | ||
192 | + temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \ | ||
193 | + temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \ | ||
194 | + } \ | ||
195 | + } \ | ||
196 | + *Vd = temp; \ | ||
197 | } | ||
198 | |||
199 | VPICKOD(vpickod_b, 16, B) | ||
200 | @@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_h, 32, H) | ||
201 | VPICKOD(vpickod_w, 64, W) | ||
202 | VPICKOD(vpickod_d, 128, D) | ||
203 | |||
204 | -#define VILVL(NAME, BIT, E) \ | ||
205 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
206 | -{ \ | ||
207 | - int i; \ | ||
208 | - VReg temp; \ | ||
209 | - VReg *Vd = (VReg *)vd; \ | ||
210 | - VReg *Vj = (VReg *)vj; \ | ||
211 | - VReg *Vk = (VReg *)vk; \ | ||
212 | - \ | ||
213 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
214 | - temp.E(2 * i + 1) = Vj->E(i); \ | ||
215 | - temp.E(2 * i) = Vk->E(i); \ | ||
216 | - } \ | ||
217 | - *Vd = temp; \ | ||
218 | +#define VILVL(NAME, BIT, E) \ | ||
219 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
220 | +{ \ | ||
221 | + int i, j, ofs; \ | ||
222 | + VReg temp = {}; \ | ||
223 | + VReg *Vd = (VReg *)vd; \ | ||
224 | + VReg *Vj = (VReg *)vj; \ | ||
225 | + VReg *Vk = (VReg *)vk; \ | ||
226 | + int oprsz = simd_oprsz(desc); \ | ||
227 | + \ | ||
228 | + ofs = LSX_LEN / BIT; \ | ||
229 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
230 | + for (j = 0; j < ofs; j++) { \ | ||
231 | + temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \ | ||
232 | + temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \ | ||
233 | + } \ | ||
234 | + } \ | ||
235 | + *Vd = temp; \ | ||
236 | } | ||
237 | |||
238 | VILVL(vilvl_b, 16, B) | ||
239 | @@ -XXX,XX +XXX,XX @@ VILVL(vilvl_h, 32, H) | ||
240 | VILVL(vilvl_w, 64, W) | ||
241 | VILVL(vilvl_d, 128, D) | ||
242 | |||
243 | -#define VILVH(NAME, BIT, E) \ | ||
244 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
245 | -{ \ | ||
246 | - int i; \ | ||
247 | - VReg temp; \ | ||
248 | - VReg *Vd = (VReg *)vd; \ | ||
249 | - VReg *Vj = (VReg *)vj; \ | ||
250 | - VReg *Vk = (VReg *)vk; \ | ||
251 | - \ | ||
252 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
253 | - temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ | ||
254 | - temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ | ||
255 | - } \ | ||
256 | - *Vd = temp; \ | ||
257 | +#define VILVH(NAME, BIT, E) \ | ||
258 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
259 | +{ \ | ||
260 | + int i, j, ofs; \ | ||
261 | + VReg temp = {}; \ | ||
262 | + VReg *Vd = (VReg *)vd; \ | ||
263 | + VReg *Vj = (VReg *)vj; \ | ||
264 | + VReg *Vk = (VReg *)vk; \ | ||
265 | + int oprsz = simd_oprsz(desc); \ | ||
266 | + \ | ||
267 | + ofs = LSX_LEN / BIT; \ | ||
268 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
269 | + for (j = 0; j < ofs; j++) { \ | ||
270 | + temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \ | ||
271 | + temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \ | ||
272 | + } \ | ||
273 | + } \ | ||
274 | + *Vd = temp; \ | ||
275 | } | ||
276 | |||
277 | VILVH(vilvh_b, 16, B) | ||
278 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
279 | index XXXXXXX..XXXXXXX 100644 | ||
280 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
281 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
282 | @@ -XXX,XX +XXX,XX @@ TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b) | ||
283 | TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h) | ||
284 | TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w) | ||
285 | TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d) | ||
286 | +TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b) | ||
287 | +TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h) | ||
288 | +TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w) | ||
289 | +TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d) | ||
290 | +TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b) | ||
291 | +TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h) | ||
292 | +TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w) | ||
293 | +TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d) | ||
294 | |||
295 | TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b) | ||
296 | TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h) | ||
297 | @@ -XXX,XX +XXX,XX @@ TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b) | ||
298 | TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h) | ||
299 | TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w) | ||
300 | TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d) | ||
301 | +TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b) | ||
302 | +TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h) | ||
303 | +TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w) | ||
304 | +TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d) | ||
305 | +TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b) | ||
306 | +TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h) | ||
307 | +TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w) | ||
308 | +TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d) | ||
309 | |||
310 | TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b) | ||
311 | TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h) | ||
312 | @@ -XXX,XX +XXX,XX @@ TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b) | ||
313 | TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h) | ||
314 | TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w) | ||
315 | TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d) | ||
316 | +TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b) | ||
317 | +TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h) | ||
318 | +TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w) | ||
319 | +TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d) | ||
320 | +TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b) | ||
321 | +TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h) | ||
322 | +TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w) | ||
323 | +TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d) | ||
324 | |||
325 | TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b) | ||
326 | TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h) | ||
327 | -- | ||
328 | 2.39.1 |
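
The reworked VPICKEV macro keeps the operation per 128-bit lane: within each lane, the low ofs elements of the result take Vk's even-indexed elements and the high ofs elements take Vj's. A standalone sketch specialised to byte elements, under the assumption that this per-lane reading is correct (not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of xvpickev.b, not QEMU code. */
static void xvpickev_b(uint8_t vd[32], const uint8_t vj[32],
                       const uint8_t vk[32])
{
    uint8_t tmp[32];
    const int ofs = 8;                     /* LSX_LEN / BIT = 128 / 16 */

    for (int i = 0; i < 2; i++) {          /* per 128-bit lane */
        for (int j = 0; j < ofs; j++) {
            tmp[j + ofs * (2 * i + 1)] = vj[2 * (j + ofs * i)];
            tmp[j + ofs * 2 * i] = vk[2 * (j + ofs * i)];
        }
    }
    for (int i = 0; i < 32; i++) {
        vd[i] = tmp[i];
    }
}

int main(void)
{
    uint8_t vj[32], vk[32], vd[32];

    for (int i = 0; i < 32; i++) {
        vj[i] = 100 + i;                   /* distinct markers */
        vk[i] = i;
    }
    xvpickev_b(vd, vj, vk);
    /* Low half of each lane holds vk's even bytes, high half vj's. */
    printf("%u %u %u %u\n", vd[0], vd[8], vd[16], vd[24]); /* 0 100 16 116 */
    return 0;
}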
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVSHUF.{B/H/W/D}; | ||
3 | - XVPERM.W; | ||
4 | - XVSHUF4I.{B/H/W/D}; | ||
5 | - XVPERMI.{W/D/Q}; | ||
6 | - XVEXTRINS.{B/H/W/D}. | ||
1 | 7 | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-Id: <20230914022645.1151356-55-gaosong@loongson.cn> | ||
11 | --- | ||
12 | target/loongarch/helper.h | 3 + | ||
13 | target/loongarch/insns.decode | 21 +++ | ||
14 | target/loongarch/disas.c | 21 +++ | ||
15 | target/loongarch/vec_helper.c | 146 ++++++++++++++------ | ||
16 | target/loongarch/insn_trans/trans_vec.c.inc | 30 +++- | ||
17 | 5 files changed, 175 insertions(+), 46 deletions(-) | ||
18 | |||
19 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/loongarch/helper.h | ||
22 | +++ b/target/loongarch/helper.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
24 | DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
25 | DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
26 | |||
27 | +DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
28 | DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
29 | +DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
30 | +DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
31 | |||
32 | DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
33 | DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
34 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/loongarch/insns.decode | ||
37 | +++ b/target/loongarch/insns.decode | ||
38 | @@ -XXX,XX +XXX,XX @@ xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv | ||
39 | xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv | ||
40 | xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv | ||
41 | xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv | ||
42 | + | ||
43 | +xvshuf_b 0000 11010110 ..... ..... ..... ..... @vvvv | ||
44 | +xvshuf_h 0111 01010111 10101 ..... ..... ..... @vvv | ||
45 | +xvshuf_w 0111 01010111 10110 ..... ..... ..... @vvv | ||
46 | +xvshuf_d 0111 01010111 10111 ..... ..... ..... @vvv | ||
47 | + | ||
48 | +xvperm_w 0111 01010111 11010 ..... ..... ..... @vvv | ||
49 | + | ||
50 | +xvshuf4i_b 0111 01111001 00 ........ ..... ..... @vv_ui8 | ||
51 | +xvshuf4i_h 0111 01111001 01 ........ ..... ..... @vv_ui8 | ||
52 | +xvshuf4i_w 0111 01111001 10 ........ ..... ..... @vv_ui8 | ||
53 | +xvshuf4i_d 0111 01111001 11 ........ ..... ..... @vv_ui8 | ||
54 | + | ||
55 | +xvpermi_w 0111 01111110 01 ........ ..... ..... @vv_ui8 | ||
56 | +xvpermi_d 0111 01111110 10 ........ ..... ..... @vv_ui8 | ||
57 | +xvpermi_q 0111 01111110 11 ........ ..... ..... @vv_ui8 | ||
58 | + | ||
59 | +xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8 | ||
60 | +xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8 | ||
61 | +xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8 | ||
62 | +xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8 | ||
63 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/loongarch/disas.c | ||
66 | +++ b/target/loongarch/disas.c | ||
67 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvilvh_b, vvv) | ||
68 | INSN_LASX(xvilvh_h, vvv) | ||
69 | INSN_LASX(xvilvh_w, vvv) | ||
70 | INSN_LASX(xvilvh_d, vvv) | ||
71 | + | ||
72 | +INSN_LASX(xvshuf_b, vvvv) | ||
73 | +INSN_LASX(xvshuf_h, vvv) | ||
74 | +INSN_LASX(xvshuf_w, vvv) | ||
75 | +INSN_LASX(xvshuf_d, vvv) | ||
76 | + | ||
77 | +INSN_LASX(xvperm_w, vvv) | ||
78 | + | ||
79 | +INSN_LASX(xvshuf4i_b, vv_i) | ||
80 | +INSN_LASX(xvshuf4i_h, vv_i) | ||
81 | +INSN_LASX(xvshuf4i_w, vv_i) | ||
82 | +INSN_LASX(xvshuf4i_d, vv_i) | ||
83 | + | ||
84 | +INSN_LASX(xvpermi_w, vv_i) | ||
85 | +INSN_LASX(xvpermi_d, vv_i) | ||
86 | +INSN_LASX(xvpermi_q, vv_i) | ||
87 | + | ||
88 | +INSN_LASX(xvextrins_d, vv_i) | ||
89 | +INSN_LASX(xvextrins_w, vv_i) | ||
90 | +INSN_LASX(xvextrins_h, vv_i) | ||
91 | +INSN_LASX(xvextrins_b, vv_i) | ||
92 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/target/loongarch/vec_helper.c | ||
95 | +++ b/target/loongarch/vec_helper.c | ||
96 | @@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H) | ||
97 | VILVH(vilvh_w, 64, W) | ||
98 | VILVH(vilvh_d, 128, D) | ||
99 | |||
100 | +#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03)) | ||
101 | + | ||
102 | void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) | ||
103 | { | ||
104 | - int i, m; | ||
105 | - VReg temp; | ||
106 | + int i, j, m; | ||
107 | + VReg temp = {}; | ||
108 | VReg *Vd = (VReg *)vd; | ||
109 | VReg *Vj = (VReg *)vj; | ||
110 | VReg *Vk = (VReg *)vk; | ||
111 | VReg *Va = (VReg *)va; | ||
112 | + int oprsz = simd_oprsz(desc); | ||
113 | |||
114 | - m = LSX_LEN/8; | ||
115 | - for (i = 0; i < m ; i++) { | ||
116 | + m = LSX_LEN / 8; | ||
117 | + for (i = 0; i < (oprsz / 16) * m; i++) { | ||
118 | + j = i < m ? 0 : 1; | ||
119 | uint64_t k = (uint8_t)Va->B(i) % (2 * m); | ||
120 | - temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m); | ||
121 | + temp.B(i) = k < m ? Vk->B(k + j * m) : Vj->B(k + (j - 1) * m); | ||
122 | } | ||
123 | *Vd = temp; | ||
124 | } | ||
125 | |||
126 | -#define VSHUF(NAME, BIT, E) \ | ||
127 | -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
128 | -{ \ | ||
129 | - int i, m; \ | ||
130 | - VReg temp; \ | ||
131 | - VReg *Vd = (VReg *)vd; \ | ||
132 | - VReg *Vj = (VReg *)vj; \ | ||
133 | - VReg *Vk = (VReg *)vk; \ | ||
134 | - \ | ||
135 | - m = LSX_LEN/BIT; \ | ||
136 | - for (i = 0; i < m; i++) { \ | ||
137 | - uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ | ||
138 | - temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ | ||
139 | - } \ | ||
140 | - *Vd = temp; \ | ||
141 | +#define VSHUF(NAME, BIT, E) \ | ||
142 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
143 | +{ \ | ||
144 | + int i, j, m; \ | ||
145 | + VReg temp = {}; \ | ||
146 | + VReg *Vd = (VReg *)vd; \ | ||
147 | + VReg *Vj = (VReg *)vj; \ | ||
148 | + VReg *Vk = (VReg *)vk; \ | ||
149 | + int oprsz = simd_oprsz(desc); \ | ||
150 | + \ | ||
151 | + m = LSX_LEN / BIT; \ | ||
152 | + for (i = 0; i < (oprsz / 16) * m; i++) { \ | ||
153 | + j = i < m ? 0 : 1; \ | ||
154 | + uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \ | ||
155 | + temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \ | ||
156 | + } \ | ||
157 | + *Vd = temp; \ | ||
158 | } | ||
159 | |||
160 | VSHUF(vshuf_h, 16, H) | ||
161 | VSHUF(vshuf_w, 32, W) | ||
162 | VSHUF(vshuf_d, 64, D) | ||
163 | |||
164 | -#define VSHUF4I(NAME, BIT, E) \ | ||
165 | -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
166 | -{ \ | ||
167 | - int i; \ | ||
168 | - VReg temp; \ | ||
169 | - VReg *Vd = (VReg *)vd; \ | ||
170 | - VReg *Vj = (VReg *)vj; \ | ||
171 | - \ | ||
172 | - for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
173 | - temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ | ||
174 | - (2 * ((i) & 0x03))) & 0x03)); \ | ||
175 | - } \ | ||
176 | - *Vd = temp; \ | ||
177 | +#define VSHUF4I(NAME, BIT, E) \ | ||
178 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
179 | +{ \ | ||
180 | + int i, j, max; \ | ||
181 | + VReg temp = {}; \ | ||
182 | + VReg *Vd = (VReg *)vd; \ | ||
183 | + VReg *Vj = (VReg *)vj; \ | ||
184 | + int oprsz = simd_oprsz(desc); \ | ||
185 | + \ | ||
186 | + max = LSX_LEN / BIT; \ | ||
187 | + for (i = 0; i < oprsz / (BIT / 8); i++) { \ | ||
188 | + j = i < max ? 1 : 2; \ | ||
189 | + temp.E(i) = Vj->E(SHF_POS(i - ((j - 1) * max), imm) + (j - 1) * max); \ | ||
190 | + } \ | ||
191 | + *Vd = temp; \ | ||
192 | } | ||
193 | |||
194 | VSHUF4I(vshuf4i_b, 8, B) | ||
195 | @@ -XXX,XX +XXX,XX @@ VSHUF4I(vshuf4i_w, 32, W) | ||
196 | |||
197 | void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
198 | { | ||
199 | + int i; | ||
200 | + VReg temp = {}; | ||
201 | VReg *Vd = (VReg *)vd; | ||
202 | VReg *Vj = (VReg *)vj; | ||
203 | + int oprsz = simd_oprsz(desc); | ||
204 | |||
205 | - VReg temp; | ||
206 | - temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); | ||
207 | - temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1); | ||
208 | + for (i = 0; i < oprsz / 16; i++) { | ||
209 | + temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i); | ||
210 | + temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i); | ||
211 | + } | ||
212 | + *Vd = temp; | ||
213 | +} | ||
214 | + | ||
215 | +void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc) | ||
216 | +{ | ||
217 | + int i, m; | ||
218 | + VReg temp = {}; | ||
219 | + VReg *Vd = (VReg *)vd; | ||
220 | + VReg *Vj = (VReg *)vj; | ||
221 | + VReg *Vk = (VReg *)vk; | ||
222 | + | ||
223 | + m = LASX_LEN / 32; | ||
224 | + for (i = 0; i < m ; i++) { | ||
225 | + uint64_t k = (uint8_t)Vk->W(i) % 8; | ||
226 | + temp.W(i) = Vj->W(k); | ||
227 | + } | ||
228 | *Vd = temp; | ||
229 | } | ||
230 | |||
231 | void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
232 | { | ||
233 | + int i; | ||
234 | + VReg temp = {}; | ||
235 | + VReg *Vd = (VReg *)vd; | ||
236 | + VReg *Vj = (VReg *)vj; | ||
237 | + int oprsz = simd_oprsz(desc); | ||
238 | + | ||
239 | + for (i = 0; i < oprsz / 16; i++) { | ||
240 | + temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i); | ||
241 | + temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i); | ||
242 | + temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i); | ||
243 | + temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i); | ||
244 | + } | ||
245 | + *Vd = temp; | ||
246 | +} | ||
247 | + | ||
248 | +void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
249 | +{ | ||
250 | + VReg temp = {}; | ||
251 | + VReg *Vd = (VReg *)vd; | ||
252 | + VReg *Vj = (VReg *)vj; | ||
253 | + | ||
254 | + temp.D(0) = Vj->D(imm & 0x3); | ||
255 | + temp.D(1) = Vj->D((imm >> 2) & 0x3); | ||
256 | + temp.D(2) = Vj->D((imm >> 4) & 0x3); | ||
257 | + temp.D(3) = Vj->D((imm >> 6) & 0x3); | ||
258 | + *Vd = temp; | ||
259 | +} | ||
260 | + | ||
261 | +void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) | ||
262 | +{ | ||
263 | + int i; | ||
264 | VReg temp; | ||
265 | VReg *Vd = (VReg *)vd; | ||
266 | VReg *Vj = (VReg *)vj; | ||
267 | |||
268 | - temp.W(0) = Vj->W(imm & 0x3); | ||
269 | - temp.W(1) = Vj->W((imm >> 2) & 0x3); | ||
270 | - temp.W(2) = Vd->W((imm >> 4) & 0x3); | ||
271 | - temp.W(3) = Vd->W((imm >> 6) & 0x3); | ||
272 | + for (i = 0; i < 2; i++, imm >>= 4) { | ||
273 | + temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1); | ||
274 | + } | ||
275 | *Vd = temp; | ||
276 | } | ||
277 | |||
278 | #define VEXTRINS(NAME, BIT, E, MASK) \ | ||
279 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
280 | { \ | ||
281 | - int ins, extr; \ | ||
282 | + int i, ins, extr, max; \ | ||
283 | VReg *Vd = (VReg *)vd; \ | ||
284 | VReg *Vj = (VReg *)vj; \ | ||
285 | + int oprsz = simd_oprsz(desc); \ | ||
286 | \ | ||
287 | + max = LSX_LEN / BIT; \ | ||
288 | ins = (imm >> 4) & MASK; \ | ||
289 | extr = imm & MASK; \ | ||
290 | - Vd->E(ins) = Vj->E(extr); \ | ||
291 | + for (i = 0; i < oprsz / 16; i++) { \ | ||
292 | + Vd->E(ins + i * max) = Vj->E(extr + i * max); \ | ||
293 | + } \ | ||
294 | } | ||
295 | |||
296 | VEXTRINS(vextrins_b, 8, B, 0xf) | ||
297 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
298 | index XXXXXXX..XXXXXXX 100644 | ||
299 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
300 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
301 | @@ -XXX,XX +XXX,XX @@ static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a, | ||
302 | static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
303 | gen_helper_gvec_4 *fn) | ||
304 | { | ||
305 | + if (!check_vec(ctx, oprsz)) { | ||
306 | + return true; | ||
307 | + } | ||
308 | + | ||
309 | tcg_gen_gvec_4_ool(vec_full_offset(a->vd), | ||
310 | vec_full_offset(a->vj), | ||
311 | vec_full_offset(a->vk), | ||
312 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, | ||
313 | static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, | ||
314 | gen_helper_gvec_4 *fn) | ||
315 | { | ||
316 | - if (!check_vec(ctx, 16)) { | ||
317 | - return true; | ||
318 | - } | ||
319 | - | ||
320 | return gen_vvvv_vl(ctx, a, 16, fn); | ||
321 | } | ||
322 | |||
323 | +static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a, | ||
324 | + gen_helper_gvec_4 *fn) | ||
325 | +{ | ||
326 | + return gen_vvvv_vl(ctx, a, 32, fn); | ||
327 | +} | ||
328 | + | ||
329 | static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, | ||
330 | gen_helper_gvec_3_ptr *fn) | ||
331 | { | ||
332 | @@ -XXX,XX +XXX,XX @@ TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b) | ||
333 | TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h) | ||
334 | TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w) | ||
335 | TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d) | ||
336 | +TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b) | ||
337 | +TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h) | ||
338 | +TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w) | ||
339 | +TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d) | ||
340 | TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b) | ||
341 | TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h) | ||
342 | TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w) | ||
343 | TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d) | ||
344 | +TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b) | ||
345 | +TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h) | ||
346 | +TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w) | ||
347 | +TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d) | ||
348 | |||
349 | +TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w) | ||
350 | TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w) | ||
351 | +TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w) | ||
352 | +TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d) | ||
353 | +TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q) | ||
354 | |||
355 | TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b) | ||
356 | TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h) | ||
357 | TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w) | ||
358 | TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d) | ||
359 | +TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b) | ||
360 | +TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h) | ||
361 | +TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w) | ||
362 | +TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d) | ||
363 | |||
364 | static bool trans_vld(DisasContext *ctx, arg_vr_i *a) | ||
365 | { | ||
366 | -- | ||
367 | 2.39.1
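
As an illustration of the xvpermi.q loop above: the helper copies whole 128-bit lanes, with each 4-bit field of the immediate selecting a source register and one of its lanes. The host-side sketch below models that loop; the Toy256 type and the toy_vpermi_q name are illustrative stand-ins, not QEMU's VReg or helper API.

    #include <stdint.h>

    /* Illustrative 256-bit register: two 128-bit lanes (Q(0), Q(1)),
     * each held as a pair of 64-bit halves. Toy type, not QEMU's VReg. */
    typedef struct { uint64_t q[2][2]; } Toy256;

    /* Model of the vpermi_q helper: for destination lane i, the low two
     * bits of each 4-bit immediate field pick the source register
     * (bit 1: Vd if set, else Vj) and which of its lanes to copy (bit 0). */
    static Toy256 toy_vpermi_q(Toy256 vd, Toy256 vj, uint8_t imm)
    {
        Toy256 t;
        for (int i = 0; i < 2; i++, imm >>= 4) {
            const Toy256 *src = (imm & 2) ? &vd : &vj;
            t.q[i][0] = src->q[imm & 1][0];
            t.q[i][1] = src->q[imm & 1][1];
        }
        return t;
    }

For example, imm == 0x01 writes Vj's high lane to the low half of the result and Vj's low lane to the high half, i.e. it swaps Vj's two 128-bit lanes.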
New patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - XVLD[X], XVST[X]; | ||
3 | - XVLDREPL.{B/H/W/D}; | ||
4 | - XVSTELM.{B/H/W/D}. | ||
1 | 5 | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230914022645.1151356-56-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/insns.decode | 18 ++ | ||
11 | target/loongarch/disas.c | 24 +++ | ||
12 | target/loongarch/insn_trans/trans_vec.c.inc | 212 ++++++++++++++------ | ||
13 | 3 files changed, 194 insertions(+), 60 deletions(-) | ||
14 | |||
15 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/insns.decode | ||
18 | +++ b/target/loongarch/insns.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
20 | @vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2 | ||
21 | @vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1 | ||
22 | @vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii | ||
23 | +@vr_i8i2x .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s3 | ||
24 | +@vr_i8i3x .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s2 | ||
25 | +@vr_i8i4x .... ...... imm2:4 ........ rj:5 vd:5 &vr_ii imm=%i8s1 | ||
26 | +@vr_i8i5x .... ..... imm2:5 imm:s8 rj:5 vd:5 &vr_ii | ||
27 | @vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr | ||
28 | @v_i13 .... ........ .. imm:13 vd:5 &v_i | ||
29 | |||
30 | @@ -XXX,XX +XXX,XX @@ xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8 | ||
31 | xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8 | ||
32 | xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8 | ||
33 | xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8 | ||
34 | + | ||
35 | +xvld 0010 110010 ............ ..... ..... @vr_i12 | ||
36 | +xvst 0010 110011 ............ ..... ..... @vr_i12 | ||
37 | +xvldx 0011 10000100 10000 ..... ..... ..... @vrr | ||
38 | +xvstx 0011 10000100 11000 ..... ..... ..... @vrr | ||
39 | + | ||
40 | +xvldrepl_d 0011 00100001 0 ......... ..... ..... @vr_i9 | ||
41 | +xvldrepl_w 0011 00100010 .......... ..... ..... @vr_i10 | ||
42 | +xvldrepl_h 0011 0010010 ........... ..... ..... @vr_i11 | ||
43 | +xvldrepl_b 0011 001010 ............ ..... ..... @vr_i12 | ||
44 | +xvstelm_d 0011 00110001 .. ........ ..... ..... @vr_i8i2x | ||
45 | +xvstelm_w 0011 0011001 ... ........ ..... ..... @vr_i8i3x | ||
46 | +xvstelm_h 0011 001101 .... ........ ..... ..... @vr_i8i4x | ||
47 | +xvstelm_b 0011 00111 ..... ........ ..... ..... @vr_i8i5x | ||
48 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/loongarch/disas.c | ||
51 | +++ b/target/loongarch/disas.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic) | ||
53 | output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk); | ||
54 | } | ||
55 | |||
56 | +static void output_vrr_x(DisasContext *ctx, arg_vrr *a, const char *mnemonic) | ||
57 | +{ | ||
58 | + output(ctx, mnemonic, "x%d, r%d, r%d", a->vd, a->rj, a->rk); | ||
59 | +} | ||
60 | + | ||
61 | +static void output_vr_ii_x(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic) | ||
62 | +{ | ||
63 | + output(ctx, mnemonic, "x%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2); | ||
64 | +} | ||
65 | + | ||
66 | INSN_LASX(xvadd_b, vvv) | ||
67 | INSN_LASX(xvadd_h, vvv) | ||
68 | INSN_LASX(xvadd_w, vvv) | ||
69 | @@ -XXX,XX +XXX,XX @@ INSN_LASX(xvextrins_d, vv_i) | ||
70 | INSN_LASX(xvextrins_w, vv_i) | ||
71 | INSN_LASX(xvextrins_h, vv_i) | ||
72 | INSN_LASX(xvextrins_b, vv_i) | ||
73 | + | ||
74 | +INSN_LASX(xvld, vr_i) | ||
75 | +INSN_LASX(xvst, vr_i) | ||
76 | +INSN_LASX(xvldx, vrr) | ||
77 | +INSN_LASX(xvstx, vrr) | ||
78 | + | ||
79 | +INSN_LASX(xvldrepl_d, vr_i) | ||
80 | +INSN_LASX(xvldrepl_w, vr_i) | ||
81 | +INSN_LASX(xvldrepl_h, vr_i) | ||
82 | +INSN_LASX(xvldrepl_b, vr_i) | ||
83 | +INSN_LASX(xvstelm_d, vr_ii) | ||
84 | +INSN_LASX(xvstelm_w, vr_ii) | ||
85 | +INSN_LASX(xvstelm_h, vr_ii) | ||
86 | +INSN_LASX(xvstelm_b, vr_ii) | ||
87 | diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/loongarch/insn_trans/trans_vec.c.inc | ||
90 | +++ b/target/loongarch/insn_trans/trans_vec.c.inc | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a) | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | -#define VLDREPL(NAME, MO) \ | ||
96 | -static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \ | ||
97 | -{ \ | ||
98 | - TCGv addr; \ | ||
99 | - TCGv_i64 val; \ | ||
100 | - \ | ||
101 | - if (!avail_LSX(ctx)) { \ | ||
102 | - return false; \ | ||
103 | - } \ | ||
104 | - \ | ||
105 | - if (!check_vec(ctx, 16)) { \ | ||
106 | - return true; \ | ||
107 | - } \ | ||
108 | - \ | ||
109 | - addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
110 | - val = tcg_temp_new_i64(); \ | ||
111 | - \ | ||
112 | - addr = make_address_i(ctx, addr, a->imm); \ | ||
113 | - \ | ||
114 | - tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO); \ | ||
115 | - tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \ | ||
116 | - \ | ||
117 | - return true; \ | ||
118 | -} | ||
119 | - | ||
120 | -VLDREPL(vldrepl_b, MO_8) | ||
121 | -VLDREPL(vldrepl_h, MO_16) | ||
122 | -VLDREPL(vldrepl_w, MO_32) | ||
123 | -VLDREPL(vldrepl_d, MO_64) | ||
124 | - | ||
125 | -#define VSTELM(NAME, MO, E) \ | ||
126 | -static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \ | ||
127 | -{ \ | ||
128 | - TCGv addr; \ | ||
129 | - TCGv_i64 val; \ | ||
130 | - \ | ||
131 | - if (!avail_LSX(ctx)) { \ | ||
132 | - return false; \ | ||
133 | - } \ | ||
134 | - \ | ||
135 | - if (!check_vec(ctx, 16)) { \ | ||
136 | - return true; \ | ||
137 | - } \ | ||
138 | - \ | ||
139 | - addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
140 | - val = tcg_temp_new_i64(); \ | ||
141 | - \ | ||
142 | - addr = make_address_i(ctx, addr, a->imm); \ | ||
143 | - \ | ||
144 | - tcg_gen_ld_i64(val, cpu_env, \ | ||
145 | - offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \ | ||
146 | - tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO); \ | ||
147 | - \ | ||
148 | - return true; \ | ||
149 | -} | ||
150 | - | ||
151 | -VSTELM(vstelm_b, MO_8, B) | ||
152 | -VSTELM(vstelm_h, MO_16, H) | ||
153 | -VSTELM(vstelm_w, MO_32, W) | ||
154 | -VSTELM(vstelm_d, MO_64, D) | ||
155 | +static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a, | ||
156 | + uint32_t oprsz, MemOp mop) | ||
157 | +{ | ||
158 | + TCGv addr; | ||
159 | + TCGv_i64 val; | ||
160 | + | ||
161 | + if (!check_vec(ctx, oprsz)) { | ||
162 | + return true; | ||
163 | + } | ||
164 | + | ||
165 | + addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
166 | + val = tcg_temp_new_i64(); | ||
167 | + | ||
168 | + addr = make_address_i(ctx, addr, a->imm); | ||
169 | + | ||
170 | + tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop); | ||
171 | + tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val); | ||
172 | + | ||
173 | + return true; | ||
174 | +} | ||
175 | + | ||
176 | +static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) | ||
177 | +{ | ||
178 | + return do_vldrepl_vl(ctx, a, 16, mop); | ||
179 | +} | ||
180 | + | ||
181 | +static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) | ||
182 | +{ | ||
183 | + return do_vldrepl_vl(ctx, a, 32, mop); | ||
184 | +} | ||
185 | + | ||
186 | +TRANS(vldrepl_b, LSX, do_vldrepl, MO_8) | ||
187 | +TRANS(vldrepl_h, LSX, do_vldrepl, MO_16) | ||
188 | +TRANS(vldrepl_w, LSX, do_vldrepl, MO_32) | ||
189 | +TRANS(vldrepl_d, LSX, do_vldrepl, MO_64) | ||
190 | +TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8) | ||
191 | +TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16) | ||
192 | +TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32) | ||
193 | +TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64) | ||
194 | + | ||
195 | +static bool do_vstelm_vl(DisasContext *ctx, | ||
196 | + arg_vr_ii *a, uint32_t oprsz, MemOp mop) | ||
197 | +{ | ||
198 | + TCGv addr; | ||
199 | + TCGv_i64 val; | ||
200 | + | ||
201 | + if (!check_vec(ctx, oprsz)) { | ||
202 | + return true; | ||
203 | + } | ||
204 | + | ||
205 | + addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
206 | + val = tcg_temp_new_i64(); | ||
207 | + | ||
208 | + addr = make_address_i(ctx, addr, a->imm); | ||
209 | + tcg_gen_ld_i64(val, cpu_env, vec_reg_offset(a->vd, a->imm2, mop)); | ||
210 | + tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop); | ||
211 | + return true; | ||
212 | +} | ||
213 | + | ||
214 | +static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) | ||
215 | +{ | ||
216 | + return do_vstelm_vl(ctx, a, 16, mop); | ||
217 | +} | ||
218 | + | ||
219 | +static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) | ||
220 | +{ | ||
221 | + return do_vstelm_vl(ctx, a, 32, mop); | ||
222 | +} | ||
223 | + | ||
224 | +TRANS(vstelm_b, LSX, do_vstelm, MO_8) | ||
225 | +TRANS(vstelm_h, LSX, do_vstelm, MO_16) | ||
226 | +TRANS(vstelm_w, LSX, do_vstelm, MO_32) | ||
227 | +TRANS(vstelm_d, LSX, do_vstelm, MO_64) | ||
228 | +TRANS(xvstelm_b, LASX, do_xvstelm, MO_8) | ||
229 | +TRANS(xvstelm_h, LASX, do_xvstelm, MO_16) | ||
230 | +TRANS(xvstelm_w, LASX, do_xvstelm, MO_32) | ||
231 | +TRANS(xvstelm_d, LASX, do_xvstelm, MO_64) | ||
232 | + | ||
233 | +static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a, | ||
234 | + void (*func)(DisasContext *, int, TCGv)) | ||
235 | +{ | ||
236 | + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
237 | + TCGv temp = NULL; | ||
238 | + | ||
239 | + if (!check_vec(ctx, 32)) { | ||
240 | + return true; | ||
241 | + } | ||
242 | + | ||
243 | + if (a->imm) { | ||
244 | + temp = tcg_temp_new(); | ||
245 | + tcg_gen_addi_tl(temp, addr, a->imm); | ||
246 | + addr = temp; | ||
247 | + } | ||
248 | + | ||
249 | + func(ctx, a->vd, addr); | ||
250 | + return true; | ||
251 | +} | ||
252 | + | ||
253 | +static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr) | ||
254 | +{ | ||
255 | + int i; | ||
256 | + TCGv temp = tcg_temp_new(); | ||
257 | + TCGv dest = tcg_temp_new(); | ||
258 | + | ||
259 | + tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ); | ||
260 | + set_vreg64(dest, vreg, 0); | ||
261 | + | ||
262 | + for (i = 1; i < 4; i++) { | ||
263 | + tcg_gen_addi_tl(temp, addr, 8 * i); | ||
264 | + tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ); | ||
265 | + set_vreg64(dest, vreg, i); | ||
266 | + } | ||
267 | +} | ||
268 | + | ||
269 | +static void gen_xvst(DisasContext *ctx, int vreg, TCGv addr) | ||
270 | +{ | ||
271 | + int i; | ||
272 | + TCGv temp = tcg_temp_new(); | ||
273 | + TCGv dest = tcg_temp_new(); | ||
274 | + | ||
275 | + get_vreg64(dest, vreg, 0); | ||
276 | + tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ); | ||
277 | + | ||
278 | + for (i = 1; i < 4; i++) { | ||
279 | + tcg_gen_addi_tl(temp, addr, 8 * i); | ||
280 | + get_vreg64(dest, vreg, i); | ||
281 | + tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ); | ||
282 | + } | ||
283 | +} | ||
284 | + | ||
285 | +TRANS(xvld, LASX, gen_lasx_memory, gen_xvld) | ||
286 | +TRANS(xvst, LASX, gen_lasx_memory, gen_xvst) | ||
287 | + | ||
288 | +static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a, | ||
289 | + void (*func)(DisasContext *, int, TCGv)) | ||
290 | +{ | ||
291 | + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
292 | + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
293 | + TCGv addr = tcg_temp_new(); | ||
294 | + | ||
295 | + if (!check_vec(ctx, 32)) { | ||
296 | + return true; | ||
297 | + } | ||
298 | + | ||
299 | + tcg_gen_add_tl(addr, src1, src2); | ||
300 | + func(ctx, a->vd, addr); | ||
301 | + | ||
302 | + return true; | ||
303 | +} | ||
304 | + | ||
305 | +TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld) | ||
306 | +TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst) | ||
307 | -- | ||
308 | 2.39.1
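
To make the memory-access pattern above concrete, here is a plain-C model of the two load flavours: gen_xvld splits a 256-bit xvld into four 64-bit loads at addr + 0/8/16/24 (MO_TEUQ), and do_vldrepl_vl loads a single element and broadcasts it across the 32-byte register. The toy_* names are hypothetical, and the memcpy-based loads assume a little-endian host so that they match LoongArch's target byte order.

    #include <stdint.h>
    #include <string.h>

    /* Model of gen_xvld: four consecutive 64-bit loads fill the
     * 256-bit destination. */
    static void toy_xvld(uint64_t dst[4], const uint8_t *addr)
    {
        for (int i = 0; i < 4; i++) {
            memcpy(&dst[i], addr + 8 * i, 8);
        }
    }

    /* Model of do_vldrepl_vl with oprsz == 32 and mop == MO_32
     * (xvldrepl.w): one 32-bit element is replicated into all eight
     * word slots, mirroring tcg_gen_gvec_dup_i64. */
    static void toy_xvldrepl_w(uint32_t dst[8], const uint8_t *addr)
    {
        uint32_t v;
        memcpy(&v, addr, 4);
        for (int i = 0; i < 8; i++) {
            dst[i] = v;
        }
    }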
New patch | |||
---|---|---|---|
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-57-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/vec.h | 42 ++++++++++++++++++++++++++++++ | ||
6 | target/loongarch/vec_helper.c | 48 ----------------------------------- | ||
7 | 2 files changed, 42 insertions(+), 48 deletions(-) | ||
1 | 8 | ||
9 | diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/target/loongarch/vec.h | ||
12 | +++ b/target/loongarch/vec.h | ||
13 | @@ -XXX,XX +XXX,XX @@ | ||
14 | #define Q(x) Q[x] | ||
15 | #endif /* HOST_BIG_ENDIAN */ | ||
16 | |||
17 | +#define DO_ADD(a, b) (a + b) | ||
18 | +#define DO_SUB(a, b) (a - b) | ||
19 | +#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) | ||
20 | +#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) | ||
21 | +#define DO_VABSD(a, b) ((a > b) ? (a - b) : (b - a)) | ||
22 | +#define DO_VABS(a) ((a < 0) ? (-a) : (a)) | ||
23 | +#define DO_MIN(a, b) (a < b ? a : b) | ||
24 | +#define DO_MAX(a, b) (a > b ? a : b) | ||
25 | +#define DO_MUL(a, b) (a * b) | ||
26 | +#define DO_MADD(a, b, c) (a + b * c) | ||
27 | +#define DO_MSUB(a, b, c) (a - b * c) | ||
28 | + | ||
29 | +#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M) | ||
30 | +#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M) | ||
31 | +#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\ | ||
32 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) | ||
33 | +#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ | ||
34 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) | ||
35 | + | ||
36 | +#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) | ||
37 | + | ||
38 | +#define R_SHIFT(a, b) (a >> b) | ||
39 | + | ||
40 | +#define DO_CLO_B(N) (clz32(~N & 0xff) - 24) | ||
41 | +#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) | ||
42 | +#define DO_CLO_W(N) (clz32(~N)) | ||
43 | +#define DO_CLO_D(N) (clz64(~N)) | ||
44 | +#define DO_CLZ_B(N) (clz32(N) - 24) | ||
45 | +#define DO_CLZ_H(N) (clz32(N) - 16) | ||
46 | +#define DO_CLZ_W(N) (clz32(N)) | ||
47 | +#define DO_CLZ_D(N) (clz64(N)) | ||
48 | + | ||
49 | +#define DO_BITCLR(a, bit) (a & ~(1ull << bit)) | ||
50 | +#define DO_BITSET(a, bit) (a | 1ull << bit) | ||
51 | +#define DO_BITREV(a, bit) (a ^ (1ull << bit)) | ||
52 | + | ||
53 | +#define VSEQ(a, b) (a == b ? -1 : 0) | ||
54 | +#define VSLE(a, b) (a <= b ? -1 : 0) | ||
55 | +#define VSLT(a, b) (a < b ? -1 : 0) | ||
56 | + | ||
57 | +#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03)) | ||
58 | + | ||
59 | #endif /* LOONGARCH_VEC_H */ | ||
60 | diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/vec_helper.c | ||
63 | +++ b/target/loongarch/vec_helper.c | ||
64 | @@ -XXX,XX +XXX,XX @@ | ||
65 | #include "vec.h" | ||
66 | #include "tcg/tcg-gvec-desc.h" | ||
67 | |||
68 | -#define DO_ADD(a, b) (a + b) | ||
69 | -#define DO_SUB(a, b) (a - b) | ||
70 | - | ||
71 | #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
72 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
73 | { \ | ||
74 | @@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
75 | DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) | ||
76 | DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
77 | |||
78 | -#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) | ||
79 | -#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) | ||
80 | - | ||
81 | #define DO_3OP(NAME, BIT, E, DO_OP) \ | ||
82 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
83 | { \ | ||
84 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) | ||
85 | DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) | ||
86 | DO_3OP(vavgr_du, 64, UD, DO_VAVGR) | ||
87 | |||
88 | -#define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a)) | ||
89 | - | ||
90 | DO_3OP(vabsd_b, 8, B, DO_VABSD) | ||
91 | DO_3OP(vabsd_h, 16, H, DO_VABSD) | ||
92 | DO_3OP(vabsd_w, 32, W, DO_VABSD) | ||
93 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_hu, 16, UH, DO_VABSD) | ||
94 | DO_3OP(vabsd_wu, 32, UW, DO_VABSD) | ||
95 | DO_3OP(vabsd_du, 64, UD, DO_VABSD) | ||
96 | |||
97 | -#define DO_VABS(a) ((a < 0) ? (-a) : (a)) | ||
98 | - | ||
99 | #define DO_VADDA(NAME, BIT, E) \ | ||
100 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
101 | { \ | ||
102 | @@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_h, 16, H) | ||
103 | DO_VADDA(vadda_w, 32, W) | ||
104 | DO_VADDA(vadda_d, 64, D) | ||
105 | |||
106 | -#define DO_MIN(a, b) (a < b ? a : b) | ||
107 | -#define DO_MAX(a, b) (a > b ? a : b) | ||
108 | - | ||
109 | #define VMINMAXI(NAME, BIT, E, DO_OP) \ | ||
110 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
111 | { \ | ||
112 | @@ -XXX,XX +XXX,XX @@ DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) | ||
113 | DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) | ||
114 | DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) | ||
115 | |||
116 | -#define DO_MUL(a, b) (a * b) | ||
117 | - | ||
118 | DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) | ||
119 | DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) | ||
120 | DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) | ||
121 | @@ -XXX,XX +XXX,XX @@ DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
122 | DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
123 | DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
124 | |||
125 | -#define DO_MADD(a, b, c) (a + b * c) | ||
126 | -#define DO_MSUB(a, b, c) (a - b * c) | ||
127 | - | ||
128 | #define VMADDSUB(NAME, BIT, E, DO_OP) \ | ||
129 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
130 | { \ | ||
131 | @@ -XXX,XX +XXX,XX @@ VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
132 | VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
133 | VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
134 | |||
135 | -#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M) | ||
136 | -#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M) | ||
137 | -#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\ | ||
138 | - unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) | ||
139 | -#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ | ||
140 | - unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) | ||
141 | - | ||
142 | #define VDIV(NAME, BIT, E, DO_OP) \ | ||
143 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
144 | { \ | ||
145 | @@ -XXX,XX +XXX,XX @@ VEXT2XV(vext2xv_wu_hu, 32, UW, UH) | ||
146 | VEXT2XV(vext2xv_du_hu, 64, UD, UH) | ||
147 | VEXT2XV(vext2xv_du_wu, 64, UD, UW) | ||
148 | |||
149 | -#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) | ||
150 | - | ||
151 | DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) | ||
152 | DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) | ||
153 | DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) | ||
154 | @@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_h, 16, H) | ||
155 | VSRARI(vsrari_w, 32, W) | ||
156 | VSRARI(vsrari_d, 64, D) | ||
157 | |||
158 | -#define R_SHIFT(a, b) (a >> b) | ||
159 | - | ||
160 | #define VSRLN(NAME, BIT, E1, E2) \ | ||
161 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
162 | { \ | ||
163 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ | ||
164 | } \ | ||
165 | } | ||
166 | |||
167 | -#define DO_CLO_B(N) (clz32(~N & 0xff) - 24) | ||
168 | -#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) | ||
169 | -#define DO_CLO_W(N) (clz32(~N)) | ||
170 | -#define DO_CLO_D(N) (clz64(~N)) | ||
171 | -#define DO_CLZ_B(N) (clz32(N) - 24) | ||
172 | -#define DO_CLZ_H(N) (clz32(N) - 16) | ||
173 | -#define DO_CLZ_W(N) (clz32(N)) | ||
174 | -#define DO_CLZ_D(N) (clz64(N)) | ||
175 | - | ||
176 | DO_2OP(vclo_b, 8, UB, DO_CLO_B) | ||
177 | DO_2OP(vclo_h, 16, UH, DO_CLO_H) | ||
178 | DO_2OP(vclo_w, 32, UW, DO_CLO_W) | ||
179 | @@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_h, 16, UH, ctpop16) | ||
180 | VPCNT(vpcnt_w, 32, UW, ctpop32) | ||
181 | VPCNT(vpcnt_d, 64, UD, ctpop64) | ||
182 | |||
183 | -#define DO_BITCLR(a, bit) (a & ~(1ull << bit)) | ||
184 | -#define DO_BITSET(a, bit) (a | 1ull << bit) | ||
185 | -#define DO_BITREV(a, bit) (a ^ (1ull << bit)) | ||
186 | - | ||
187 | #define DO_BIT(NAME, BIT, E, DO_OP) \ | ||
188 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ | ||
189 | { \ | ||
190 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, | ||
191 | *Vd = temp; | ||
192 | } | ||
193 | |||
194 | -#define VSEQ(a, b) (a == b ? -1 : 0) | ||
195 | -#define VSLE(a, b) (a <= b ? -1 : 0) | ||
196 | -#define VSLT(a, b) (a < b ? -1 : 0) | ||
197 | - | ||
198 | #define VCMPI(NAME, BIT, E, DO_OP) \ | ||
199 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ | ||
200 | { \ | ||
201 | @@ -XXX,XX +XXX,XX @@ VILVH(vilvh_h, 32, H) | ||
202 | VILVH(vilvh_w, 64, W) | ||
203 | VILVH(vilvh_d, 128, D) | ||
204 | |||
205 | -#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03)) | ||
206 | - | ||
207 | void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) | ||
208 | { | ||
209 | int i, j, m; | ||
210 | -- | ||
211 | 2.39.1
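
The DO_VAVG/DO_VAVGR macros that just moved into vec.h are worth a closer look: they compute an element-wise average without widening. Adding the halved operands loses the carry when both low bits are set, and (a & b & 1) restores it; DO_VAVGR instead rounds up via ((a | b) & 1). A self-contained check over all signed bytes (toy code, relying on the arithmetic right shift the helpers also rely on):

    #include <assert.h>
    #include <stdint.h>

    /* DO_VAVG specialised to signed bytes. */
    static int8_t vavg_b(int8_t a, int8_t b)
    {
        return (a >> 1) + (b >> 1) + (a & b & 1);
    }

    int main(void)
    {
        for (int a = -128; a < 128; a++) {
            for (int b = -128; b < 128; b++) {
                /* Matches the exact wide computation floor((a + b) / 2);
                 * >> on a negative int is taken as an arithmetic shift. */
                assert(vavg_b((int8_t)a, (int8_t)b) == ((a + b) >> 1));
            }
        }
        return 0;
    }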
New patch | |||
---|---|---|---|
1 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
2 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | Message-Id: <20230914022645.1151356-58-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/cpu.c | 1 + | ||
6 | 1 file changed, 1 insertion(+) | ||
1 | 7 | ||
8 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/loongarch/cpu.c | ||
11 | +++ b/target/loongarch/cpu.c | ||
12 | @@ -XXX,XX +XXX,XX @@ static void loongarch_la464_initfn(Object *obj) | ||
13 | data = FIELD_DP32(data, CPUCFG2, FP_DP, 1); | ||
14 | data = FIELD_DP32(data, CPUCFG2, FP_VER, 1); | ||
15 | data = FIELD_DP32(data, CPUCFG2, LSX, 1), | ||
16 | + data = FIELD_DP32(data, CPUCFG2, LASX, 1), | ||
17 | data = FIELD_DP32(data, CPUCFG2, LLFTP, 1); | ||
18 | data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1); | ||
19 | data = FIELD_DP32(data, CPUCFG2, LSPW, 1); | ||
20 | -- | ||
21 | 2.39.1
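
With CPUCFG2.LASX set on la464, guests can discover the new instructions at run time via the cpucfg instruction. A minimal guest-side probe might look like the sketch below; it only builds for a LoongArch target, the read_cpucfg wrapper follows the form used by the Linux kernel, and the bit positions (LSX bit 6, LASX bit 7 of CPUCFG word 2) are stated here as assumptions to check against the LoongArch EAS.

    #include <stdint.h>
    #include <stdio.h>

    /* Read one CPUCFG word from inside the emulated machine. */
    static inline uint32_t read_cpucfg(uint32_t word)
    {
        uint32_t val;
        __asm__ volatile("cpucfg %0, %1" : "=r"(val) : "r"(word));
        return val;
    }

    int main(void)
    {
        uint32_t cfg2 = read_cpucfg(2);    /* CPUCFG word 2: FP/SIMD features */
        printf("LSX:  %s\n", (cfg2 & (1u << 6)) ? "yes" : "no");
        printf("LASX: %s\n", (cfg2 & (1u << 7)) ? "yes" : "no");
        return 0;
    }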