1 | The following changes since commit f6b761bdbd8ba63cee7428d52fb6b46e4224ddab: | 1 | The following changes since commit 7433709a147706ad7d1956b15669279933d0f82b: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'qga-pull-2023-05-04' of https://github.com/kostyanf14/qemu into staging (2023-05-04 12:08:00 +0100) | 3 | Merge tag 'hw-misc-20250113' of https://github.com/philmd/qemu into staging (2025-01-14 12:46:56 -0500) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20230505 | 7 | https://gitlab.com/bibo-mao/qemu.git tags/pull-loongarch-20250116 |
8 | 8 | ||
9 | for you to fetch changes up to 9dd207d409cc2eb08fe52965b9d1fd4a12a82bd5: | 9 | for you to fetch changes up to bb81f237401b5f89f6bba21d9d4f50e0073372a6: |
10 | 10 | ||
11 | hw/intc: don't use target_ulong for LoongArch ipi (2023-05-05 10:00:47 +0800) | 11 | hw/intc/loongarch_ipi: Use alternative implementation for cpu_by_arch_id (2025-01-15 14:36:19 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Add LoongArch LSX instructions. | 14 | pull-loongarch-20250116 queue |
15 | 15 | ||
16 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
17 | Alex Bennée (1): | 17 | Bibo Mao (7): |
18 | hw/intc: don't use target_ulong for LoongArch ipi | 18 | hw/intc/loongarch_ipi: Implement realize interface |
19 | hw/intc/loongson_ipi: Remove num_cpu from loongson_ipi_common | ||
20 | hw/intc/loongson_ipi: Remove property num_cpu from loongson_ipi_common | ||
21 | hw/intc/loongarch_ipi: Get cpu number from possible_cpu_arch_ids | ||
22 | hw/intc/loongarch_ipi: Remove property num-cpu | ||
23 | hw/intc/loongson_ipi: Add more input parameter for cpu_by_arch_id | ||
24 | hw/intc/loongarch_ipi: Use alternative implementation for cpu_by_arch_id | ||
19 | 25 | ||
20 | Song Gao (44): | 26 | Miao Hao (1): |
21 | target/loongarch: Add LSX data type VReg | 27 | target/loongarch: Add page table walker support for debugger usage |
22 | target/loongarch: meson.build support build LSX | ||
23 | target/loongarch: Add CHECK_SXE macro for checking LSX enable | ||
24 | target/loongarch: Implement vadd/vsub | ||
25 | target/loongarch: Implement vaddi/vsubi | ||
26 | target/loongarch: Implement vneg | ||
27 | target/loongarch: Implement vsadd/vssub | ||
28 | target/loongarch: Implement vhaddw/vhsubw | ||
29 | target/loongarch: Implement vaddw/vsubw | ||
30 | target/loongarch: Implement vavg/vavgr | ||
31 | target/loongarch: Implement vabsd | ||
32 | target/loongarch: Implement vadda | ||
33 | target/loongarch: Implement vmax/vmin | ||
34 | target/loongarch: Implement vmul/vmuh/vmulw{ev/od} | ||
35 | target/loongarch: Implement vmadd/vmsub/vmaddw{ev/od} | ||
36 | target/loongarch: Implement vdiv/vmod | ||
37 | target/loongarch: Implement vsat | ||
38 | target/loongarch: Implement vexth | ||
39 | target/loongarch: Implement vsigncov | ||
40 | target/loongarch: Implement vmskltz/vmskgez/vmsknz | ||
41 | target/loongarch: Implement LSX logic instructions | ||
42 | target/loongarch: Implement vsll vsrl vsra vrotr | ||
43 | target/loongarch: Implement vsllwil vextl | ||
44 | target/loongarch: Implement vsrlr vsrar | ||
45 | target/loongarch: Implement vsrln vsran | ||
46 | target/loongarch: Implement vsrlrn vsrarn | ||
47 | target/loongarch: Implement vssrln vssran | ||
48 | target/loongarch: Implement vssrlrn vssrarn | ||
49 | target/loongarch: Implement vclo vclz | ||
50 | target/loongarch: Implement vpcnt | ||
51 | target/loongarch: Implement vbitclr vbitset vbitrev | ||
52 | target/loongarch: Implement vfrstp | ||
53 | target/loongarch: Implement LSX fpu arith instructions | ||
54 | target/loongarch: Implement LSX fpu fcvt instructions | ||
55 | target/loongarch: Implement vseq vsle vslt | ||
56 | target/loongarch: Implement vfcmp | ||
57 | target/loongarch: Implement vbitsel vset | ||
58 | target/loongarch: Implement vinsgr2vr vpickve2gr vreplgr2vr | ||
59 | target/loongarch: Implement vreplve vpack vpick | ||
60 | target/loongarch: Implement vilvl vilvh vextrins vshuf | ||
61 | target/loongarch: Implement vld vst | ||
62 | target/loongarch: Implement vldi | ||
63 | target/loongarch: Use {set/get}_gpr to replace cpu_fpr | ||
64 | target/loongarch: CPUCFG support LSX | ||
65 | 28 | ||
66 | hw/intc/loongarch_ipi.c | 2 +- | 29 | hw/intc/loongarch_ipi.c | 69 ++++++++++++++++++------- |
67 | linux-user/loongarch64/signal.c | 4 +- | 30 | hw/intc/loongson_ipi.c | 43 +++++++++++++++- |
68 | target/loongarch/cpu.c | 5 +- | 31 | hw/intc/loongson_ipi_common.c | 41 +++++---------- |
69 | target/loongarch/cpu.h | 27 +- | 32 | hw/loongarch/virt.c | 1 - |
70 | target/loongarch/disas.c | 911 +++++ | 33 | include/hw/intc/loongarch_ipi.h | 1 + |
71 | target/loongarch/fpu_helper.c | 2 +- | 34 | include/hw/intc/loongson_ipi_common.h | 5 +- |
72 | target/loongarch/gdbstub.c | 4 +- | 35 | target/loongarch/cpu_helper.c | 94 +++++++++++++++++++++++++++++++++-- |
73 | target/loongarch/helper.h | 566 +++ | 36 | target/loongarch/internals.h | 4 +- |
74 | target/loongarch/insn_trans/trans_farith.c.inc | 72 +- | 37 | target/loongarch/tcg/tlb_helper.c | 4 +- |
75 | target/loongarch/insn_trans/trans_fcmp.c.inc | 12 +- | 38 | 9 files changed, 203 insertions(+), 59 deletions(-) |
76 | target/loongarch/insn_trans/trans_fmemory.c.inc | 37 +- | ||
77 | target/loongarch/insn_trans/trans_fmov.c.inc | 31 +- | ||
78 | target/loongarch/insn_trans/trans_lsx.c.inc | 4400 +++++++++++++++++++++++ | ||
79 | target/loongarch/insns.decode | 811 +++++ | ||
80 | target/loongarch/internals.h | 23 + | ||
81 | target/loongarch/lsx_helper.c | 3004 ++++++++++++++++ | ||
82 | target/loongarch/machine.c | 79 +- | ||
83 | target/loongarch/meson.build | 1 + | ||
84 | target/loongarch/translate.c | 55 +- | ||
85 | target/loongarch/translate.h | 1 + | ||
86 | 20 files changed, 9989 insertions(+), 58 deletions(-) | ||
87 | create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc | ||
88 | create mode 100644 target/loongarch/lsx_helper.c | ||
89 | |||
90 | diff view generated by jsdifflib |
1 | This patch includes: | 1 | From: Miao Hao <haomiao23s@ict.ac.cn> |
---|---|---|---|
2 | - VF{ADD/SUB/MUL/DIV}.{S/D}; | ||
3 | - VF{MADD/MSUB/NMADD/NMSUB}.{S/D}; | ||
4 | - VF{MAX/MIN}.{S/D}; | ||
5 | - VF{MAXA/MINA}.{S/D}; | ||
6 | - VFLOGB.{S/D}; | ||
7 | - VFCLASS.{S/D}; | ||
8 | - VF{SQRT/RECIP/RSQRT}.{S/D}. | ||
9 | 2 | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 3 | When dumping memory content with a GVA address, a software page table walker |
11 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 4 | is necessary to get the corresponding GPA address. |
12 | Message-Id: <20230504122810.4094787-34-gaosong@loongson.cn> | 5 | |
6 | Here a page table walker is added for debugger usage. | ||
7 | |||
8 | Signed-off-by: Miao Hao <haomiao23s@ict.ac.cn> | ||
9 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> | ||
10 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> | ||
13 | --- | 11 | --- |
14 | target/loongarch/cpu.h | 4 + | 12 | target/loongarch/cpu_helper.c | 94 +++++++++++++++++++++++++++++-- |
15 | target/loongarch/disas.c | 46 +++++ | 13 | target/loongarch/internals.h | 4 +- |
16 | target/loongarch/fpu_helper.c | 2 +- | 14 | target/loongarch/tcg/tlb_helper.c | 4 +- |
17 | target/loongarch/helper.h | 41 +++++ | 15 | 3 files changed, 94 insertions(+), 8 deletions(-) |
18 | target/loongarch/insn_trans/trans_lsx.c.inc | 55 ++++++ | ||
19 | target/loongarch/insns.decode | 43 +++++ | ||
20 | target/loongarch/internals.h | 1 + | ||
21 | target/loongarch/lsx_helper.c | 186 ++++++++++++++++++++ | ||
22 | 8 files changed, 377 insertions(+), 1 deletion(-) | ||
23 | 16 | ||
24 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | 17 | diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c |
25 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/target/loongarch/cpu.h | 19 | --- a/target/loongarch/cpu_helper.c |
27 | +++ b/target/loongarch/cpu.h | 20 | +++ b/target/loongarch/cpu_helper.c |
28 | @@ -XXX,XX +XXX,XX @@ FIELD(FCSR0, CAUSE, 24, 5) | 21 | @@ -XXX,XX +XXX,XX @@ bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, |
29 | do { \ | 22 | return false; |
30 | (REG) = FIELD_DP32(REG, FCSR0, CAUSE, V); \ | ||
31 | } while (0) | ||
32 | +#define UPDATE_FP_CAUSE(REG, V) \ | ||
33 | + do { \ | ||
34 | + (REG) |= FIELD_DP32(0, FCSR0, CAUSE, V); \ | ||
35 | + } while (0) | ||
36 | |||
37 | #define GET_FP_ENABLES(REG) FIELD_EX32(REG, FCSR0, ENABLES) | ||
38 | #define SET_FP_ENABLES(REG, V) \ | ||
39 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/loongarch/disas.c | ||
42 | +++ b/target/loongarch/disas.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static void output_vv(DisasContext *ctx, arg_vv *a, const char *mnemonic) | ||
44 | output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj); | ||
45 | } | 23 | } |
46 | 24 | ||
47 | +static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic) | 25 | +static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical, |
26 | + int *prot, target_ulong address) | ||
48 | +{ | 27 | +{ |
49 | + output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va); | 28 | + CPUState *cs = env_cpu(env); |
29 | + target_ulong index, phys; | ||
30 | + uint64_t dir_base, dir_width; | ||
31 | + uint64_t base; | ||
32 | + int level; | ||
33 | + | ||
34 | + if ((address >> 63) & 0x1) { | ||
35 | + base = env->CSR_PGDH; | ||
36 | + } else { | ||
37 | + base = env->CSR_PGDL; | ||
38 | + } | ||
39 | + base &= TARGET_PHYS_MASK; | ||
40 | + | ||
41 | + for (level = 4; level > 0; level--) { | ||
42 | + get_dir_base_width(env, &dir_base, &dir_width, level); | ||
43 | + | ||
44 | + if (dir_width == 0) { | ||
45 | + continue; | ||
46 | + } | ||
47 | + | ||
48 | + /* get next level page directory */ | ||
49 | + index = (address >> dir_base) & ((1 << dir_width) - 1); | ||
50 | + phys = base | index << 3; | ||
51 | + base = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; | ||
52 | + if (FIELD_EX64(base, TLBENTRY, HUGE)) { | ||
53 | + /* base is a huge pte */ | ||
54 | + break; | ||
55 | + } | ||
56 | + } | ||
57 | + | ||
58 | + /* pte */ | ||
59 | + if (FIELD_EX64(base, TLBENTRY, HUGE)) { | ||
60 | + /* Huge Page. base is pte */ | ||
61 | + base = FIELD_DP64(base, TLBENTRY, LEVEL, 0); | ||
62 | + base = FIELD_DP64(base, TLBENTRY, HUGE, 0); | ||
63 | + if (FIELD_EX64(base, TLBENTRY, HGLOBAL)) { | ||
64 | + base = FIELD_DP64(base, TLBENTRY, HGLOBAL, 0); | ||
65 | + base = FIELD_DP64(base, TLBENTRY, G, 1); | ||
66 | + } | ||
67 | + } else { | ||
68 | + /* Normal Page. base points to pte */ | ||
69 | + get_dir_base_width(env, &dir_base, &dir_width, 0); | ||
70 | + index = (address >> dir_base) & ((1 << dir_width) - 1); | ||
71 | + phys = base | index << 3; | ||
72 | + base = ldq_phys(cs->as, phys); | ||
73 | + } | ||
74 | + | ||
75 | + /* TODO: check plv and other bits? */ | ||
76 | + | ||
77 | + /* base is pte, in normal pte format */ | ||
78 | + if (!FIELD_EX64(base, TLBENTRY, V)) { | ||
79 | + return TLBRET_NOMATCH; | ||
80 | + } | ||
81 | + | ||
82 | + if (!FIELD_EX64(base, TLBENTRY, D)) { | ||
83 | + *prot = PAGE_READ; | ||
84 | + } else { | ||
85 | + *prot = PAGE_READ | PAGE_WRITE; | ||
86 | + } | ||
87 | + | ||
88 | + /* get TARGET_PAGE_SIZE aligned physical address */ | ||
89 | + base += (address & TARGET_PHYS_MASK) & ((1 << dir_base) - 1); | ||
90 | + /* mask RPLV, NX, NR bits */ | ||
91 | + base = FIELD_DP64(base, TLBENTRY_64, RPLV, 0); | ||
92 | + base = FIELD_DP64(base, TLBENTRY_64, NX, 0); | ||
93 | + base = FIELD_DP64(base, TLBENTRY_64, NR, 0); | ||
94 | + /* mask other attribute bits */ | ||
95 | + *physical = base & TARGET_PAGE_MASK; | ||
96 | + | ||
97 | + return 0; | ||
50 | +} | 98 | +} |
51 | + | 99 | + |
52 | INSN_LSX(vadd_b, vvv) | 100 | static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, |
53 | INSN_LSX(vadd_h, vvv) | 101 | int *prot, target_ulong address, |
54 | INSN_LSX(vadd_w, vvv) | 102 | - MMUAccessType access_type, int mmu_idx) |
55 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vfrstp_b, vvv) | 103 | + MMUAccessType access_type, int mmu_idx, |
56 | INSN_LSX(vfrstp_h, vvv) | 104 | + int is_debug) |
57 | INSN_LSX(vfrstpi_b, vv_i) | 105 | { |
58 | INSN_LSX(vfrstpi_h, vv_i) | 106 | int index, match; |
59 | + | 107 | |
60 | +INSN_LSX(vfadd_s, vvv) | 108 | @@ -XXX,XX +XXX,XX @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, |
61 | +INSN_LSX(vfadd_d, vvv) | 109 | if (match) { |
62 | +INSN_LSX(vfsub_s, vvv) | 110 | return loongarch_map_tlb_entry(env, physical, prot, |
63 | +INSN_LSX(vfsub_d, vvv) | 111 | address, access_type, index, mmu_idx); |
64 | +INSN_LSX(vfmul_s, vvv) | 112 | + } else if (is_debug) { |
65 | +INSN_LSX(vfmul_d, vvv) | 113 | + /* |
66 | +INSN_LSX(vfdiv_s, vvv) | 115 | + * legal mapping, even if the mapping is not yet in the TLB. Return 0 if |
67 | +INSN_LSX(vfdiv_d, vvv) | 116 | + * there is a valid map, else non-zero. |
68 | + | 116 | + * there is a valid map, else none zero. |
69 | +INSN_LSX(vfmadd_s, vvvv) | 117 | + */ |
70 | +INSN_LSX(vfmadd_d, vvvv) | 118 | + return loongarch_page_table_walker(env, physical, prot, address); |
71 | +INSN_LSX(vfmsub_s, vvvv) | 119 | } |
72 | +INSN_LSX(vfmsub_d, vvvv) | 120 | |
73 | +INSN_LSX(vfnmadd_s, vvvv) | 121 | return TLBRET_NOMATCH; |
74 | +INSN_LSX(vfnmadd_d, vvvv) | 122 | @@ -XXX,XX +XXX,XX @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, |
75 | +INSN_LSX(vfnmsub_s, vvvv) | 123 | #else |
76 | +INSN_LSX(vfnmsub_d, vvvv) | 124 | static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, |
77 | + | 125 | int *prot, target_ulong address, |
78 | +INSN_LSX(vfmax_s, vvv) | 126 | - MMUAccessType access_type, int mmu_idx) |
79 | +INSN_LSX(vfmax_d, vvv) | 127 | + MMUAccessType access_type, int mmu_idx, |
80 | +INSN_LSX(vfmin_s, vvv) | 128 | + int is_debug) |
81 | +INSN_LSX(vfmin_d, vvv) | 129 | { |
82 | + | 130 | return TLBRET_NOMATCH; |
83 | +INSN_LSX(vfmaxa_s, vvv) | ||
84 | +INSN_LSX(vfmaxa_d, vvv) | ||
85 | +INSN_LSX(vfmina_s, vvv) | ||
86 | +INSN_LSX(vfmina_d, vvv) | ||
87 | + | ||
88 | +INSN_LSX(vflogb_s, vv) | ||
89 | +INSN_LSX(vflogb_d, vv) | ||
90 | + | ||
91 | +INSN_LSX(vfclass_s, vv) | ||
92 | +INSN_LSX(vfclass_d, vv) | ||
93 | + | ||
94 | +INSN_LSX(vfsqrt_s, vv) | ||
95 | +INSN_LSX(vfsqrt_d, vv) | ||
96 | +INSN_LSX(vfrecip_s, vv) | ||
97 | +INSN_LSX(vfrecip_d, vv) | ||
98 | +INSN_LSX(vfrsqrt_s, vv) | ||
99 | +INSN_LSX(vfrsqrt_d, vv) | ||
100 | diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/target/loongarch/fpu_helper.c | ||
103 | +++ b/target/loongarch/fpu_helper.c | ||
104 | @@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env) | ||
105 | set_flush_to_zero(0, &env->fp_status); | ||
106 | } | 131 | } |
107 | 132 | @@ -XXX,XX +XXX,XX @@ static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, | |
108 | -static int ieee_ex_to_loongarch(int xcpt) | 133 | |
109 | +int ieee_ex_to_loongarch(int xcpt) | 134 | int get_physical_address(CPULoongArchState *env, hwaddr *physical, |
135 | int *prot, target_ulong address, | ||
136 | - MMUAccessType access_type, int mmu_idx) | ||
137 | + MMUAccessType access_type, int mmu_idx, int is_debug) | ||
110 | { | 138 | { |
111 | int ret = 0; | 139 | int user_mode = mmu_idx == MMU_USER_IDX; |
112 | if (xcpt & float_flag_invalid) { | 140 | int kernel_mode = mmu_idx == MMU_KERNEL_IDX; |
113 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | 141 | @@ -XXX,XX +XXX,XX @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical, |
114 | index XXXXXXX..XXXXXXX 100644 | 142 | |
115 | --- a/target/loongarch/helper.h | 143 | /* Mapped address */ |
116 | +++ b/target/loongarch/helper.h | 144 | return loongarch_map_address(env, physical, prot, address, |
117 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32) | 145 | - access_type, mmu_idx); |
118 | DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32) | 146 | + access_type, mmu_idx, is_debug); |
119 | DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32) | 147 | } |
120 | DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32) | 148 | |
121 | + | 149 | hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) |
122 | +DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32) | 150 | @@ -XXX,XX +XXX,XX @@ hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) |
123 | +DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32) | 151 | int prot; |
124 | +DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32) | 152 | |
125 | +DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32) | 153 | if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, |
126 | +DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32) | 154 | - cpu_mmu_index(cs, false)) != 0) { |
127 | +DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32) | 155 | + cpu_mmu_index(cs, false), 1) != 0) { |
128 | +DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32) | 156 | return -1; |
129 | +DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32) | 157 | } |
130 | + | 158 | return phys_addr; |
131 | +DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32) | ||
132 | +DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32) | ||
133 | +DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32) | ||
134 | +DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32) | ||
135 | +DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32) | ||
136 | +DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32) | ||
137 | +DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32) | ||
138 | +DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32) | ||
139 | + | ||
140 | +DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32) | ||
141 | +DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32) | ||
142 | +DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32) | ||
143 | +DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32) | ||
144 | + | ||
145 | +DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32) | ||
146 | +DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32) | ||
147 | +DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32) | ||
148 | +DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32) | ||
149 | + | ||
150 | +DEF_HELPER_3(vflogb_s, void, env, i32, i32) | ||
151 | +DEF_HELPER_3(vflogb_d, void, env, i32, i32) | ||
152 | + | ||
153 | +DEF_HELPER_3(vfclass_s, void, env, i32, i32) | ||
154 | +DEF_HELPER_3(vfclass_d, void, env, i32, i32) | ||
155 | + | ||
156 | +DEF_HELPER_3(vfsqrt_s, void, env, i32, i32) | ||
157 | +DEF_HELPER_3(vfsqrt_d, void, env, i32, i32) | ||
158 | +DEF_HELPER_3(vfrecip_s, void, env, i32, i32) | ||
159 | +DEF_HELPER_3(vfrecip_d, void, env, i32, i32) | ||
160 | +DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32) | ||
161 | +DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32) | ||
162 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
165 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
166 | @@ -XXX,XX +XXX,XX @@ | ||
167 | #define CHECK_SXE | ||
168 | #endif | ||
169 | |||
170 | +static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, | ||
171 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, | ||
172 | + TCGv_i32, TCGv_i32)) | ||
173 | +{ | ||
174 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
175 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
176 | + TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
177 | + TCGv_i32 va = tcg_constant_i32(a->va); | ||
178 | + | ||
179 | + CHECK_SXE; | ||
180 | + func(cpu_env, vd, vj, vk, va); | ||
181 | + return true; | ||
182 | +} | ||
183 | + | ||
184 | static bool gen_vvv(DisasContext *ctx, arg_vvv *a, | ||
185 | void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
186 | { | ||
187 | @@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b) | ||
188 | TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h) | ||
189 | TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b) | ||
190 | TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h) | ||
191 | + | ||
192 | +TRANS(vfadd_s, gen_vvv, gen_helper_vfadd_s) | ||
193 | +TRANS(vfadd_d, gen_vvv, gen_helper_vfadd_d) | ||
194 | +TRANS(vfsub_s, gen_vvv, gen_helper_vfsub_s) | ||
195 | +TRANS(vfsub_d, gen_vvv, gen_helper_vfsub_d) | ||
196 | +TRANS(vfmul_s, gen_vvv, gen_helper_vfmul_s) | ||
197 | +TRANS(vfmul_d, gen_vvv, gen_helper_vfmul_d) | ||
198 | +TRANS(vfdiv_s, gen_vvv, gen_helper_vfdiv_s) | ||
199 | +TRANS(vfdiv_d, gen_vvv, gen_helper_vfdiv_d) | ||
200 | + | ||
201 | +TRANS(vfmadd_s, gen_vvvv, gen_helper_vfmadd_s) | ||
202 | +TRANS(vfmadd_d, gen_vvvv, gen_helper_vfmadd_d) | ||
203 | +TRANS(vfmsub_s, gen_vvvv, gen_helper_vfmsub_s) | ||
204 | +TRANS(vfmsub_d, gen_vvvv, gen_helper_vfmsub_d) | ||
205 | +TRANS(vfnmadd_s, gen_vvvv, gen_helper_vfnmadd_s) | ||
206 | +TRANS(vfnmadd_d, gen_vvvv, gen_helper_vfnmadd_d) | ||
207 | +TRANS(vfnmsub_s, gen_vvvv, gen_helper_vfnmsub_s) | ||
208 | +TRANS(vfnmsub_d, gen_vvvv, gen_helper_vfnmsub_d) | ||
209 | + | ||
210 | +TRANS(vfmax_s, gen_vvv, gen_helper_vfmax_s) | ||
211 | +TRANS(vfmax_d, gen_vvv, gen_helper_vfmax_d) | ||
212 | +TRANS(vfmin_s, gen_vvv, gen_helper_vfmin_s) | ||
213 | +TRANS(vfmin_d, gen_vvv, gen_helper_vfmin_d) | ||
214 | + | ||
215 | +TRANS(vfmaxa_s, gen_vvv, gen_helper_vfmaxa_s) | ||
216 | +TRANS(vfmaxa_d, gen_vvv, gen_helper_vfmaxa_d) | ||
217 | +TRANS(vfmina_s, gen_vvv, gen_helper_vfmina_s) | ||
218 | +TRANS(vfmina_d, gen_vvv, gen_helper_vfmina_d) | ||
219 | + | ||
220 | +TRANS(vflogb_s, gen_vv, gen_helper_vflogb_s) | ||
221 | +TRANS(vflogb_d, gen_vv, gen_helper_vflogb_d) | ||
222 | + | ||
223 | +TRANS(vfclass_s, gen_vv, gen_helper_vfclass_s) | ||
224 | +TRANS(vfclass_d, gen_vv, gen_helper_vfclass_d) | ||
225 | + | ||
226 | +TRANS(vfsqrt_s, gen_vv, gen_helper_vfsqrt_s) | ||
227 | +TRANS(vfsqrt_d, gen_vv, gen_helper_vfsqrt_d) | ||
228 | +TRANS(vfrecip_s, gen_vv, gen_helper_vfrecip_s) | ||
229 | +TRANS(vfrecip_d, gen_vv, gen_helper_vfrecip_d) | ||
230 | +TRANS(vfrsqrt_s, gen_vv, gen_helper_vfrsqrt_s) | ||
231 | +TRANS(vfrsqrt_d, gen_vv, gen_helper_vfrsqrt_d) | ||
232 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
233 | index XXXXXXX..XXXXXXX 100644 | ||
234 | --- a/target/loongarch/insns.decode | ||
235 | +++ b/target/loongarch/insns.decode | ||
236 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
237 | &vv vd vj | ||
238 | &vvv vd vj vk | ||
239 | &vv_i vd vj imm | ||
240 | +&vvvv vd vj vk va | ||
241 | |||
242 | # | ||
243 | # LSX Formats | ||
244 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
245 | @vv_ui7 .... ........ ... imm:7 vj:5 vd:5 &vv_i | ||
246 | @vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i | ||
247 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
248 | +@vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv | ||
249 | |||
250 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
251 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
252 | @@ -XXX,XX +XXX,XX @@ vfrstp_b 0111 00010010 10110 ..... ..... ..... @vvv | ||
253 | vfrstp_h 0111 00010010 10111 ..... ..... ..... @vvv | ||
254 | vfrstpi_b 0111 00101001 10100 ..... ..... ..... @vv_ui5 | ||
255 | vfrstpi_h 0111 00101001 10101 ..... ..... ..... @vv_ui5 | ||
256 | + | ||
257 | +vfadd_s 0111 00010011 00001 ..... ..... ..... @vvv | ||
258 | +vfadd_d 0111 00010011 00010 ..... ..... ..... @vvv | ||
259 | +vfsub_s 0111 00010011 00101 ..... ..... ..... @vvv | ||
260 | +vfsub_d 0111 00010011 00110 ..... ..... ..... @vvv | ||
261 | +vfmul_s 0111 00010011 10001 ..... ..... ..... @vvv | ||
262 | +vfmul_d 0111 00010011 10010 ..... ..... ..... @vvv | ||
263 | +vfdiv_s 0111 00010011 10101 ..... ..... ..... @vvv | ||
264 | +vfdiv_d 0111 00010011 10110 ..... ..... ..... @vvv | ||
265 | + | ||
266 | +vfmadd_s 0000 10010001 ..... ..... ..... ..... @vvvv | ||
267 | +vfmadd_d 0000 10010010 ..... ..... ..... ..... @vvvv | ||
268 | +vfmsub_s 0000 10010101 ..... ..... ..... ..... @vvvv | ||
269 | +vfmsub_d 0000 10010110 ..... ..... ..... ..... @vvvv | ||
270 | +vfnmadd_s 0000 10011001 ..... ..... ..... ..... @vvvv | ||
271 | +vfnmadd_d 0000 10011010 ..... ..... ..... ..... @vvvv | ||
272 | +vfnmsub_s 0000 10011101 ..... ..... ..... ..... @vvvv | ||
273 | +vfnmsub_d 0000 10011110 ..... ..... ..... ..... @vvvv | ||
274 | + | ||
275 | +vfmax_s 0111 00010011 11001 ..... ..... ..... @vvv | ||
276 | +vfmax_d 0111 00010011 11010 ..... ..... ..... @vvv | ||
277 | +vfmin_s 0111 00010011 11101 ..... ..... ..... @vvv | ||
278 | +vfmin_d 0111 00010011 11110 ..... ..... ..... @vvv | ||
279 | + | ||
280 | +vfmaxa_s 0111 00010100 00001 ..... ..... ..... @vvv | ||
281 | +vfmaxa_d 0111 00010100 00010 ..... ..... ..... @vvv | ||
282 | +vfmina_s 0111 00010100 00101 ..... ..... ..... @vvv | ||
283 | +vfmina_d 0111 00010100 00110 ..... ..... ..... @vvv | ||
284 | + | ||
285 | +vflogb_s 0111 00101001 11001 10001 ..... ..... @vv | ||
286 | +vflogb_d 0111 00101001 11001 10010 ..... ..... @vv | ||
287 | + | ||
288 | +vfclass_s 0111 00101001 11001 10101 ..... ..... @vv | ||
289 | +vfclass_d 0111 00101001 11001 10110 ..... ..... @vv | ||
290 | + | ||
291 | +vfsqrt_s 0111 00101001 11001 11001 ..... ..... @vv | ||
292 | +vfsqrt_d 0111 00101001 11001 11010 ..... ..... @vv | ||
293 | +vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv | ||
294 | +vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv | ||
295 | +vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv | ||
296 | +vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv | ||
297 | diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h | 159 | diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h |
298 | index XXXXXXX..XXXXXXX 100644 | 160 | index XXXXXXX..XXXXXXX 100644 |
299 | --- a/target/loongarch/internals.h | 161 | --- a/target/loongarch/internals.h |
300 | +++ b/target/loongarch/internals.h | 162 | +++ b/target/loongarch/internals.h |
301 | @@ -XXX,XX +XXX,XX @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, | 163 | @@ -XXX,XX +XXX,XX @@ bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, |
302 | 164 | int *index); | |
303 | const char *loongarch_exception_name(int32_t exception); | 165 | int get_physical_address(CPULoongArchState *env, hwaddr *physical, |
304 | 166 | int *prot, target_ulong address, | |
305 | +int ieee_ex_to_loongarch(int xcpt); | 167 | - MMUAccessType access_type, int mmu_idx); |
306 | void restore_fp_status(CPULoongArchState *env); | 168 | + MMUAccessType access_type, int mmu_idx, int is_debug); |
307 | 169 | +void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, | |
308 | #ifndef CONFIG_USER_ONLY | 170 | + uint64_t *dir_width, target_ulong level); |
309 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | 171 | hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); |
172 | |||
173 | #ifdef CONFIG_TCG | ||
174 | diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c | ||
310 | index XXXXXXX..XXXXXXX 100644 | 175 | index XXXXXXX..XXXXXXX 100644 |
311 | --- a/target/loongarch/lsx_helper.c | 176 | --- a/target/loongarch/tcg/tlb_helper.c |
312 | +++ b/target/loongarch/lsx_helper.c | 177 | +++ b/target/loongarch/tcg/tlb_helper.c |
313 | @@ -XXX,XX +XXX,XX @@ | 178 | @@ -XXX,XX +XXX,XX @@ |
314 | #include "cpu.h" | 179 | #include "exec/log.h" |
315 | #include "exec/exec-all.h" | 180 | #include "cpu-csr.h" |
316 | #include "exec/helper-proto.h" | 181 | |
317 | +#include "fpu/softfloat.h" | 182 | -static void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, |
318 | +#include "internals.h" | 183 | +void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, |
319 | 184 | uint64_t *dir_width, target_ulong level) | |
320 | #define DO_ADD(a, b) (a + b) | 185 | { |
321 | #define DO_SUB(a, b) (a - b) | 186 | switch (level) { |
322 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \ | 187 | @@ -XXX,XX +XXX,XX @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, |
323 | 188 | ||
324 | VFRSTPI(vfrstpi_b, 8, B) | 189 | /* Data access */ |
325 | VFRSTPI(vfrstpi_h, 16, H) | 190 | ret = get_physical_address(env, &physical, &prot, address, |
326 | + | 191 | - access_type, mmu_idx); |
327 | +static void vec_update_fcsr0_mask(CPULoongArchState *env, | 192 | + access_type, mmu_idx, 0); |
328 | + uintptr_t pc, int mask) | 193 | |
329 | +{ | 194 | if (ret == TLBRET_MATCH) { |
330 | + int flags = get_float_exception_flags(&env->fp_status); | 195 | tlb_set_page(cs, address & TARGET_PAGE_MASK, |
331 | + | ||
332 | + set_float_exception_flags(0, &env->fp_status); | ||
333 | + | ||
334 | + flags &= ~mask; | ||
335 | + | ||
336 | + if (flags) { | ||
337 | + flags = ieee_ex_to_loongarch(flags); | ||
338 | + UPDATE_FP_CAUSE(env->fcsr0, flags); | ||
339 | + } | ||
340 | + | ||
341 | + if (GET_FP_ENABLES(env->fcsr0) & flags) { | ||
342 | + do_raise_exception(env, EXCCODE_FPE, pc); | ||
343 | + } else { | ||
344 | + UPDATE_FP_FLAGS(env->fcsr0, flags); | ||
345 | + } | ||
346 | +} | ||
347 | + | ||
348 | +static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) | ||
349 | +{ | ||
350 | + vec_update_fcsr0_mask(env, pc, 0); | ||
351 | +} | ||
352 | + | ||
353 | +static inline void vec_clear_cause(CPULoongArchState *env) | ||
354 | +{ | ||
355 | + SET_FP_CAUSE(env->fcsr0, 0); | ||
356 | +} | ||
357 | + | ||
358 | +#define DO_3OP_F(NAME, BIT, E, FN) \ | ||
359 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
360 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
361 | +{ \ | ||
362 | + int i; \ | ||
363 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
364 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
365 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
366 | + \ | ||
367 | + vec_clear_cause(env); \ | ||
368 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
369 | + Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ | ||
370 | + vec_update_fcsr0(env, GETPC()); \ | ||
371 | + } \ | ||
372 | +} | ||
373 | + | ||
374 | +DO_3OP_F(vfadd_s, 32, UW, float32_add) | ||
375 | +DO_3OP_F(vfadd_d, 64, UD, float64_add) | ||
376 | +DO_3OP_F(vfsub_s, 32, UW, float32_sub) | ||
377 | +DO_3OP_F(vfsub_d, 64, UD, float64_sub) | ||
378 | +DO_3OP_F(vfmul_s, 32, UW, float32_mul) | ||
379 | +DO_3OP_F(vfmul_d, 64, UD, float64_mul) | ||
380 | +DO_3OP_F(vfdiv_s, 32, UW, float32_div) | ||
381 | +DO_3OP_F(vfdiv_d, 64, UD, float64_div) | ||
382 | +DO_3OP_F(vfmax_s, 32, UW, float32_maxnum) | ||
383 | +DO_3OP_F(vfmax_d, 64, UD, float64_maxnum) | ||
384 | +DO_3OP_F(vfmin_s, 32, UW, float32_minnum) | ||
385 | +DO_3OP_F(vfmin_d, 64, UD, float64_minnum) | ||
386 | +DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag) | ||
387 | +DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag) | ||
388 | +DO_3OP_F(vfmina_s, 32, UW, float32_minnummag) | ||
389 | +DO_3OP_F(vfmina_d, 64, UD, float64_minnummag) | ||
390 | + | ||
391 | +#define DO_4OP_F(NAME, BIT, E, FN, flags) \ | ||
392 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
393 | + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \ | ||
394 | +{ \ | ||
395 | + int i; \ | ||
396 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
397 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
398 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
399 | + VReg *Va = &(env->fpr[va].vreg); \ | ||
400 | + \ | ||
401 | + vec_clear_cause(env); \ | ||
402 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
403 | + Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ | ||
404 | + vec_update_fcsr0(env, GETPC()); \ | ||
405 | + } \ | ||
406 | +} | ||
407 | + | ||
408 | +DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0) | ||
409 | +DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) | ||
410 | +DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) | ||
411 | +DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) | ||
412 | +DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) | ||
413 | +DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) | ||
414 | +DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, | ||
415 | + float_muladd_negate_c | float_muladd_negate_result) | ||
416 | +DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, | ||
417 | + float_muladd_negate_c | float_muladd_negate_result) | ||
418 | + | ||
419 | +#define DO_2OP_F(NAME, BIT, E, FN) \ | ||
420 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
421 | +{ \ | ||
422 | + int i; \ | ||
423 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
424 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
425 | + \ | ||
426 | + vec_clear_cause(env); \ | ||
427 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
428 | + Vd->E(i) = FN(env, Vj->E(i)); \ | ||
429 | + } \ | ||
430 | +} | ||
431 | + | ||
432 | +#define FLOGB(BIT, T) \ | ||
433 | +static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ | ||
434 | +{ \ | ||
435 | + T fp, fd; \ | ||
436 | + float_status *status = &env->fp_status; \ | ||
437 | + FloatRoundMode old_mode = get_float_rounding_mode(status); \ | ||
438 | + \ | ||
439 | + set_float_rounding_mode(float_round_down, status); \ | ||
440 | + fp = float ## BIT ##_log2(fj, status); \ | ||
441 | + fd = float ## BIT ##_round_to_int(fp, status); \ | ||
442 | + set_float_rounding_mode(old_mode, status); \ | ||
443 | + vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ | ||
444 | + return fd; \ | ||
445 | +} | ||
446 | + | ||
447 | +FLOGB(32, uint32_t) | ||
448 | +FLOGB(64, uint64_t) | ||
449 | + | ||
450 | +#define FCLASS(NAME, BIT, E, FN) \ | ||
451 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
452 | +{ \ | ||
453 | + int i; \ | ||
454 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
455 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
456 | + \ | ||
457 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
458 | + Vd->E(i) = FN(env, Vj->E(i)); \ | ||
459 | + } \ | ||
460 | +} | ||
461 | + | ||
462 | +FCLASS(vfclass_s, 32, UW, helper_fclass_s) | ||
463 | +FCLASS(vfclass_d, 64, UD, helper_fclass_d) | ||
464 | + | ||
465 | +#define FSQRT(BIT, T) \ | ||
466 | +static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ | ||
467 | +{ \ | ||
468 | + T fd; \ | ||
469 | + fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ | ||
470 | + vec_update_fcsr0(env, GETPC()); \ | ||
471 | + return fd; \ | ||
472 | +} | ||
473 | + | ||
474 | +FSQRT(32, uint32_t) | ||
475 | +FSQRT(64, uint64_t) | ||
476 | + | ||
477 | +#define FRECIP(BIT, T) \ | ||
478 | +static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ | ||
479 | +{ \ | ||
480 | + T fd; \ | ||
481 | + fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ | ||
482 | + vec_update_fcsr0(env, GETPC()); \ | ||
483 | + return fd; \ | ||
484 | +} | ||
485 | + | ||
486 | +FRECIP(32, uint32_t) | ||
487 | +FRECIP(64, uint64_t) | ||
488 | + | ||
489 | +#define FRSQRT(BIT, T) \ | ||
490 | +static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ | ||
491 | +{ \ | ||
492 | + T fd, fp; \ | ||
493 | + fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ | ||
494 | + fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ | ||
495 | + vec_update_fcsr0(env, GETPC()); \ | ||
496 | + return fd; \ | ||
497 | +} | ||
498 | + | ||
499 | +FRSQRT(32, uint32_t) | ||
500 | +FRSQRT(64, uint64_t) | ||
501 | + | ||
502 | +DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) | ||
503 | +DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) | ||
504 | +DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) | ||
505 | +DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) | ||
506 | +DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) | ||
507 | +DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) | ||
508 | +DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) | ||
509 | +DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) | ||
510 | -- | 196 | -- |
511 | 2.31.1 | 197 | 2.43.5 | diff view generated by jsdifflib |
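
The patch on the right above wires the new walker into the debugger path through the extra `is_debug` argument of `get_physical_address()`. As a minimal sketch of how that path is consumed — not part of the series; the function name and fixed 64-bit read size are illustrative, and only helpers already visible in the diff (`env_cpu()`, `cpu_mmu_index()`, `ldq_phys()`) are assumed — a debugger-style read translates first and only then touches physical memory:

```c
/*
 * Illustrative sketch, not part of the series above: a debugger-style
 * read of guest virtual memory. Passing is_debug = 1 lets
 * get_physical_address() fall back to the software page table walker
 * when the mapping is not resident in the TLB.
 */
static bool debug_read_u64(CPULoongArchState *env, vaddr gva, uint64_t *val)
{
    CPUState *cs = env_cpu(env);
    hwaddr gpa;
    int prot;

    /* translate the page-aligned GVA on the debug path */
    if (get_physical_address(env, &gpa, &prot, gva & TARGET_PAGE_MASK,
                             MMU_DATA_LOAD, cpu_mmu_index(cs, false), 1) != 0) {
        return false;                       /* no valid mapping */
    }

    /* re-apply the in-page offset and read from the physical address space */
    *val = ldq_phys(cs->as, (gpa & TARGET_PAGE_MASK) | (gva & ~TARGET_PAGE_MASK));
    return true;
}
```
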
1 | This patch includes: | 1 | Add realize interface for loongarch ipi device. |
---|---|---|---|
2 | - VBITSEL.V; | ||
3 | - VBITSELI.B; | ||
4 | - VSET{EQZ/NEZ}.V; | ||
5 | - VSETANYEQZ.{B/H/W/D}; | ||
6 | - VSETALLNEZ.{B/H/W/D}. | ||
7 | 2 | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> |
9 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 4 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> |
10 | Message-Id: <20230504122810.4094787-38-gaosong@loongson.cn> | ||
11 | --- | 5 | --- |
12 | target/loongarch/disas.c | 20 ++++++ | 6 | hw/intc/loongarch_ipi.c | 19 +++++++++++++++++++ |
13 | target/loongarch/helper.h | 11 +++ | 7 | include/hw/intc/loongarch_ipi.h | 1 + |
14 | target/loongarch/insn_trans/trans_lsx.c.inc | 74 +++++++++++++++++++++ | 8 | 2 files changed, 20 insertions(+) |
15 | target/loongarch/insns.decode | 17 +++++ | ||
16 | target/loongarch/lsx_helper.c | 52 +++++++++++++++ | ||
17 | 5 files changed, 174 insertions(+) | ||
18 | 9 | ||
19 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | 10 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
20 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/loongarch/disas.c | 12 | --- a/hw/intc/loongarch_ipi.c |
22 | +++ b/target/loongarch/disas.c | 13 | +++ b/hw/intc/loongarch_ipi.c |
23 | @@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \ | 14 | @@ -XXX,XX +XXX,XX @@ |
24 | return true; \ | 15 | |
16 | #include "qemu/osdep.h" | ||
17 | #include "hw/boards.h" | ||
18 | +#include "qapi/error.h" | ||
19 | #include "hw/intc/loongarch_ipi.h" | ||
20 | #include "target/loongarch/cpu.h" | ||
21 | |||
22 | @@ -XXX,XX +XXX,XX @@ static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id) | ||
23 | return NULL; | ||
25 | } | 24 | } |
26 | 25 | ||
27 | +static void output_cv(DisasContext *ctx, arg_cv *a, | 26 | +static void loongarch_ipi_realize(DeviceState *dev, Error **errp) |
28 | + const char *mnemonic) | ||
29 | +{ | 27 | +{ |
30 | + output(ctx, mnemonic, "fcc%d, v%d", a->cd, a->vj); | 28 | + LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev); |
31 | +} | 29 | + Error *local_err = NULL; |
32 | + | 30 | + |
33 | static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic) | 31 | + lic->parent_realize(dev, &local_err); |
34 | { | 32 | + if (local_err) { |
35 | output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk); | 33 | + error_propagate(errp, local_err); |
36 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, \ | 34 | + return; |
37 | |||
38 | LSX_FCMP_INSN(s) | ||
39 | LSX_FCMP_INSN(d) | ||
40 | + | ||
41 | +INSN_LSX(vbitsel_v, vvvv) | ||
42 | +INSN_LSX(vbitseli_b, vv_i) | ||
43 | + | ||
44 | +INSN_LSX(vseteqz_v, cv) | ||
45 | +INSN_LSX(vsetnez_v, cv) | ||
46 | +INSN_LSX(vsetanyeqz_b, cv) | ||
47 | +INSN_LSX(vsetanyeqz_h, cv) | ||
48 | +INSN_LSX(vsetanyeqz_w, cv) | ||
49 | +INSN_LSX(vsetanyeqz_d, cv) | ||
50 | +INSN_LSX(vsetallnez_b, cv) | ||
51 | +INSN_LSX(vsetallnez_h, cv) | ||
52 | +INSN_LSX(vsetallnez_w, cv) | ||
53 | +INSN_LSX(vsetallnez_d, cv) | ||
54 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/loongarch/helper.h | ||
57 | +++ b/target/loongarch/helper.h | ||
58 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32) | ||
59 | DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32) | ||
60 | DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32) | ||
61 | DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32) | ||
62 | + | ||
63 | +DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
64 | + | ||
65 | +DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32) | ||
66 | +DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32) | ||
67 | +DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32) | ||
68 | +DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32) | ||
69 | +DEF_HELPER_3(vsetallnez_b, void, env, i32, i32) | ||
70 | +DEF_HELPER_3(vsetallnez_h, void, env, i32, i32) | ||
71 | +DEF_HELPER_3(vsetallnez_w, void, env, i32, i32) | ||
72 | +DEF_HELPER_3(vsetallnez_d, void, env, i32, i32) | ||
73 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
76 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, | ||
78 | return true; | ||
79 | } | ||
80 | |||
81 | +static bool gen_cv(DisasContext *ctx, arg_cv *a, | ||
82 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
83 | +{ | ||
84 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
85 | + TCGv_i32 cd = tcg_constant_i32(a->cd); | ||
86 | + | ||
87 | + CHECK_SXE; | ||
88 | + func(cpu_env, cd, vj); | ||
89 | + return true; | ||
90 | +} | ||
91 | + | ||
92 | static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
93 | void (*func)(unsigned, uint32_t, uint32_t, | ||
94 | uint32_t, uint32_t, uint32_t)) | ||
95 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a) | ||
96 | |||
97 | return true; | ||
98 | } | ||
99 | + | ||
100 | +static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a) | ||
101 | +{ | ||
102 | + CHECK_SXE; | ||
103 | + | ||
104 | + tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va), | ||
105 | + vec_full_offset(a->vk), vec_full_offset(a->vj), | ||
106 | + 16, ctx->vl/8); | ||
107 | + return true; | ||
108 | +} | ||
109 | + | ||
110 | +static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm) | ||
111 | +{ | ||
112 | + tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b); | ||
113 | +} | ||
114 | + | ||
115 | +static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a) | ||
116 | +{ | ||
117 | + static const GVecGen2i op = { | ||
118 | + .fniv = gen_vbitseli, | ||
119 | + .fnoi = gen_helper_vbitseli_b, | ||
120 | + .vece = MO_8, | ||
121 | + .load_dest = true | ||
122 | + }; | ||
123 | + | ||
124 | + CHECK_SXE; | ||
125 | + | ||
126 | + tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj), | ||
127 | + 16, ctx->vl/8, a->imm, &op); | ||
128 | + return true; | ||
129 | +} | ||
130 | + | ||
131 | +#define VSET(NAME, COND) \ | ||
132 | +static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \ | ||
133 | +{ \ | ||
134 | + TCGv_i64 t1, al, ah; \ | ||
135 | + \ | ||
136 | + al = tcg_temp_new_i64(); \ | ||
137 | + ah = tcg_temp_new_i64(); \ | ||
138 | + t1 = tcg_temp_new_i64(); \ | ||
139 | + \ | ||
140 | + get_vreg64(ah, a->vj, 1); \ | ||
141 | + get_vreg64(al, a->vj, 0); \ | ||
142 | + \ | ||
143 | + CHECK_SXE; \ | ||
144 | + tcg_gen_or_i64(t1, al, ah); \ | ||
145 | + tcg_gen_setcondi_i64(COND, t1, t1, 0); \ | ||
146 | + tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ | ||
147 | + \ | ||
148 | + return true; \ | ||
149 | +} | ||
150 | + | ||
151 | +VSET(vseteqz_v, TCG_COND_EQ) | ||
152 | +VSET(vsetnez_v, TCG_COND_NE) | ||
153 | + | ||
154 | +TRANS(vsetanyeqz_b, gen_cv, gen_helper_vsetanyeqz_b) | ||
155 | +TRANS(vsetanyeqz_h, gen_cv, gen_helper_vsetanyeqz_h) | ||
156 | +TRANS(vsetanyeqz_w, gen_cv, gen_helper_vsetanyeqz_w) | ||
157 | +TRANS(vsetanyeqz_d, gen_cv, gen_helper_vsetanyeqz_d) | ||
158 | +TRANS(vsetallnez_b, gen_cv, gen_helper_vsetallnez_b) | ||
159 | +TRANS(vsetallnez_h, gen_cv, gen_helper_vsetallnez_h) | ||
160 | +TRANS(vsetallnez_w, gen_cv, gen_helper_vsetallnez_w) | ||
161 | +TRANS(vsetallnez_d, gen_cv, gen_helper_vsetallnez_d) | ||
162 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/loongarch/insns.decode | ||
165 | +++ b/target/loongarch/insns.decode | ||
166 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
167 | # | ||
168 | |||
169 | &vv vd vj | ||
170 | +&cv cd vj | ||
171 | &vvv vd vj vk | ||
172 | &vv_i vd vj imm | ||
173 | &vvvv vd vj vk va | ||
174 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
175 | # LSX Formats | ||
176 | # | ||
177 | @vv .... ........ ..... ..... vj:5 vd:5 &vv | ||
178 | +@cv .... ........ ..... ..... vj:5 .. cd:3 &cv | ||
179 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
180 | @vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i | ||
181 | @vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i | ||
182 | @@ -XXX,XX +XXX,XX @@ vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5 | ||
183 | |||
184 | vfcmp_cond_s 0000 11000101 ..... ..... ..... ..... @vvv_fcond | ||
185 | vfcmp_cond_d 0000 11000110 ..... ..... ..... ..... @vvv_fcond | ||
186 | + | ||
187 | +vbitsel_v 0000 11010001 ..... ..... ..... ..... @vvvv | ||
188 | + | ||
189 | +vbitseli_b 0111 00111100 01 ........ ..... ..... @vv_ui8 | ||
190 | + | ||
191 | +vseteqz_v 0111 00101001 11001 00110 ..... 00 ... @cv | ||
192 | +vsetnez_v 0111 00101001 11001 00111 ..... 00 ... @cv | ||
193 | +vsetanyeqz_b 0111 00101001 11001 01000 ..... 00 ... @cv | ||
194 | +vsetanyeqz_h 0111 00101001 11001 01001 ..... 00 ... @cv | ||
195 | +vsetanyeqz_w 0111 00101001 11001 01010 ..... 00 ... @cv | ||
196 | +vsetanyeqz_d 0111 00101001 11001 01011 ..... 00 ... @cv | ||
197 | +vsetallnez_b 0111 00101001 11001 01100 ..... 00 ... @cv | ||
198 | +vsetallnez_h 0111 00101001 11001 01101 ..... 00 ... @cv | ||
199 | +vsetallnez_w 0111 00101001 11001 01110 ..... 00 ... @cv | ||
200 | +vsetallnez_d 0111 00101001 11001 01111 ..... 00 ... @cv | ||
201 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
202 | index XXXXXXX..XXXXXXX 100644 | ||
203 | --- a/target/loongarch/lsx_helper.c | ||
204 | +++ b/target/loongarch/lsx_helper.c | ||
205 | @@ -XXX,XX +XXX,XX @@ | ||
206 | #include "exec/helper-proto.h" | ||
207 | #include "fpu/softfloat.h" | ||
208 | #include "internals.h" | ||
209 | +#include "tcg/tcg.h" | ||
210 | |||
211 | #define DO_ADD(a, b) (a + b) | ||
212 | #define DO_SUB(a, b) (a - b) | ||
213 | @@ -XXX,XX +XXX,XX @@ VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) | ||
214 | VFCMP(vfcmp_s_s, 32, UW, float32_compare) | ||
215 | VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) | ||
216 | VFCMP(vfcmp_s_d, 64, UD, float64_compare) | ||
217 | + | ||
218 | +void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
219 | +{ | ||
220 | + int i; | ||
221 | + VReg *Vd = (VReg *)vd; | ||
222 | + VReg *Vj = (VReg *)vj; | ||
223 | + | ||
224 | + for (i = 0; i < 16; i++) { | ||
225 | + Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); | ||
226 | + } | 35 | + } |
227 | +} | 36 | +} |
228 | + | 37 | + |
229 | +/* Copy from target/arm/tcg/sve_helper.c */ | 38 | static void loongarch_ipi_class_init(ObjectClass *klass, void *data) |
230 | +static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) | 39 | { |
231 | +{ | 40 | LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass); |
232 | + uint64_t bits = 8 << esz; | 41 | + LoongarchIPIClass *lic = LOONGARCH_IPI_CLASS(klass); |
233 | + uint64_t ones = dup_const(esz, 1); | 42 | + DeviceClass *dc = DEVICE_CLASS(klass); |
234 | + uint64_t signs = ones << (bits - 1); | 43 | |
235 | + uint64_t cmp0, cmp1; | 44 | + device_class_set_parent_realize(dc, loongarch_ipi_realize, |
236 | + | 45 | + &lic->parent_realize); |
237 | + cmp1 = dup_const(esz, n); | 46 | licc->get_iocsr_as = get_iocsr_as; |
238 | + cmp0 = cmp1 ^ m0; | 47 | licc->cpu_by_arch_id = loongarch_cpu_by_arch_id; |
239 | + cmp1 = cmp1 ^ m1; | 48 | } |
240 | + cmp0 = (cmp0 - ones) & ~cmp0; | 49 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo loongarch_ipi_types[] = { |
241 | + cmp1 = (cmp1 - ones) & ~cmp1; | 50 | { |
242 | + return (cmp0 | cmp1) & signs; | 51 | .name = TYPE_LOONGARCH_IPI, |
243 | +} | 52 | .parent = TYPE_LOONGSON_IPI_COMMON, |
244 | + | 53 | + .instance_size = sizeof(LoongarchIPIState), |
245 | +#define SETANYEQZ(NAME, MO) \ | 54 | + .class_size = sizeof(LoongarchIPIClass), |
246 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ | 55 | .class_init = loongarch_ipi_class_init, |
247 | +{ \ | 56 | } |
248 | + VReg *Vj = &(env->fpr[vj].vreg); \ | 57 | }; |
249 | + \ | 58 | diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h |
250 | + env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ | 59 | index XXXXXXX..XXXXXXX 100644 |
251 | +} | 60 | --- a/include/hw/intc/loongarch_ipi.h |
252 | +SETANYEQZ(vsetanyeqz_b, MO_8) | 61 | +++ b/include/hw/intc/loongarch_ipi.h |
253 | +SETANYEQZ(vsetanyeqz_h, MO_16) | 62 | @@ -XXX,XX +XXX,XX @@ struct LoongarchIPIState { |
254 | +SETANYEQZ(vsetanyeqz_w, MO_32) | 63 | |
255 | +SETANYEQZ(vsetanyeqz_d, MO_64) | 64 | struct LoongarchIPIClass { |
256 | + | 65 | LoongsonIPICommonClass parent_class; |
257 | +#define SETALLNEZ(NAME, MO) \ | 66 | + DeviceRealize parent_realize; |
258 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ | 67 | }; |
259 | +{ \ | 68 | |
260 | + VReg *Vj = &(env->fpr[vj].vreg); \ | 69 | #endif |
261 | + \ | ||
262 | + env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ | ||
263 | +} | ||
264 | +SETALLNEZ(vsetallnez_b, MO_8) | ||
265 | +SETALLNEZ(vsetallnez_h, MO_16) | ||
266 | +SETALLNEZ(vsetallnez_w, MO_32) | ||
267 | +SETALLNEZ(vsetallnez_d, MO_64) | ||
268 | -- | 70 | -- |
269 | 2.31.1 | 71 | 2.43.5 | diff view generated by jsdifflib |
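
For orientation, the realize chain added in the patch above runs when the board instantiates the IPI device. Below is a rough board-side sketch — not taken from the series; the `num-cpu` property, `IRQ_IPI` wiring and CPU count are assumptions modelled on the existing LoongArch virt machine:

```c
/*
 * Hypothetical board-side instantiation sketch (not part of the series):
 * realizing the device invokes loongarch_ipi_realize(), which first
 * chains to the LoongsonIPICommon code through lic->parent_realize.
 */
static void virt_create_ipi_sketch(MachineState *machine)
{
    DeviceState *ipi = qdev_new(TYPE_LOONGARCH_IPI);
    int i;

    /* at this point in the series num-cpu is still a device property */
    qdev_prop_set_uint32(ipi, "num-cpu", machine->smp.cpus);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal);

    /* route each per-CPU output line to that CPU's IPI interrupt input */
    for (i = 0; i < machine->smp.cpus; i++) {
        qdev_connect_gpio_out(ipi, i,
                              qdev_get_gpio_in(DEVICE(qemu_get_cpu(i)), IRQ_IPI));
    }
}
```
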
1 | This patch includes: | 1 | With the mips64 loongson ipi, the num_cpu property is used. With the |
---|---|---|---|
2 | - VREPLVE[I].{B/H/W/D}; | 2 | loongarch ipi, num_cpu can be acquired from possible_cpu_arch_ids. |
2 | - VREPLVE[I].{B/H/W/D}; | 2 | ipi, num_cpu can be acquired from possible_cpu_arch_ids. |
3 | - VBSLL.V, VBSRL.V; | ||
4 | - VPACK{EV/OD}.{B/H/W/D}; | ||
5 | - VPICK{EV/OD}.{B/H/W/D}. | ||
6 | 3 | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Here remove num_cpu setting from loongson_ipi_common, and this piece |
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 5 | of code is put into loongson and loongarch ipi separately. |
9 | Message-Id: <20230504122810.4094787-40-gaosong@loongson.cn> | 6 | |
7 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> | ||
8 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> | ||
10 | --- | 9 | --- |
11 | target/loongarch/disas.c | 35 +++++ | 10 | hw/intc/loongarch_ipi.c | 13 +++++++++++++ |
12 | target/loongarch/helper.h | 18 +++ | 11 | hw/intc/loongson_ipi.c | 14 +++++++++++++- |
13 | target/loongarch/insn_trans/trans_lsx.c.inc | 144 ++++++++++++++++++++ | 12 | hw/intc/loongson_ipi_common.c | 14 -------------- |
14 | target/loongarch/insns.decode | 34 +++++ | 13 | 3 files changed, 26 insertions(+), 15 deletions(-) |
15 | target/loongarch/lsx_helper.c | 88 ++++++++++++ | ||
16 | 5 files changed, 319 insertions(+) | ||
17 | 14 | ||
18 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | 15 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/loongarch/disas.c | 17 | --- a/hw/intc/loongarch_ipi.c |
21 | +++ b/target/loongarch/disas.c | 18 | +++ b/hw/intc/loongarch_ipi.c |
22 | @@ -XXX,XX +XXX,XX @@ static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic) | 19 | @@ -XXX,XX +XXX,XX @@ static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id) |
23 | output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj); | 20 | |
24 | } | 21 | static void loongarch_ipi_realize(DeviceState *dev, Error **errp) |
25 | 22 | { | |
26 | +static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic) | 23 | + LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev); |
27 | +{ | 24 | LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev); |
28 | + output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk); | 25 | Error *local_err = NULL; |
29 | +} | 26 | + int i; |
27 | |||
28 | lic->parent_realize(dev, &local_err); | ||
29 | if (local_err) { | ||
30 | error_propagate(errp, local_err); | ||
31 | return; | ||
32 | } | ||
30 | + | 33 | + |
31 | INSN_LSX(vadd_b, vvv) | 34 | + if (lics->num_cpu == 0) { |
32 | INSN_LSX(vadd_h, vvv) | 35 | + error_setg(errp, "num-cpu must be at least 1"); |
33 | INSN_LSX(vadd_w, vvv) | 36 | + return; |
34 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vreplgr2vr_b, vr) | ||
35 | INSN_LSX(vreplgr2vr_h, vr) | ||
36 | INSN_LSX(vreplgr2vr_w, vr) | ||
37 | INSN_LSX(vreplgr2vr_d, vr) | ||
38 | + | ||
39 | +INSN_LSX(vreplve_b, vvr) | ||
40 | +INSN_LSX(vreplve_h, vvr) | ||
41 | +INSN_LSX(vreplve_w, vvr) | ||
42 | +INSN_LSX(vreplve_d, vvr) | ||
43 | +INSN_LSX(vreplvei_b, vv_i) | ||
44 | +INSN_LSX(vreplvei_h, vv_i) | ||
45 | +INSN_LSX(vreplvei_w, vv_i) | ||
46 | +INSN_LSX(vreplvei_d, vv_i) | ||
47 | + | ||
48 | +INSN_LSX(vbsll_v, vv_i) | ||
49 | +INSN_LSX(vbsrl_v, vv_i) | ||
50 | + | ||
51 | +INSN_LSX(vpackev_b, vvv) | ||
52 | +INSN_LSX(vpackev_h, vvv) | ||
53 | +INSN_LSX(vpackev_w, vvv) | ||
54 | +INSN_LSX(vpackev_d, vvv) | ||
55 | +INSN_LSX(vpackod_b, vvv) | ||
56 | +INSN_LSX(vpackod_h, vvv) | ||
57 | +INSN_LSX(vpackod_w, vvv) | ||
58 | +INSN_LSX(vpackod_d, vvv) | ||
59 | + | ||
60 | +INSN_LSX(vpickev_b, vvv) | ||
61 | +INSN_LSX(vpickev_h, vvv) | ||
62 | +INSN_LSX(vpickev_w, vvv) | ||
63 | +INSN_LSX(vpickev_d, vvv) | ||
64 | +INSN_LSX(vpickod_b, vvv) | ||
65 | +INSN_LSX(vpickod_h, vvv) | ||
66 | +INSN_LSX(vpickod_w, vvv) | ||
67 | +INSN_LSX(vpickod_d, vvv) | ||
68 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/helper.h | ||
71 | +++ b/target/loongarch/helper.h | ||
72 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsetallnez_b, void, env, i32, i32) | ||
73 | DEF_HELPER_3(vsetallnez_h, void, env, i32, i32) | ||
74 | DEF_HELPER_3(vsetallnez_w, void, env, i32, i32) | ||
75 | DEF_HELPER_3(vsetallnez_d, void, env, i32, i32) | ||
76 | + | ||
77 | +DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32) | ||
78 | +DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32) | ||
79 | +DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32) | ||
80 | +DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32) | ||
81 | +DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32) | ||
82 | +DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32) | ||
83 | +DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32) | ||
84 | +DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32) | ||
85 | + | ||
86 | +DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32) | ||
87 | +DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32) | ||
88 | +DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32) | ||
89 | +DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32) | ||
90 | +DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32) | ||
91 | +DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32) | ||
92 | +DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32) | ||
93 | +DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32) | ||
94 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
95 | index XXXXXXX..XXXXXXX 100644 | ||
96 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
97 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
98 | @@ -XXX,XX +XXX,XX @@ TRANS(vreplgr2vr_b, gvec_dup, MO_8) | ||
99 | TRANS(vreplgr2vr_h, gvec_dup, MO_16) | ||
100 | TRANS(vreplgr2vr_w, gvec_dup, MO_32) | ||
101 | TRANS(vreplgr2vr_d, gvec_dup, MO_64) | ||
102 | + | ||
103 | +static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a) | ||
104 | +{ | ||
105 | + CHECK_SXE; | ||
106 | + tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd), | ||
107 | + offsetof(CPULoongArchState, | ||
108 | + fpr[a->vj].vreg.B((a->imm))), | ||
109 | + 16, ctx->vl/8); | ||
110 | + return true; | ||
111 | +} | ||
112 | + | ||
113 | +static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a) | ||
114 | +{ | ||
115 | + CHECK_SXE; | ||
116 | + tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd), | ||
117 | + offsetof(CPULoongArchState, | ||
118 | + fpr[a->vj].vreg.H((a->imm))), | ||
119 | + 16, ctx->vl/8); | ||
120 | + return true; | ||
121 | +} | ||
122 | +static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a) | ||
123 | +{ | ||
124 | + CHECK_SXE; | ||
125 | + tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd), | ||
126 | + offsetof(CPULoongArchState, | ||
127 | + fpr[a->vj].vreg.W((a->imm))), | ||
128 | + 16, ctx->vl/8); | ||
129 | + return true; | ||
130 | +} | ||
131 | +static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a) | ||
132 | +{ | ||
133 | + CHECK_SXE; | ||
134 | + tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd), | ||
135 | + offsetof(CPULoongArchState, | ||
136 | + fpr[a->vj].vreg.D((a->imm))), | ||
137 | + 16, ctx->vl/8); | ||
138 | + return true; | ||
139 | +} | ||
140 | + | ||
141 | +static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, | ||
142 | + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) | ||
143 | +{ | ||
144 | + TCGv_i64 t0 = tcg_temp_new_i64(); | ||
145 | + TCGv_ptr t1 = tcg_temp_new_ptr(); | ||
146 | + TCGv_i64 t2 = tcg_temp_new_i64(); | ||
147 | + | ||
148 | + CHECK_SXE; | ||
149 | + | ||
150 | + tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) - 1); | ||
151 | + tcg_gen_shli_i64(t0, t0, vece); | ||
152 | + if (HOST_BIG_ENDIAN) { | ||
153 | + tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) - 1)); | ||
154 | + } | 37 | + } |
155 | + | 38 | + |
156 | + tcg_gen_trunc_i64_ptr(t1, t0); | 39 | + lics->cpu = g_new0(IPICore, lics->num_cpu); |
157 | + tcg_gen_add_ptr(t1, t1, cpu_env); | 40 | + for (i = 0; i < lics->num_cpu; i++) { |
158 | + func(t2, t1, vec_full_offset(a->vj)); | 41 | + lics->cpu[i].ipi = lics; |
159 | + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2); | 42 | + qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1); |
160 | + | 43 | + } |
161 | + return true; | 44 | } |
162 | +} | 45 | |
163 | + | 46 | static void loongarch_ipi_class_init(ObjectClass *klass, void *data) |
164 | +TRANS(vreplve_b, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64) | 47 | diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c |
165 | +TRANS(vreplve_h, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64) | 48 | index XXXXXXX..XXXXXXX 100644 |
166 | +TRANS(vreplve_w, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64) | 49 | --- a/hw/intc/loongson_ipi.c |
167 | +TRANS(vreplve_d, gen_vreplve, MO_64, 64, tcg_gen_ld_i64) | 50 | +++ b/hw/intc/loongson_ipi.c |
168 | + | 51 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp) |
169 | +static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a) | 52 | LoongsonIPIClass *lic = LOONGSON_IPI_GET_CLASS(dev); |
170 | +{ | 53 | SysBusDevice *sbd = SYS_BUS_DEVICE(dev); |
171 | + int ofs; | 54 | Error *local_err = NULL; |
172 | + TCGv_i64 desthigh, destlow, high, low; | 55 | + int i; |
173 | + | 56 | |
174 | + CHECK_SXE; | 57 | lic->parent_realize(dev, &local_err); |
175 | + | 58 | if (local_err) { |
176 | + desthigh = tcg_temp_new_i64(); | 59 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp) |
177 | + destlow = tcg_temp_new_i64(); | 60 | return; |
178 | + high = tcg_temp_new_i64(); | 61 | } |
179 | + low = tcg_temp_new_i64(); | 62 | |
180 | + | 63 | + if (sc->num_cpu == 0) { |
181 | + get_vreg64(low, a->vj, 0); | 64 | + error_setg(errp, "num-cpu must be at least 1"); |
182 | + | 65 | + return; |
183 | + ofs = ((a->imm) & 0xf) * 8; | ||
184 | + if (ofs < 64) { | ||
185 | + get_vreg64(high, a->vj, 1); | ||
186 | + tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs); | ||
187 | + tcg_gen_shli_i64(destlow, low, ofs); | ||
188 | + } else { | ||
189 | + tcg_gen_shli_i64(desthigh, low, ofs - 64); | ||
190 | + destlow = tcg_constant_i64(0); | ||
191 | + } | 66 | + } |
192 | + | 67 | + |
193 | + set_vreg64(desthigh, a->vd, 1); | 68 | + sc->cpu = g_new0(IPICore, sc->num_cpu); |
194 | + set_vreg64(destlow, a->vd, 0); | 69 | + for (i = 0; i < sc->num_cpu; i++) { |
195 | + | 70 | + sc->cpu[i].ipi = sc; |
196 | + return true; | 71 | + qdev_init_gpio_out(dev, &sc->cpu[i].irq, 1); |
197 | +} | ||
198 | + | ||
199 | +static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a) | ||
200 | +{ | ||
201 | + TCGv_i64 desthigh, destlow, high, low; | ||
202 | + int ofs; | ||
203 | + | ||
204 | + CHECK_SXE; | ||
205 | + | ||
206 | + desthigh = tcg_temp_new_i64(); | ||
207 | + destlow = tcg_temp_new_i64(); | ||
208 | + high = tcg_temp_new_i64(); | ||
209 | + low = tcg_temp_new_i64(); | ||
210 | + | ||
211 | + get_vreg64(high, a->vj, 1); | ||
212 | + | ||
213 | + ofs = ((a->imm) & 0xf) * 8; | ||
214 | + if (ofs < 64) { | ||
215 | + get_vreg64(low, a->vj, 0); | ||
216 | + tcg_gen_extract2_i64(destlow, low, high, ofs); | ||
217 | + tcg_gen_shri_i64(desthigh, high, ofs); | ||
218 | + } else { | ||
219 | + tcg_gen_shri_i64(destlow, high, ofs - 64); | ||
220 | + desthigh = tcg_constant_i64(0); | ||
221 | + } | 72 | + } |
222 | + | 73 | + |
223 | + set_vreg64(desthigh, a->vd, 1); | 74 | s->ipi_mmio_mem = g_new0(MemoryRegion, sc->num_cpu); |
224 | + set_vreg64(destlow, a->vd, 0); | 75 | - for (unsigned i = 0; i < sc->num_cpu; i++) { |
225 | + | 76 | + for (i = 0; i < sc->num_cpu; i++) { |
226 | + return true; | 77 | g_autofree char *name = g_strdup_printf("loongson_ipi_cpu%d_mmio", i); |
227 | +} | 78 | |
228 | + | 79 | memory_region_init_io(&s->ipi_mmio_mem[i], OBJECT(dev), |
229 | +TRANS(vpackev_b, gen_vvv, gen_helper_vpackev_b) | 80 | diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c |
230 | +TRANS(vpackev_h, gen_vvv, gen_helper_vpackev_h) | ||
231 | +TRANS(vpackev_w, gen_vvv, gen_helper_vpackev_w) | ||
232 | +TRANS(vpackev_d, gen_vvv, gen_helper_vpackev_d) | ||
233 | +TRANS(vpackod_b, gen_vvv, gen_helper_vpackod_b) | ||
234 | +TRANS(vpackod_h, gen_vvv, gen_helper_vpackod_h) | ||
235 | +TRANS(vpackod_w, gen_vvv, gen_helper_vpackod_w) | ||
236 | +TRANS(vpackod_d, gen_vvv, gen_helper_vpackod_d) | ||
237 | + | ||
238 | +TRANS(vpickev_b, gen_vvv, gen_helper_vpickev_b) | ||
239 | +TRANS(vpickev_h, gen_vvv, gen_helper_vpickev_h) | ||
240 | +TRANS(vpickev_w, gen_vvv, gen_helper_vpickev_w) | ||
241 | +TRANS(vpickev_d, gen_vvv, gen_helper_vpickev_d) | ||
242 | +TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b) | ||
243 | +TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h) | ||
244 | +TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w) | ||
245 | +TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d) | ||
246 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
247 | index XXXXXXX..XXXXXXX 100644 | 81 | index XXXXXXX..XXXXXXX 100644 |
248 | --- a/target/loongarch/insns.decode | 82 | --- a/hw/intc/loongson_ipi_common.c |
249 | +++ b/target/loongarch/insns.decode | 83 | +++ b/hw/intc/loongson_ipi_common.c |
250 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | 84 | @@ -XXX,XX +XXX,XX @@ |
251 | &vr_i vd rj imm | 85 | #include "hw/intc/loongson_ipi_common.h" |
252 | &rv_i rd vj imm | 86 | #include "hw/irq.h" |
253 | &vr vd rj | 87 | #include "hw/qdev-properties.h" |
254 | +&vvr vd vj rk | 88 | -#include "qapi/error.h" |
255 | 89 | #include "qemu/log.h" | |
256 | # | 90 | #include "migration/vmstate.h" |
257 | # LSX Formats | 91 | #include "trace.h" |
258 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | 92 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp) |
259 | @vv .... ........ ..... ..... vj:5 vd:5 &vv | 93 | { |
260 | @cv .... ........ ..... ..... vj:5 .. cd:3 &cv | 94 | LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev); |
261 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | 95 | SysBusDevice *sbd = SYS_BUS_DEVICE(dev); |
262 | +@vv_ui1 .... ........ ..... .... imm:1 vj:5 vd:5 &vv_i | 96 | - int i; |
263 | +@vv_ui2 .... ........ ..... ... imm:2 vj:5 vd:5 &vv_i | 97 | - |
264 | @vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i | 98 | - if (s->num_cpu == 0) { |
265 | @vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i | 99 | - error_setg(errp, "num-cpu must be at least 1"); |
266 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | 100 | - return; |
267 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | 101 | - } |
268 | @rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i | 102 | |
269 | @rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i | 103 | memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), |
270 | @vr .... ........ ..... ..... rj:5 vd:5 &vr | 104 | &loongson_ipi_iocsr_ops, |
271 | +@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr | 105 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp) |
272 | 106 | &loongson_ipi64_ops, | |
273 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | 107 | s, "loongson_ipi64_iocsr", 0x118); |
274 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | 108 | sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); |
275 | @@ -XXX,XX +XXX,XX @@ vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr | 109 | - |
276 | vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr | 110 | - s->cpu = g_new0(IPICore, s->num_cpu); |
277 | vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr | 111 | - for (i = 0; i < s->num_cpu; i++) { |
278 | vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr | 112 | - s->cpu[i].ipi = s; |
279 | + | 113 | - |
280 | +vreplve_b 0111 00010010 00100 ..... ..... ..... @vvr | 114 | - qdev_init_gpio_out(dev, &s->cpu[i].irq, 1); |
281 | +vreplve_h 0111 00010010 00101 ..... ..... ..... @vvr | 115 | - } |
282 | +vreplve_w 0111 00010010 00110 ..... ..... ..... @vvr | 116 | } |
283 | +vreplve_d 0111 00010010 00111 ..... ..... ..... @vvr | 117 | |
284 | +vreplvei_b 0111 00101111 01111 0 .... ..... ..... @vv_ui4 | 118 | static void loongson_ipi_common_unrealize(DeviceState *dev) |
285 | +vreplvei_h 0111 00101111 01111 10 ... ..... ..... @vv_ui3 | ||
286 | +vreplvei_w 0111 00101111 01111 110 .. ..... ..... @vv_ui2 | ||
287 | +vreplvei_d 0111 00101111 01111 1110 . ..... ..... @vv_ui1 | ||
288 | + | ||
289 | +vbsll_v 0111 00101000 11100 ..... ..... ..... @vv_ui5 | ||
290 | +vbsrl_v 0111 00101000 11101 ..... ..... ..... @vv_ui5 | ||
291 | + | ||
292 | +vpackev_b 0111 00010001 01100 ..... ..... ..... @vvv | ||
293 | +vpackev_h 0111 00010001 01101 ..... ..... ..... @vvv | ||
294 | +vpackev_w 0111 00010001 01110 ..... ..... ..... @vvv | ||
295 | +vpackev_d 0111 00010001 01111 ..... ..... ..... @vvv | ||
296 | +vpackod_b 0111 00010001 10000 ..... ..... ..... @vvv | ||
297 | +vpackod_h 0111 00010001 10001 ..... ..... ..... @vvv | ||
298 | +vpackod_w 0111 00010001 10010 ..... ..... ..... @vvv | ||
299 | +vpackod_d 0111 00010001 10011 ..... ..... ..... @vvv | ||
300 | + | ||
301 | +vpickev_b 0111 00010001 11100 ..... ..... ..... @vvv | ||
302 | +vpickev_h 0111 00010001 11101 ..... ..... ..... @vvv | ||
303 | +vpickev_w 0111 00010001 11110 ..... ..... ..... @vvv | ||
304 | +vpickev_d 0111 00010001 11111 ..... ..... ..... @vvv | ||
305 | +vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv | ||
306 | +vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv | ||
307 | +vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv | ||
308 | +vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv | ||
309 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
310 | index XXXXXXX..XXXXXXX 100644 | ||
311 | --- a/target/loongarch/lsx_helper.c | ||
312 | +++ b/target/loongarch/lsx_helper.c | ||
313 | @@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_b, MO_8) | ||
314 | SETALLNEZ(vsetallnez_h, MO_16) | ||
315 | SETALLNEZ(vsetallnez_w, MO_32) | ||
316 | SETALLNEZ(vsetallnez_d, MO_64) | ||
317 | + | ||
318 | +#define VPACKEV(NAME, BIT, E) \ | ||
319 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
320 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
321 | +{ \ | ||
322 | + int i; \ | ||
323 | + VReg temp; \ | ||
324 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
325 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
326 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
327 | + \ | ||
328 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
329 | + temp.E(2 * i + 1) = Vj->E(2 * i); \ | ||
330 | + temp.E(2 * i) = Vk->E(2 * i); \ | ||
331 | + } \ | ||
332 | + *Vd = temp; \ | ||
333 | +} | ||
334 | + | ||
335 | +VPACKEV(vpackev_b, 16, B) | ||
336 | +VPACKEV(vpackev_h, 32, H) | ||
337 | +VPACKEV(vpackev_w, 64, W) | ||
338 | +VPACKEV(vpackev_d, 128, D) | ||
339 | + | ||
340 | +#define VPACKOD(NAME, BIT, E) \ | ||
341 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
342 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
343 | +{ \ | ||
344 | + int i; \ | ||
345 | + VReg temp; \ | ||
346 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
347 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
348 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
349 | + \ | ||
350 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
351 | + temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ | ||
352 | + temp.E(2 * i) = Vk->E(2 * i + 1); \ | ||
353 | + } \ | ||
354 | + *Vd = temp; \ | ||
355 | +} | ||
356 | + | ||
357 | +VPACKOD(vpackod_b, 16, B) | ||
358 | +VPACKOD(vpackod_h, 32, H) | ||
359 | +VPACKOD(vpackod_w, 64, W) | ||
360 | +VPACKOD(vpackod_d, 128, D) | ||
361 | + | ||
362 | +#define VPICKEV(NAME, BIT, E) \ | ||
363 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
364 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
365 | +{ \ | ||
366 | + int i; \ | ||
367 | + VReg temp; \ | ||
368 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
369 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
370 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
371 | + \ | ||
372 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
373 | + temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ | ||
374 | + temp.E(i) = Vk->E(2 * i); \ | ||
375 | + } \ | ||
376 | + *Vd = temp; \ | ||
377 | +} | ||
378 | + | ||
379 | +VPICKEV(vpickev_b, 16, B) | ||
380 | +VPICKEV(vpickev_h, 32, H) | ||
381 | +VPICKEV(vpickev_w, 64, W) | ||
382 | +VPICKEV(vpickev_d, 128, D) | ||
383 | + | ||
384 | +#define VPICKOD(NAME, BIT, E) \ | ||
385 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
386 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
387 | +{ \ | ||
388 | + int i; \ | ||
389 | + VReg temp; \ | ||
390 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
391 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
392 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
393 | + \ | ||
394 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
395 | + temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ | ||
396 | + temp.E(i) = Vk->E(2 * i + 1); \ | ||
397 | + } \ | ||
398 | + *Vd = temp; \ | ||
399 | +} | ||
400 | + | ||
401 | +VPICKOD(vpickod_b, 16, B) | ||
402 | +VPICKOD(vpickod_h, 32, H) | ||
403 | +VPICKOD(vpickod_w, 64, W) | ||
404 | +VPICKOD(vpickod_d, 128, D) | ||
405 | -- | 119 | -- |
406 | 2.31.1 | 120 | 2.43.5 |
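As a side note on the pick/pack family added above: the VPICKEV helper gathers the even-indexed elements of vk into the low half of vd and the even-indexed elements of vj into the high half. The following standalone C sketch is not part of the patch; it assumes the 128-bit LSX_LEN used above and the 16-bit element size of the .h variant, and simply mirrors that computation on plain arrays:

#include <stdint.h>
#include <stdio.h>

/* Sketch of vpickev.h: even elements of vk -> low half of vd,
 * even elements of vj -> high half of vd. */
static void pickev_h(int16_t vd[8], const int16_t vj[8], const int16_t vk[8])
{
    int16_t tmp[8];
    int i;

    for (i = 0; i < 4; i++) {
        tmp[i] = vk[2 * i];
        tmp[i + 4] = vj[2 * i];
    }
    for (i = 0; i < 8; i++) {
        vd[i] = tmp[i];
    }
}

int main(void)
{
    int16_t vj[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    int16_t vk[8] = {20, 21, 22, 23, 24, 25, 26, 27};
    int16_t vd[8];
    int i;

    pickev_h(vd, vj, vk);
    for (i = 0; i < 8; i++) {
        printf("%d ", vd[i]);   /* 20 22 24 26 10 12 14 16 */
    }
    printf("\n");
    return 0;
}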
1 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 1 | With the mips64 Loongson IPI, the num_cpu property is used. With the |
---|---|---|---|
2 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 2 | LoongArch IPI, num_cpu can be acquired from possible_cpu_arch_ids(). |
3 | Message-Id: <20230504122810.4094787-2-gaosong@loongson.cn> | 3 | |
4 | Here remove property num_cpu from loongson_ipi_common, and put it into | ||
5 | loongson and loongarch ipi separately. | ||
6 | |||
7 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> | ||
8 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> | ||
4 | --- | 9 | --- |
5 | linux-user/loongarch64/signal.c | 4 +- | 10 | hw/intc/loongarch_ipi.c | 6 ++++++ |
6 | target/loongarch/cpu.c | 2 +- | 11 | hw/intc/loongson_ipi.c | 6 ++++++ |
7 | target/loongarch/cpu.h | 21 ++++++++- | 12 | hw/intc/loongson_ipi_common.c | 6 ------ |
8 | target/loongarch/gdbstub.c | 4 +- | 13 | 3 files changed, 12 insertions(+), 6 deletions(-) |
9 | target/loongarch/internals.h | 22 +++++++++ | ||
10 | target/loongarch/machine.c | 79 ++++++++++++++++++++++++++++++--- | ||
11 | 6 files changed, 119 insertions(+), 13 deletions(-) | ||
12 | 14 | ||
13 | diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c | 15 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/linux-user/loongarch64/signal.c | 17 | --- a/hw/intc/loongarch_ipi.c |
16 | +++ b/linux-user/loongarch64/signal.c | 18 | +++ b/hw/intc/loongarch_ipi.c |
17 | @@ -XXX,XX +XXX,XX @@ static void setup_sigframe(CPULoongArchState *env, | 19 | @@ -XXX,XX +XXX,XX @@ |
18 | 20 | #include "hw/boards.h" | |
19 | fpu_ctx = (struct target_fpu_context *)(info + 1); | 21 | #include "qapi/error.h" |
20 | for (i = 0; i < 32; ++i) { | 22 | #include "hw/intc/loongarch_ipi.h" |
21 | - __put_user(env->fpr[i], &fpu_ctx->regs[i]); | 23 | +#include "hw/qdev-properties.h" |
22 | + __put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); | 24 | #include "target/loongarch/cpu.h" |
25 | |||
26 | static AddressSpace *get_iocsr_as(CPUState *cpu) | ||
27 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp) | ||
23 | } | 28 | } |
24 | __put_user(read_fcc(env), &fpu_ctx->fcc); | 29 | } |
25 | __put_user(env->fcsr0, &fpu_ctx->fcsr); | 30 | |
26 | @@ -XXX,XX +XXX,XX @@ static void restore_sigframe(CPULoongArchState *env, | 31 | +static const Property loongarch_ipi_properties[] = { |
27 | uint64_t fcc; | 32 | + DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), |
28 | |||
29 | for (i = 0; i < 32; ++i) { | ||
30 | - __get_user(env->fpr[i], &fpu_ctx->regs[i]); | ||
31 | + __get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); | ||
32 | } | ||
33 | __get_user(fcc, &fpu_ctx->fcc); | ||
34 | write_fcc(env, fcc); | ||
35 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/loongarch/cpu.c | ||
38 | +++ b/target/loongarch/cpu.c | ||
39 | @@ -XXX,XX +XXX,XX @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) | ||
40 | /* fpr */ | ||
41 | if (flags & CPU_DUMP_FPU) { | ||
42 | for (i = 0; i < 32; i++) { | ||
43 | - qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i]); | ||
44 | + qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i].vreg.D(0)); | ||
45 | if ((i & 3) == 3) { | ||
46 | qemu_fprintf(f, "\n"); | ||
47 | } | ||
48 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/loongarch/cpu.h | ||
51 | +++ b/target/loongarch/cpu.h | ||
52 | @@ -XXX,XX +XXX,XX @@ | ||
53 | #ifndef LOONGARCH_CPU_H | ||
54 | #define LOONGARCH_CPU_H | ||
55 | |||
56 | +#include "qemu/int128.h" | ||
57 | #include "exec/cpu-defs.h" | ||
58 | #include "fpu/softfloat-types.h" | ||
59 | #include "hw/registerfields.h" | ||
60 | @@ -XXX,XX +XXX,XX @@ FIELD(TLB_MISC, ASID, 1, 10) | ||
61 | FIELD(TLB_MISC, VPPN, 13, 35) | ||
62 | FIELD(TLB_MISC, PS, 48, 6) | ||
63 | |||
64 | +#define LSX_LEN (128) | ||
65 | +typedef union VReg { | ||
66 | + int8_t B[LSX_LEN / 8]; | ||
67 | + int16_t H[LSX_LEN / 16]; | ||
68 | + int32_t W[LSX_LEN / 32]; | ||
69 | + int64_t D[LSX_LEN / 64]; | ||
70 | + uint8_t UB[LSX_LEN / 8]; | ||
71 | + uint16_t UH[LSX_LEN / 16]; | ||
72 | + uint32_t UW[LSX_LEN / 32]; | ||
73 | + uint64_t UD[LSX_LEN / 64]; | ||
74 | + Int128 Q[LSX_LEN / 128]; | ||
75 | +} VReg; | ||
76 | + | ||
77 | +typedef union fpr_t fpr_t; | ||
78 | +union fpr_t { | ||
79 | + VReg vreg; | ||
80 | +}; | 33 | +}; |
81 | + | 34 | + |
82 | struct LoongArchTLB { | 35 | static void loongarch_ipi_class_init(ObjectClass *klass, void *data) |
83 | uint64_t tlb_misc; | 36 | { |
84 | /* Fields corresponding to CSR_TLBELO0/1 */ | 37 | LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass); |
85 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { | 38 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_class_init(ObjectClass *klass, void *data) |
86 | uint64_t gpr[32]; | 39 | |
87 | uint64_t pc; | 40 | device_class_set_parent_realize(dc, loongarch_ipi_realize, |
88 | 41 | &lic->parent_realize); | |
89 | - uint64_t fpr[32]; | 42 | + device_class_set_props(dc, loongarch_ipi_properties); |
90 | + fpr_t fpr[32]; | 43 | licc->get_iocsr_as = get_iocsr_as; |
91 | float_status fp_status; | 44 | licc->cpu_by_arch_id = loongarch_cpu_by_arch_id; |
92 | bool cf[8]; | 45 | } |
93 | 46 | diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c | |
94 | diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c | ||
95 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
96 | --- a/target/loongarch/gdbstub.c | 48 | --- a/hw/intc/loongson_ipi.c |
97 | +++ b/target/loongarch/gdbstub.c | 49 | +++ b/hw/intc/loongson_ipi.c |
98 | @@ -XXX,XX +XXX,XX @@ static int loongarch_gdb_get_fpu(CPULoongArchState *env, | ||
99 | GByteArray *mem_buf, int n) | ||
100 | { | ||
101 | if (0 <= n && n < 32) { | ||
102 | - return gdb_get_reg64(mem_buf, env->fpr[n]); | ||
103 | + return gdb_get_reg64(mem_buf, env->fpr[n].vreg.D(0)); | ||
104 | } else if (n == 32) { | ||
105 | uint64_t val = read_fcc(env); | ||
106 | return gdb_get_reg64(mem_buf, val); | ||
107 | @@ -XXX,XX +XXX,XX @@ static int loongarch_gdb_set_fpu(CPULoongArchState *env, | ||
108 | int length = 0; | ||
109 | |||
110 | if (0 <= n && n < 32) { | ||
111 | - env->fpr[n] = ldq_p(mem_buf); | ||
112 | + env->fpr[n].vreg.D(0) = ldq_p(mem_buf); | ||
113 | length = 8; | ||
114 | } else if (n == 32) { | ||
115 | uint64_t val = ldq_p(mem_buf); | ||
116 | diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/target/loongarch/internals.h | ||
119 | +++ b/target/loongarch/internals.h | ||
120 | @@ -XXX,XX +XXX,XX @@ | 50 | @@ -XXX,XX +XXX,XX @@ |
121 | /* Global bit for huge page */ | 51 | |
122 | #define LOONGARCH_HGLOBAL_SHIFT 12 | 52 | #include "qemu/osdep.h" |
123 | 53 | #include "hw/intc/loongson_ipi.h" | |
124 | +#if HOST_BIG_ENDIAN | 54 | +#include "hw/qdev-properties.h" |
125 | +#define B(x) B[15 - (x)] | 55 | #include "qapi/error.h" |
126 | +#define H(x) H[7 - (x)] | 56 | #include "target/mips/cpu.h" |
127 | +#define W(x) W[3 - (x)] | 57 | |
128 | +#define D(x) D[1 - (x)] | 58 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_unrealize(DeviceState *dev) |
129 | +#define UB(x) UB[15 - (x)] | 59 | k->parent_unrealize(dev); |
130 | +#define UH(x) UH[7 - (x)] | 60 | } |
131 | +#define UW(x) UW[3 - (x)] | 61 | |
132 | +#define UD(x) UD[1 - (x)] | ||
133 | +#define Q(x) Q[x] | 63 | + DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), |
134 | +#else | ||
135 | +#define B(x) B[x] | ||
136 | +#define H(x) H[x] | ||
137 | +#define W(x) W[x] | ||
138 | +#define D(x) D[x] | ||
139 | +#define UB(x) UB[x] | ||
140 | +#define UH(x) UH[x] | ||
141 | +#define UW(x) UW[x] | ||
142 | +#define UD(x) UD[x] | ||
143 | +#define Q(x) Q[x] | ||
144 | +#endif | ||
145 | + | ||
146 | void loongarch_translate_init(void); | ||
147 | |||
148 | void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags); | ||
149 | diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c | ||
150 | index XXXXXXX..XXXXXXX 100644 | ||
151 | --- a/target/loongarch/machine.c | ||
152 | +++ b/target/loongarch/machine.c | ||
153 | @@ -XXX,XX +XXX,XX @@ | ||
154 | #include "migration/cpu.h" | ||
155 | #include "internals.h" | ||
156 | |||
157 | +static const VMStateDescription vmstate_fpu_reg = { | ||
158 | + .name = "fpu_reg", | ||
159 | + .version_id = 1, | ||
160 | + .minimum_version_id = 1, | ||
161 | + .fields = (VMStateField[]) { | ||
162 | + VMSTATE_UINT64(UD(0), VReg), | ||
163 | + VMSTATE_END_OF_LIST() | ||
164 | + } | ||
165 | +}; | 64 | +}; |
166 | + | 65 | + |
167 | +#define VMSTATE_FPU_REGS(_field, _state, _start) \ | 66 | static void loongson_ipi_class_init(ObjectClass *klass, void *data) |
168 | + VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \ | 67 | { |
169 | + vmstate_fpu_reg, fpr_t) | 68 | DeviceClass *dc = DEVICE_CLASS(klass); |
170 | + | 69 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data) |
171 | +static bool fpu_needed(void *opaque) | 70 | &lic->parent_realize); |
172 | +{ | 71 | device_class_set_parent_unrealize(dc, loongson_ipi_unrealize, |
173 | + LoongArchCPU *cpu = opaque; | 72 | &lic->parent_unrealize); |
174 | + | 73 | + device_class_set_props(dc, loongson_ipi_properties); |
175 | + return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, FP); | 74 | licc->get_iocsr_as = get_iocsr_as; |
176 | +} | 75 | licc->cpu_by_arch_id = cpu_by_arch_id; |
177 | + | 76 | } |
178 | +static const VMStateDescription vmstate_fpu = { | 77 | diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c |
179 | + .name = "cpu/fpu", | 78 | index XXXXXXX..XXXXXXX 100644 |
180 | + .version_id = 1, | 79 | --- a/hw/intc/loongson_ipi_common.c |
181 | + .minimum_version_id = 1, | 80 | +++ b/hw/intc/loongson_ipi_common.c |
182 | + .needed = fpu_needed, | 81 | @@ -XXX,XX +XXX,XX @@ |
183 | + .fields = (VMStateField[]) { | 82 | #include "hw/sysbus.h" |
184 | + VMSTATE_FPU_REGS(env.fpr, LoongArchCPU, 0), | 83 | #include "hw/intc/loongson_ipi_common.h" |
185 | + VMSTATE_UINT32(env.fcsr0, LoongArchCPU), | 84 | #include "hw/irq.h" |
186 | + VMSTATE_BOOL_ARRAY(env.cf, LoongArchCPU, 8), | 85 | -#include "hw/qdev-properties.h" |
187 | + VMSTATE_END_OF_LIST() | 86 | #include "qemu/log.h" |
188 | + }, | 87 | #include "migration/vmstate.h" |
189 | +}; | 88 | #include "trace.h" |
190 | + | 89 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_loongson_ipi_common = { |
191 | +static const VMStateDescription vmstate_lsxh_reg = { | 90 | } |
192 | + .name = "lsxh_reg", | ||
193 | + .version_id = 1, | ||
194 | + .minimum_version_id = 1, | ||
195 | + .fields = (VMStateField[]) { | ||
196 | + VMSTATE_UINT64(UD(1), VReg), | ||
197 | + VMSTATE_END_OF_LIST() | ||
198 | + } | ||
199 | +}; | ||
200 | + | ||
201 | +#define VMSTATE_LSXH_REGS(_field, _state, _start) \ | ||
202 | + VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \ | ||
203 | + vmstate_lsxh_reg, fpr_t) | ||
204 | + | ||
205 | +static bool lsx_needed(void *opaque) | ||
206 | +{ | ||
207 | + LoongArchCPU *cpu = opaque; | ||
208 | + | ||
209 | + return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LSX); | ||
210 | +} | ||
211 | + | ||
212 | +static const VMStateDescription vmstate_lsx = { | ||
213 | + .name = "cpu/lsx", | ||
214 | + .version_id = 1, | ||
215 | + .minimum_version_id = 1, | ||
216 | + .needed = lsx_needed, | ||
217 | + .fields = (VMStateField[]) { | ||
218 | + VMSTATE_LSXH_REGS(env.fpr, LoongArchCPU, 0), | ||
219 | + VMSTATE_END_OF_LIST() | ||
220 | + }, | ||
221 | +}; | ||
222 | + | ||
223 | /* TLB state */ | ||
224 | const VMStateDescription vmstate_tlb = { | ||
225 | .name = "cpu/tlb", | ||
226 | @@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_tlb = { | ||
227 | }; | 91 | }; |
228 | 92 | ||
229 | /* LoongArch CPU state */ | 93 | -static const Property ipi_common_properties[] = { |
94 | - DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), | ||
95 | -}; | ||
230 | - | 96 | - |
231 | const VMStateDescription vmstate_loongarch_cpu = { | 97 | static void loongson_ipi_common_class_init(ObjectClass *klass, void *data) |
232 | .name = "cpu", | 98 | { |
233 | - .version_id = 0, | 99 | DeviceClass *dc = DEVICE_CLASS(klass); |
234 | - .minimum_version_id = 0, | 100 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_class_init(ObjectClass *klass, void *data) |
235 | + .version_id = 1, | 101 | &licc->parent_realize); |
236 | + .minimum_version_id = 1, | 102 | device_class_set_parent_unrealize(dc, loongson_ipi_common_unrealize, |
237 | .fields = (VMStateField[]) { | 103 | &licc->parent_unrealize); |
238 | - | 104 | - device_class_set_props(dc, ipi_common_properties); |
239 | VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), | 105 | dc->vmsd = &vmstate_loongson_ipi_common; |
240 | VMSTATE_UINTTL(env.pc, LoongArchCPU), | 106 | } |
241 | - VMSTATE_UINT64_ARRAY(env.fpr, LoongArchCPU, 32), | 107 | |
242 | - VMSTATE_UINT32(env.fcsr0, LoongArchCPU), | ||
243 | - VMSTATE_BOOL_ARRAY(env.cf, LoongArchCPU, 8), | ||
244 | |||
245 | /* Remaining CSRs */ | ||
246 | VMSTATE_UINT64(env.CSR_CRMD, LoongArchCPU), | ||
247 | @@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_loongarch_cpu = { | ||
248 | |||
249 | VMSTATE_END_OF_LIST() | ||
250 | }, | ||
251 | + .subsections = (const VMStateDescription*[]) { | ||
252 | + &vmstate_fpu, | ||
253 | + &vmstate_lsx, | ||
254 | + } | ||
255 | }; | ||
256 | -- | 108 | -- |
257 | 2.31.1 | 109 | 2.43.5 |
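A note on the element-index macros introduced in the patch above: guest element 0 must always alias the least significant end of the 128-bit vector, so big-endian hosts mirror the index. The sketch below is illustrative only, not QEMU code; SketchVReg and ELEM_B/ELEM_D are placeholder names for the VReg union and the B()/D() macros from the patch:

#include <stdint.h>
#include <stdio.h>

typedef union {
    int8_t  B[16];
    int64_t D[2];
} SketchVReg;

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define ELEM_B(x) (15 - (x))
#define ELEM_D(x) (1 - (x))
#else
#define ELEM_B(x) (x)
#define ELEM_D(x) (x)
#endif

int main(void)
{
    SketchVReg v = { { 0 } };

    /* Write guest D element 0, then read guest byte element 0. */
    v.D[ELEM_D(0)] = 0x1122334455667788LL;
    printf("0x%02x\n", (unsigned)(uint8_t)v.B[ELEM_B(0)]);  /* 0x88 on either host */
    return 0;
}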
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
3 | Message-Id: <20230504122810.4094787-3-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++++ | ||
6 | target/loongarch/lsx_helper.c | 6 ++++++ | ||
7 | target/loongarch/meson.build | 1 + | ||
8 | target/loongarch/translate.c | 1 + | ||
9 | 4 files changed, 13 insertions(+) | ||
10 | create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc | ||
11 | create mode 100644 target/loongarch/lsx_helper.c | ||
12 | 1 | ||
13 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
14 | new file mode 100644 | ||
15 | index XXXXXXX..XXXXXXX | ||
16 | --- /dev/null | ||
17 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
18 | @@ -XXX,XX +XXX,XX @@ | ||
19 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
20 | +/* | ||
21 | + * LSX translate functions | ||
22 | + * Copyright (c) 2022-2023 Loongson Technology Corporation Limited | ||
23 | + */ | ||
24 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
25 | new file mode 100644 | ||
26 | index XXXXXXX..XXXXXXX | ||
27 | --- /dev/null | ||
28 | +++ b/target/loongarch/lsx_helper.c | ||
29 | @@ -XXX,XX +XXX,XX @@ | ||
30 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
31 | +/* | ||
32 | + * QEMU LoongArch LSX helper functions. | ||
33 | + * | ||
34 | + * Copyright (c) 2022-2023 Loongson Technology Corporation Limited | ||
35 | + */ | ||
36 | diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/loongarch/meson.build | ||
39 | +++ b/target/loongarch/meson.build | ||
40 | @@ -XXX,XX +XXX,XX @@ loongarch_tcg_ss.add(files( | ||
41 | 'op_helper.c', | ||
42 | 'translate.c', | ||
43 | 'gdbstub.c', | ||
44 | + 'lsx_helper.c', | ||
45 | )) | ||
46 | loongarch_tcg_ss.add(zlib) | ||
47 | |||
48 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/loongarch/translate.c | ||
51 | +++ b/target/loongarch/translate.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext) | ||
53 | #include "insn_trans/trans_fmemory.c.inc" | ||
54 | #include "insn_trans/trans_branch.c.inc" | ||
55 | #include "insn_trans/trans_privileged.c.inc" | ||
56 | +#include "insn_trans/trans_lsx.c.inc" | ||
57 | |||
58 | static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) | ||
59 | { | ||
60 | -- | ||
61 | 2.31.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
3 | Message-Id: <20230504122810.4094787-4-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/cpu.c | 2 ++ | ||
6 | target/loongarch/cpu.h | 2 ++ | ||
7 | target/loongarch/insn_trans/trans_lsx.c.inc | 11 +++++++++++ | ||
8 | 3 files changed, 15 insertions(+) | ||
9 | 1 | ||
10 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/loongarch/cpu.c | ||
13 | +++ b/target/loongarch/cpu.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static const char * const excp_names[] = { | ||
15 | [EXCCODE_FPE] = "Floating Point Exception", | ||
16 | [EXCCODE_DBP] = "Debug breakpoint", | ||
17 | [EXCCODE_BCE] = "Bound Check Exception", | ||
18 | + [EXCCODE_SXD] = "128 bit vector instructions Disable exception", | ||
19 | }; | ||
20 | |||
21 | const char *loongarch_exception_name(int32_t exception) | ||
22 | @@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_do_interrupt(CPUState *cs) | ||
23 | case EXCCODE_FPD: | ||
24 | case EXCCODE_FPE: | ||
25 | case EXCCODE_BCE: | ||
26 | + case EXCCODE_SXD: | ||
27 | env->CSR_BADV = env->pc; | ||
28 | QEMU_FALLTHROUGH; | ||
29 | case EXCCODE_ADEM: | ||
30 | diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/loongarch/cpu.h | ||
33 | +++ b/target/loongarch/cpu.h | ||
34 | @@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch) | ||
35 | #define HW_FLAGS_PLV_MASK R_CSR_CRMD_PLV_MASK /* 0x03 */ | ||
36 | #define HW_FLAGS_CRMD_PG R_CSR_CRMD_PG_MASK /* 0x10 */ | ||
37 | #define HW_FLAGS_EUEN_FPE 0x04 | ||
38 | +#define HW_FLAGS_EUEN_SXE 0x08 | ||
39 | |||
40 | static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, | ||
41 | target_ulong *pc, | ||
42 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, | ||
43 | *cs_base = 0; | ||
44 | *flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); | ||
45 | *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; | ||
46 | + *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; | ||
47 | } | ||
48 | |||
49 | void loongarch_cpu_list(void); | ||
50 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
53 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | * LSX translate functions | ||
56 | * Copyright (c) 2022-2023 Loongson Technology Corporation Limited | ||
57 | */ | ||
58 | + | ||
59 | +#ifndef CONFIG_USER_ONLY | ||
60 | +#define CHECK_SXE do { \ | ||
61 | + if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \ | ||
62 | + generate_exception(ctx, EXCCODE_SXD); \ | ||
63 | + return true; \ | ||
64 | + } \ | ||
65 | +} while (0) | ||
66 | +#else | ||
67 | +#define CHECK_SXE | ||
68 | +#endif | ||
69 | -- | ||
70 | 2.31.1 | diff view generated by jsdifflib |
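The cpu_get_tb_cpu_state() hunk above composes the TB flags branch-free: a one-bit CSR field (0 or 1) is multiplied by the corresponding HW_FLAGS_* mask. A minimal standalone sketch of that idiom follows; the mask values are copied from the patch, everything else is invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_HW_FLAGS_EUEN_FPE 0x04
#define SKETCH_HW_FLAGS_EUEN_SXE 0x08

/* fpe/sxe are 0 or 1, as extracted by FIELD_EX64() in the patch. */
static uint32_t compose_flags(uint32_t fpe, uint32_t sxe)
{
    uint32_t flags = 0;

    flags |= fpe * SKETCH_HW_FLAGS_EUEN_FPE;
    flags |= sxe * SKETCH_HW_FLAGS_EUEN_SXE;
    return flags;
}

int main(void)
{
    printf("0x%02x\n", compose_flags(1, 0));   /* 0x04 */
    printf("0x%02x\n", compose_flags(1, 1));   /* 0x0c */
    return 0;
}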
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VADD.{B/H/W/D/Q}; | ||
3 | - VSUB.{B/H/W/D/Q}. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-5-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 23 +++++++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 69 +++++++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 22 +++++++ | ||
12 | target/loongarch/translate.c | 24 +++++++ | ||
13 | target/loongarch/translate.h | 1 + | ||
14 | 5 files changed, 139 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ PCADD_INSN(pcaddi) | ||
21 | PCADD_INSN(pcalau12i) | ||
22 | PCADD_INSN(pcaddu12i) | ||
23 | PCADD_INSN(pcaddu18i) | ||
24 | + | ||
25 | +#define INSN_LSX(insn, type) \ | ||
26 | +static bool trans_##insn(DisasContext *ctx, arg_##type * a) \ | ||
27 | +{ \ | ||
28 | + output_##type(ctx, a, #insn); \ | ||
29 | + return true; \ | ||
30 | +} | ||
31 | + | ||
32 | +static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic) | ||
33 | +{ | ||
34 | + output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk); | ||
35 | +} | ||
36 | + | ||
37 | +INSN_LSX(vadd_b, vvv) | ||
38 | +INSN_LSX(vadd_h, vvv) | ||
39 | +INSN_LSX(vadd_w, vvv) | ||
40 | +INSN_LSX(vadd_d, vvv) | ||
41 | +INSN_LSX(vadd_q, vvv) | ||
42 | +INSN_LSX(vsub_b, vvv) | ||
43 | +INSN_LSX(vsub_h, vvv) | ||
44 | +INSN_LSX(vsub_w, vvv) | ||
45 | +INSN_LSX(vsub_d, vvv) | ||
46 | +INSN_LSX(vsub_q, vvv) | ||
47 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
50 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | #else | ||
53 | #define CHECK_SXE | ||
54 | #endif | ||
55 | + | ||
56 | +static bool gen_vvv(DisasContext *ctx, arg_vvv *a, | ||
57 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
58 | +{ | ||
59 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
60 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
61 | + TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
62 | + | ||
63 | + CHECK_SXE; | ||
64 | + | ||
65 | + func(cpu_env, vd, vj, vk); | ||
66 | + return true; | ||
67 | +} | ||
68 | + | ||
69 | +static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
70 | + void (*func)(unsigned, uint32_t, uint32_t, | ||
71 | + uint32_t, uint32_t, uint32_t)) | ||
72 | +{ | ||
73 | + uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
74 | + | ||
75 | + CHECK_SXE; | ||
76 | + | ||
77 | + vd_ofs = vec_full_offset(a->vd); | ||
78 | + vj_ofs = vec_full_offset(a->vj); | ||
79 | + vk_ofs = vec_full_offset(a->vk); | ||
80 | + | ||
81 | + func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8); | ||
82 | + return true; | ||
83 | +} | ||
84 | + | ||
85 | +TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add) | ||
86 | +TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add) | ||
87 | +TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add) | ||
88 | +TRANS(vadd_d, gvec_vvv, MO_64, tcg_gen_gvec_add) | ||
89 | + | ||
90 | +#define VADDSUB_Q(NAME) \ | ||
91 | +static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \ | ||
92 | +{ \ | ||
93 | + TCGv_i64 rh, rl, ah, al, bh, bl; \ | ||
94 | + \ | ||
95 | + CHECK_SXE; \ | ||
96 | + \ | ||
97 | + rh = tcg_temp_new_i64(); \ | ||
98 | + rl = tcg_temp_new_i64(); \ | ||
99 | + ah = tcg_temp_new_i64(); \ | ||
100 | + al = tcg_temp_new_i64(); \ | ||
101 | + bh = tcg_temp_new_i64(); \ | ||
102 | + bl = tcg_temp_new_i64(); \ | ||
103 | + \ | ||
104 | + get_vreg64(ah, a->vj, 1); \ | ||
105 | + get_vreg64(al, a->vj, 0); \ | ||
106 | + get_vreg64(bh, a->vk, 1); \ | ||
107 | + get_vreg64(bl, a->vk, 0); \ | ||
108 | + \ | ||
109 | + tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \ | ||
110 | + \ | ||
111 | + set_vreg64(rh, a->vd, 1); \ | ||
112 | + set_vreg64(rl, a->vd, 0); \ | ||
113 | + \ | ||
114 | + return true; \ | ||
115 | +} | ||
116 | + | ||
117 | +VADDSUB_Q(add) | ||
118 | +VADDSUB_Q(sub) | ||
119 | + | ||
120 | +TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub) | ||
121 | +TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub) | ||
122 | +TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub) | ||
123 | +TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub) | ||
124 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/target/loongarch/insns.decode | ||
127 | +++ b/target/loongarch/insns.decode | ||
128 | @@ -XXX,XX +XXX,XX @@ ldpte 0000 01100100 01 ........ ..... 00000 @j_i | ||
129 | ertn 0000 01100100 10000 01110 00000 00000 @empty | ||
130 | idle 0000 01100100 10001 ............... @i15 | ||
131 | dbcl 0000 00000010 10101 ............... @i15 | ||
132 | + | ||
133 | +# | ||
134 | +# LSX Argument sets | ||
135 | +# | ||
136 | + | ||
137 | +&vvv vd vj vk | ||
138 | + | ||
139 | +# | ||
140 | +# LSX Formats | ||
141 | +# | ||
142 | +@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
143 | + | ||
144 | +vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
145 | +vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
146 | +vadd_w 0111 00000000 10110 ..... ..... ..... @vvv | ||
147 | +vadd_d 0111 00000000 10111 ..... ..... ..... @vvv | ||
148 | +vadd_q 0111 00010010 11010 ..... ..... ..... @vvv | ||
149 | +vsub_b 0111 00000000 11000 ..... ..... ..... @vvv | ||
150 | +vsub_h 0111 00000000 11001 ..... ..... ..... @vvv | ||
151 | +vsub_w 0111 00000000 11010 ..... ..... ..... @vvv | ||
152 | +vsub_d 0111 00000000 11011 ..... ..... ..... @vvv | ||
153 | +vsub_q 0111 00010010 11011 ..... ..... ..... @vvv | ||
154 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
155 | index XXXXXXX..XXXXXXX 100644 | ||
156 | --- a/target/loongarch/translate.c | ||
157 | +++ b/target/loongarch/translate.c | ||
158 | @@ -XXX,XX +XXX,XX @@ | ||
159 | #include "qemu/osdep.h" | ||
160 | #include "cpu.h" | ||
161 | #include "tcg/tcg-op.h" | ||
162 | +#include "tcg/tcg-op-gvec.h" | ||
163 | + | ||
164 | #include "exec/translator.h" | ||
165 | #include "exec/helper-proto.h" | ||
166 | #include "exec/helper-gen.h" | ||
167 | @@ -XXX,XX +XXX,XX @@ TCGv_i64 cpu_fpr[32]; | ||
168 | #define DISAS_EXIT DISAS_TARGET_1 | ||
169 | #define DISAS_EXIT_UPDATE DISAS_TARGET_2 | ||
170 | |||
171 | +static inline int vec_full_offset(int regno) | ||
172 | +{ | ||
173 | + return offsetof(CPULoongArchState, fpr[regno]); | ||
174 | +} | ||
175 | + | ||
176 | +static inline void get_vreg64(TCGv_i64 dest, int regno, int index) | ||
177 | +{ | ||
178 | + tcg_gen_ld_i64(dest, cpu_env, | ||
179 | + offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); | ||
180 | +} | ||
181 | + | ||
182 | +static inline void set_vreg64(TCGv_i64 src, int regno, int index) | ||
183 | +{ | ||
184 | + tcg_gen_st_i64(src, cpu_env, | ||
185 | + offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); | ||
186 | +} | ||
187 | + | ||
188 | static inline int plus_1(DisasContext *ctx, int x) | ||
189 | { | ||
190 | return x + 1; | ||
191 | @@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase, | ||
192 | CPUState *cs) | ||
193 | { | ||
194 | int64_t bound; | ||
195 | + CPULoongArchState *env = cs->env_ptr; | ||
196 | DisasContext *ctx = container_of(dcbase, DisasContext, base); | ||
197 | |||
198 | ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; | ||
199 | @@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase, | ||
200 | bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4; | ||
201 | ctx->base.max_insns = MIN(ctx->base.max_insns, bound); | ||
202 | |||
203 | + if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) { | ||
204 | + ctx->vl = LSX_LEN; | ||
205 | + } | ||
206 | + | ||
207 | ctx->zero = tcg_constant_tl(0); | ||
208 | } | ||
209 | |||
210 | diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h | ||
211 | index XXXXXXX..XXXXXXX 100644 | ||
212 | --- a/target/loongarch/translate.h | ||
213 | +++ b/target/loongarch/translate.h | ||
214 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
215 | uint32_t opcode; | ||
216 | uint16_t mem_idx; | ||
217 | uint16_t plv; | ||
218 | + int vl; /* Vector length */ | ||
219 | TCGv zero; | ||
220 | } DisasContext; | ||
221 | |||
222 | -- | ||
223 | 2.31.1 | diff view generated by jsdifflib |
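For vadd.q/vsub.q above, tcg_gen_add2_i64/tcg_gen_sub2_i64 operate on the two 64-bit halves with carry/borrow propagation. The following standalone C sketch is not QEMU code, just the arithmetic the generated ops perform for the addition case:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* 128-bit add from 64-bit halves: add the low words, carry into the high. */
static void add128(uint64_t *rl, uint64_t *rh,
                   uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    *rl = al + bl;
    *rh = ah + bh + (*rl < al);   /* carry out of the low 64 bits */
}

int main(void)
{
    uint64_t rl, rh;

    add128(&rl, &rh, UINT64_MAX, 0, 1, 0);
    printf("0x%016" PRIx64 "%016" PRIx64 "\n", rh, rl);  /* 0x...0001 0x...0000 */
    return 0;
}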
1 | Introduce set_fpr() and get_fpr() and remove cpu_fpr. | 1 | Supported CPU number can be acquired from function |
---|---|---|---|
2 | possible_cpu_arch_ids(), cpu-num property is not necessary and can | ||
3 | be removed. | ||
2 | 4 | ||
3 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> |
4 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 6 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> |
5 | Message-Id: <20230504122810.4094787-44-gaosong@loongson.cn> | ||
6 | --- | 7 | --- |
7 | .../loongarch/insn_trans/trans_farith.c.inc | 72 +++++++++++++++---- | 8 | hw/intc/loongarch_ipi.c | 13 ++++++++----- |
8 | target/loongarch/insn_trans/trans_fcmp.c.inc | 12 ++-- | 9 | include/hw/intc/loongson_ipi_common.h | 2 ++ |
9 | .../loongarch/insn_trans/trans_fmemory.c.inc | 37 ++++++---- | 10 | 2 files changed, 10 insertions(+), 5 deletions(-) |
10 | target/loongarch/insn_trans/trans_fmov.c.inc | 31 +++++--- | ||
11 | target/loongarch/translate.c | 20 ++++-- | ||
12 | 5 files changed, 129 insertions(+), 43 deletions(-) | ||
13 | 11 | ||
14 | diff --git a/target/loongarch/insn_trans/trans_farith.c.inc b/target/loongarch/insn_trans/trans_farith.c.inc | 12 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/loongarch/insn_trans/trans_farith.c.inc | 14 | --- a/hw/intc/loongarch_ipi.c |
17 | +++ b/target/loongarch/insn_trans/trans_farith.c.inc | 15 | +++ b/hw/intc/loongarch_ipi.c |
18 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp) |
19 | static bool gen_fff(DisasContext *ctx, arg_fff *a, | ||
20 | void (*func)(TCGv, TCGv_env, TCGv, TCGv)) | ||
21 | { | 17 | { |
22 | + TCGv dest = get_fpr(ctx, a->fd); | 18 | LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev); |
23 | + TCGv src1 = get_fpr(ctx, a->fj); | 19 | LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev); |
24 | + TCGv src2 = get_fpr(ctx, a->fk); | 20 | + MachineState *machine = MACHINE(qdev_get_machine()); |
25 | + | 21 | + MachineClass *mc = MACHINE_GET_CLASS(machine); |
26 | CHECK_FPE; | 22 | + const CPUArchIdList *id_list; |
27 | 23 | Error *local_err = NULL; | |
28 | - func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk]); | 24 | int i; |
29 | + func(dest, cpu_env, src1, src2); | 25 | |
30 | + set_fpr(a->fd, dest); | 26 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp) |
31 | + | 27 | return; |
32 | return true; | ||
33 | } | ||
34 | |||
35 | static bool gen_ff(DisasContext *ctx, arg_ff *a, | ||
36 | void (*func)(TCGv, TCGv_env, TCGv)) | ||
37 | { | ||
38 | + TCGv dest = get_fpr(ctx, a->fd); | ||
39 | + TCGv src = get_fpr(ctx, a->fj); | ||
40 | + | ||
41 | CHECK_FPE; | ||
42 | |||
43 | - func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj]); | ||
44 | + func(dest, cpu_env, src); | ||
45 | + set_fpr(a->fd, dest); | ||
46 | + | ||
47 | return true; | ||
48 | } | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ static bool gen_muladd(DisasContext *ctx, arg_ffff *a, | ||
51 | int flag) | ||
52 | { | ||
53 | TCGv_i32 tflag = tcg_constant_i32(flag); | ||
54 | + TCGv dest = get_fpr(ctx, a->fd); | ||
55 | + TCGv src1 = get_fpr(ctx, a->fj); | ||
56 | + TCGv src2 = get_fpr(ctx, a->fk); | ||
57 | + TCGv src3 = get_fpr(ctx, a->fa); | ||
58 | |||
59 | CHECK_FPE; | ||
60 | |||
61 | - func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj], | ||
62 | - cpu_fpr[a->fk], cpu_fpr[a->fa], tflag); | ||
63 | + func(dest, cpu_env, src1, src2, src3, tflag); | ||
64 | + set_fpr(a->fd, dest); | ||
65 | + | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a) | ||
70 | { | ||
71 | + TCGv dest = get_fpr(ctx, a->fd); | ||
72 | + TCGv src1 = get_fpr(ctx, a->fk); | ||
73 | + TCGv src2 = get_fpr(ctx, a->fj); | ||
74 | + | ||
75 | CHECK_FPE; | ||
76 | |||
77 | - tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 31); | ||
78 | + tcg_gen_deposit_i64(dest, src1, src2, 0, 31); | ||
79 | + set_fpr(a->fd, dest); | ||
80 | + | ||
81 | return true; | ||
82 | } | ||
83 | |||
84 | static bool trans_fcopysign_d(DisasContext *ctx, arg_fcopysign_d *a) | ||
85 | { | ||
86 | + TCGv dest = get_fpr(ctx, a->fd); | ||
87 | + TCGv src1 = get_fpr(ctx, a->fk); | ||
88 | + TCGv src2 = get_fpr(ctx, a->fj); | ||
89 | + | ||
90 | CHECK_FPE; | ||
91 | |||
92 | - tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 63); | ||
93 | + tcg_gen_deposit_i64(dest, src1, src2, 0, 63); | ||
94 | + set_fpr(a->fd, dest); | ||
95 | + | ||
96 | return true; | ||
97 | } | ||
98 | |||
99 | static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a) | ||
100 | { | ||
101 | + TCGv dest = get_fpr(ctx, a->fd); | ||
102 | + TCGv src = get_fpr(ctx, a->fj); | ||
103 | + | ||
104 | CHECK_FPE; | ||
105 | |||
106 | - tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 31)); | ||
107 | - gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]); | ||
108 | + tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31)); | ||
109 | + gen_nanbox_s(dest, dest); | ||
110 | + set_fpr(a->fd, dest); | ||
111 | + | ||
112 | return true; | ||
113 | } | ||
114 | |||
115 | static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a) | ||
116 | { | ||
117 | + TCGv dest = get_fpr(ctx, a->fd); | ||
118 | + TCGv src = get_fpr(ctx, a->fj); | ||
119 | + | ||
120 | CHECK_FPE; | ||
121 | |||
122 | - tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 63)); | ||
123 | + tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63)); | ||
124 | + set_fpr(a->fd, dest); | ||
125 | + | ||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a) | ||
130 | { | ||
131 | + TCGv dest = get_fpr(ctx, a->fd); | ||
132 | + TCGv src = get_fpr(ctx, a->fj); | ||
133 | + | ||
134 | CHECK_FPE; | ||
135 | |||
136 | - tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x80000000); | ||
137 | - gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]); | ||
138 | + tcg_gen_xori_i64(dest, src, 0x80000000); | ||
139 | + gen_nanbox_s(dest, dest); | ||
140 | + set_fpr(a->fd, dest); | ||
141 | + | ||
142 | return true; | ||
143 | } | ||
144 | |||
145 | static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a) | ||
146 | { | ||
147 | + TCGv dest = get_fpr(ctx, a->fd); | ||
148 | + TCGv src = get_fpr(ctx, a->fj); | ||
149 | + | ||
150 | CHECK_FPE; | ||
151 | |||
152 | - tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x8000000000000000LL); | ||
153 | + tcg_gen_xori_i64(dest, src, 0x8000000000000000LL); | ||
154 | + set_fpr(a->fd, dest); | ||
155 | + | ||
156 | return true; | ||
157 | } | ||
158 | |||
159 | diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/insn_trans/trans_fcmp.c.inc | ||
160 | index XXXXXXX..XXXXXXX 100644 | ||
161 | --- a/target/loongarch/insn_trans/trans_fcmp.c.inc | ||
162 | +++ b/target/loongarch/insn_trans/trans_fcmp.c.inc | ||
163 | @@ -XXX,XX +XXX,XX @@ static uint32_t get_fcmp_flags(int cond) | ||
164 | |||
165 | static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) | ||
166 | { | ||
167 | - TCGv var; | ||
168 | + TCGv var, src1, src2; | ||
169 | uint32_t flags; | ||
170 | void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); | ||
171 | |||
172 | CHECK_FPE; | ||
173 | |||
174 | var = tcg_temp_new(); | ||
175 | + src1 = get_fpr(ctx, a->fj); | ||
176 | + src2 = get_fpr(ctx, a->fk); | ||
177 | fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); | ||
178 | flags = get_fcmp_flags(a->fcond >> 1); | ||
179 | |||
180 | - fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags)); | ||
181 | + fn(var, cpu_env, src1, src2, tcg_constant_i32(flags)); | ||
182 | |||
183 | tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd])); | ||
184 | return true; | ||
185 | @@ -XXX,XX +XXX,XX @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) | ||
186 | |||
187 | static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) | ||
188 | { | ||
189 | - TCGv var; | ||
190 | + TCGv var, src1, src2; | ||
191 | uint32_t flags; | ||
192 | void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); | ||
193 | |||
194 | CHECK_FPE; | ||
195 | |||
196 | var = tcg_temp_new(); | ||
197 | + src1 = get_fpr(ctx, a->fj); | ||
198 | + src2 = get_fpr(ctx, a->fk); | ||
199 | fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); | ||
200 | flags = get_fcmp_flags(a->fcond >> 1); | ||
201 | |||
202 | - fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags)); | ||
203 | + fn(var, cpu_env, src1, src2, tcg_constant_i32(flags)); | ||
204 | |||
205 | tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd])); | ||
206 | return true; | ||
207 | diff --git a/target/loongarch/insn_trans/trans_fmemory.c.inc b/target/loongarch/insn_trans/trans_fmemory.c.inc | ||
208 | index XXXXXXX..XXXXXXX 100644 | ||
209 | --- a/target/loongarch/insn_trans/trans_fmemory.c.inc | ||
210 | +++ b/target/loongarch/insn_trans/trans_fmemory.c.inc | ||
211 | @@ -XXX,XX +XXX,XX @@ static void maybe_nanbox_load(TCGv freg, MemOp mop) | ||
212 | static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) | ||
213 | { | ||
214 | TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
215 | + TCGv dest = get_fpr(ctx, a->fd); | ||
216 | |||
217 | CHECK_FPE; | ||
218 | |||
219 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) | ||
220 | addr = temp; | ||
221 | } | 28 | } |
222 | 29 | ||
223 | - tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | 30 | - if (lics->num_cpu == 0) { |
224 | - maybe_nanbox_load(cpu_fpr[a->fd], mop); | 31 | - error_setg(errp, "num-cpu must be at least 1"); |
225 | + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); | 32 | - return; |
226 | + maybe_nanbox_load(dest, mop); | ||
227 | + set_fpr(a->fd, dest); | ||
228 | |||
229 | return true; | ||
230 | } | ||
231 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) | ||
232 | static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) | ||
233 | { | ||
234 | TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
235 | + TCGv src = get_fpr(ctx, a->fd); | ||
236 | |||
237 | CHECK_FPE; | ||
238 | |||
239 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) | ||
240 | addr = temp; | ||
241 | } | ||
242 | |||
243 | - tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
244 | + tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop); | ||
245 | + | ||
246 | return true; | ||
247 | } | ||
248 | |||
249 | @@ -XXX,XX +XXX,XX @@ static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
250 | { | ||
251 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
252 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
253 | + TCGv dest = get_fpr(ctx, a->fd); | ||
254 | TCGv addr; | ||
255 | |||
256 | CHECK_FPE; | ||
257 | |||
258 | addr = tcg_temp_new(); | ||
259 | tcg_gen_add_tl(addr, src1, src2); | ||
260 | - tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
261 | - maybe_nanbox_load(cpu_fpr[a->fd], mop); | ||
262 | + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); | ||
263 | + maybe_nanbox_load(dest, mop); | ||
264 | + set_fpr(a->fd, dest); | ||
265 | |||
266 | return true; | ||
267 | } | ||
268 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
269 | { | ||
270 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
271 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
272 | + TCGv src3 = get_fpr(ctx, a->fd); | ||
273 | TCGv addr; | ||
274 | |||
275 | CHECK_FPE; | ||
276 | |||
277 | addr = tcg_temp_new(); | ||
278 | tcg_gen_add_tl(addr, src1, src2); | ||
279 | - tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
280 | + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); | ||
281 | |||
282 | return true; | ||
283 | } | ||
284 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
285 | { | ||
286 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
287 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
288 | + TCGv dest = get_fpr(ctx, a->fd); | ||
289 | TCGv addr; | ||
290 | |||
291 | CHECK_FPE; | ||
292 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
293 | addr = tcg_temp_new(); | ||
294 | gen_helper_asrtgt_d(cpu_env, src1, src2); | ||
295 | tcg_gen_add_tl(addr, src1, src2); | ||
296 | - tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
297 | - maybe_nanbox_load(cpu_fpr[a->fd], mop); | ||
298 | + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); | ||
299 | + maybe_nanbox_load(dest, mop); | ||
300 | + set_fpr(a->fd, dest); | ||
301 | |||
302 | return true; | ||
303 | } | ||
304 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
305 | { | ||
306 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
307 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
308 | + TCGv src3 = get_fpr(ctx, a->fd); | ||
309 | TCGv addr; | ||
310 | |||
311 | CHECK_FPE; | ||
312 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
313 | addr = tcg_temp_new(); | ||
314 | gen_helper_asrtgt_d(cpu_env, src1, src2); | ||
315 | tcg_gen_add_tl(addr, src1, src2); | ||
316 | - tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
317 | + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); | ||
318 | |||
319 | return true; | ||
320 | } | ||
321 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
322 | { | ||
323 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
324 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
325 | + TCGv dest = get_fpr(ctx, a->fd); | ||
326 | TCGv addr; | ||
327 | |||
328 | CHECK_FPE; | ||
329 | @@ -XXX,XX +XXX,XX @@ static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
330 | addr = tcg_temp_new(); | ||
331 | gen_helper_asrtle_d(cpu_env, src1, src2); | ||
332 | tcg_gen_add_tl(addr, src1, src2); | ||
333 | - tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
334 | - maybe_nanbox_load(cpu_fpr[a->fd], mop); | ||
335 | + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); | ||
336 | + maybe_nanbox_load(dest, mop); | ||
337 | + set_fpr(a->fd, dest); | ||
338 | |||
339 | return true; | ||
340 | } | ||
341 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
342 | { | ||
343 | TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
344 | TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
345 | + TCGv src3 = get_fpr(ctx, a->fd); | ||
346 | TCGv addr; | ||
347 | |||
348 | CHECK_FPE; | ||
349 | @@ -XXX,XX +XXX,XX @@ static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop) | ||
350 | addr = tcg_temp_new(); | ||
351 | gen_helper_asrtle_d(cpu_env, src1, src2); | ||
352 | tcg_gen_add_tl(addr, src1, src2); | ||
353 | - tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop); | ||
354 | + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); | ||
355 | |||
356 | return true; | ||
357 | } | ||
358 | diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc | ||
359 | index XXXXXXX..XXXXXXX 100644 | ||
360 | --- a/target/loongarch/insn_trans/trans_fmov.c.inc | ||
361 | +++ b/target/loongarch/insn_trans/trans_fmov.c.inc | ||
362 | @@ -XXX,XX +XXX,XX @@ static const uint32_t fcsr_mask[4] = { | ||
363 | static bool trans_fsel(DisasContext *ctx, arg_fsel *a) | ||
364 | { | ||
365 | TCGv zero = tcg_constant_tl(0); | ||
366 | + TCGv dest = get_fpr(ctx, a->fd); | ||
367 | + TCGv src1 = get_fpr(ctx, a->fj); | ||
368 | + TCGv src2 = get_fpr(ctx, a->fk); | ||
369 | TCGv cond; | ||
370 | |||
371 | CHECK_FPE; | ||
372 | |||
373 | cond = tcg_temp_new(); | ||
374 | tcg_gen_ld8u_tl(cond, cpu_env, offsetof(CPULoongArchState, cf[a->ca])); | ||
375 | - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_fpr[a->fd], cond, zero, | ||
376 | - cpu_fpr[a->fj], cpu_fpr[a->fk]); | ||
377 | + tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2); | ||
378 | + set_fpr(a->fd, dest); | ||
379 | |||
380 | return true; | ||
381 | } | ||
382 | @@ -XXX,XX +XXX,XX @@ static bool trans_fsel(DisasContext *ctx, arg_fsel *a) | ||
383 | static bool gen_f2f(DisasContext *ctx, arg_ff *a, | ||
384 | void (*func)(TCGv, TCGv), bool nanbox) | ||
385 | { | ||
386 | - TCGv dest = cpu_fpr[a->fd]; | ||
387 | - TCGv src = cpu_fpr[a->fj]; | ||
388 | + TCGv dest = get_fpr(ctx, a->fd); | ||
389 | + TCGv src = get_fpr(ctx, a->fj); | ||
390 | |||
391 | CHECK_FPE; | ||
392 | |||
393 | func(dest, src); | ||
394 | if (nanbox) { | ||
395 | - gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]); | ||
396 | + gen_nanbox_s(dest, dest); | ||
397 | } | ||
398 | + set_fpr(a->fd, dest); | ||
399 | |||
400 | return true; | ||
401 | } | ||
402 | @@ -XXX,XX +XXX,XX @@ static bool gen_r2f(DisasContext *ctx, arg_fr *a, | ||
403 | void (*func)(TCGv, TCGv)) | ||
404 | { | ||
405 | TCGv src = gpr_src(ctx, a->rj, EXT_NONE); | ||
406 | + TCGv dest = get_fpr(ctx, a->fd); | ||
407 | |||
408 | CHECK_FPE; | ||
409 | |||
410 | - func(cpu_fpr[a->fd], src); | ||
411 | + func(dest, src); | ||
412 | + set_fpr(a->fd, dest); | ||
413 | + | ||
414 | return true; | ||
415 | } | ||
416 | |||
417 | @@ -XXX,XX +XXX,XX @@ static bool gen_f2r(DisasContext *ctx, arg_rf *a, | ||
418 | void (*func)(TCGv, TCGv)) | ||
419 | { | ||
420 | TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); | ||
421 | + TCGv src = get_fpr(ctx, a->fj); | ||
422 | |||
423 | CHECK_FPE; | ||
424 | |||
425 | - func(dest, cpu_fpr[a->fj]); | ||
426 | + func(dest, src); | ||
427 | gen_set_gpr(a->rd, dest, EXT_NONE); | ||
428 | |||
429 | return true; | ||
430 | @@ -XXX,XX +XXX,XX @@ static void gen_movfrh2gr_s(TCGv dest, TCGv src) | ||
431 | static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a) | ||
432 | { | ||
433 | TCGv t0; | ||
434 | + TCGv src = get_fpr(ctx, a->fj); | ||
435 | |||
436 | CHECK_FPE; | ||
437 | |||
438 | t0 = tcg_temp_new(); | ||
439 | - tcg_gen_andi_tl(t0, cpu_fpr[a->fj], 0x1); | ||
440 | + tcg_gen_andi_tl(t0, src, 0x1); | ||
441 | tcg_gen_st8_tl(t0, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); | ||
442 | |||
443 | return true; | ||
444 | @@ -XXX,XX +XXX,XX @@ static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a) | ||
445 | |||
446 | static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a) | ||
447 | { | ||
448 | + TCGv dest = get_fpr(ctx, a->fd); | ||
449 | + | ||
450 | CHECK_FPE; | ||
451 | |||
452 | - tcg_gen_ld8u_tl(cpu_fpr[a->fd], cpu_env, | ||
453 | + tcg_gen_ld8u_tl(dest, cpu_env, | ||
454 | offsetof(CPULoongArchState, cf[a->cj & 0x7])); | ||
455 | + set_fpr(a->fd, dest); | ||
456 | + | ||
457 | return true; | ||
458 | } | ||
459 | |||
460 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
461 | index XXXXXXX..XXXXXXX 100644 | ||
462 | --- a/target/loongarch/translate.c | ||
463 | +++ b/target/loongarch/translate.c | ||
464 | @@ -XXX,XX +XXX,XX @@ | ||
465 | /* Global register indices */ | ||
466 | TCGv cpu_gpr[32], cpu_pc; | ||
467 | static TCGv cpu_lladdr, cpu_llval; | ||
468 | -TCGv_i64 cpu_fpr[32]; | ||
469 | |||
470 | #include "exec/gen-icount.h" | ||
471 | |||
472 | @@ -XXX,XX +XXX,XX @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext) | ||
473 | } | ||
474 | } | ||
475 | |||
476 | +static TCGv get_fpr(DisasContext *ctx, int reg_num) | ||
477 | +{ | ||
478 | + TCGv t = tcg_temp_new(); | ||
479 | + tcg_gen_ld_i64(t, cpu_env, | ||
480 | + offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); | ||
481 | + return t; | ||
482 | +} | ||
483 | + | ||
484 | +static void set_fpr(int reg_num, TCGv val) | ||
485 | +{ | ||
486 | + tcg_gen_st_i64(val, cpu_env, | ||
487 | + offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); | ||
488 | +} | ||
489 | + | ||
490 | #include "decode-insns.c.inc" | ||
491 | #include "insn_trans/trans_arith.c.inc" | ||
492 | #include "insn_trans/trans_shift.c.inc" | ||
493 | @@ -XXX,XX +XXX,XX @@ void loongarch_translate_init(void) | ||
494 | regnames[i]); | ||
495 | } | ||
496 | |||
497 | - for (i = 0; i < 32; i++) { | ||
498 | - int off = offsetof(CPULoongArchState, fpr[i]); | ||
499 | - cpu_fpr[i] = tcg_global_mem_new_i64(cpu_env, off, fregnames[i]); | ||
500 | - } | 33 | - } |
501 | - | 34 | - |
502 | cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc"); | 35 | + assert(mc->possible_cpu_arch_ids); |
503 | cpu_lladdr = tcg_global_mem_new(cpu_env, | 36 | + id_list = mc->possible_cpu_arch_ids(machine); |
504 | offsetof(CPULoongArchState, lladdr), "lladdr"); | 37 | + lics->num_cpu = id_list->len; |
38 | lics->cpu = g_new0(IPICore, lics->num_cpu); | ||
39 | for (i = 0; i < lics->num_cpu; i++) { | ||
40 | + lics->cpu[i].arch_id = id_list->cpus[i].arch_id; | ||
41 | + lics->cpu[i].cpu = CPU(id_list->cpus[i].cpu); | ||
42 | lics->cpu[i].ipi = lics; | ||
43 | qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1); | ||
44 | } | ||
45 | diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/include/hw/intc/loongson_ipi_common.h | ||
48 | +++ b/include/hw/intc/loongson_ipi_common.h | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef struct IPICore { | ||
50 | /* 64bit buf divide into 2 32-bit buf */ | ||
51 | uint32_t buf[IPI_MBX_NUM * 2]; | ||
52 | qemu_irq irq; | ||
53 | + uint64_t arch_id; | ||
54 | + CPUState *cpu; | ||
55 | } IPICore; | ||
56 | |||
57 | struct LoongsonIPICommonState { | ||
505 | -- | 58 | -- |
506 | 2.31.1 | 59 | 2.43.5 | diff view generated by jsdifflib |
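The realize() hunk that ends above sizes the per-CPU IPI state from the machine's possible CPU list instead of a num-cpu property. A minimal sketch of that pattern follows; example_realize() is a made-up name and the body is illustrative only, assuming the standard MachineClass::possible_cpu_arch_ids hook is present.

    /* Sketch only: derive per-CPU state from possible_cpu_arch_ids(). */
    static void example_realize(DeviceState *dev, Error **errp)
    {
        MachineState *ms = MACHINE(qdev_get_machine());
        MachineClass *mc = MACHINE_GET_CLASS(ms);
        const CPUArchIdList *id_list;

        assert(mc->possible_cpu_arch_ids);
        id_list = mc->possible_cpu_arch_ids(ms);

        for (int i = 0; i < id_list->len; i++) {
            /* arch_id is the physical CPU id; cpu may still be NULL for
             * slots that have not been hotplugged yet. */
            uint64_t arch_id = id_list->cpus[i].arch_id;
            CPUState *cs = CPU(id_list->cpus[i].cpu);
            (void)arch_id;
            (void)cs;
        }
    }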
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | Since the CPU count can be acquired from possible_cpu_arch_ids(),
---|---|---|---|
2 | the num-cpu property is not necessary. Remove the num-cpu property
3 | from the TYPE_LOONGARCH_IPI object.
2 | 4 | ||
3 | The calling function is already working with hwaddr and uint64_t so | 5 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> |
4 | let's avoid bringing target_ulong in if we don't need to. | 6 | Reviewed-by: Bibo Mao <maobibo@loongson.cn>
5 | |||
6 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Reviewed-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230404132711.2563638-1-alex.bennee@linaro.org> | ||
10 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
11 | --- | 7 | --- |
12 | hw/intc/loongarch_ipi.c | 2 +- | 8 | hw/intc/loongarch_ipi.c | 5 ----- |
13 | 1 file changed, 1 insertion(+), 1 deletion(-) | 9 | hw/loongarch/virt.c | 1 - |
10 | 2 files changed, 6 deletions(-) | ||
14 | 11 | ||
15 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c | 12 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/intc/loongarch_ipi.c | 14 | --- a/hw/intc/loongarch_ipi.c |
18 | +++ b/hw/intc/loongarch_ipi.c | 15 | +++ b/hw/intc/loongarch_ipi.c |
19 | @@ -XXX,XX +XXX,XX @@ static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size) | 16 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp) |
20 | return ret; | 17 | } |
21 | } | 18 | } |
22 | 19 | ||
23 | -static void send_ipi_data(CPULoongArchState *env, target_ulong val, target_ulong addr) | 20 | -static const Property loongarch_ipi_properties[] = { |
24 | +static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) | 21 | - DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), |
22 | -}; | ||
23 | - | ||
24 | static void loongarch_ipi_class_init(ObjectClass *klass, void *data) | ||
25 | { | 25 | { |
26 | int i, mask = 0, data = 0; | 26 | LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass); |
27 | 27 | @@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_class_init(ObjectClass *klass, void *data) | |
28 | |||
29 | device_class_set_parent_realize(dc, loongarch_ipi_realize, | ||
30 | &lic->parent_realize); | ||
31 | - device_class_set_props(dc, loongarch_ipi_properties); | ||
32 | licc->get_iocsr_as = get_iocsr_as; | ||
33 | licc->cpu_by_arch_id = loongarch_cpu_by_arch_id; | ||
34 | } | ||
35 | diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/hw/loongarch/virt.c | ||
38 | +++ b/hw/loongarch/virt.c | ||
39 | @@ -XXX,XX +XXX,XX @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) | ||
40 | |||
41 | /* Create IPI device */ | ||
42 | ipi = qdev_new(TYPE_LOONGARCH_IPI); | ||
43 | - qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus); | ||
44 | sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); | ||
45 | |||
46 | /* IPI iocsr memory region */ | ||
28 | -- | 47 | -- |
29 | 2.31.1 | 48 | 2.43.5 |
30 | |||
31 | diff view generated by jsdifflib |
1 | This patch includes: | 1 | Add a logical cpu index parameter to the cpu_by_arch_id() function;
---|---|---|---|
2 | - VLD[X], VST[X]; | 2 | CPUState::cpu_index is the logical cpu slot index for possible_cpus.
3 | - VLDREPL.{B/H/W/D}; | ||
4 | - VSTELM.{B/H/W/D}. | 4 | At the same time it is the logical index into LoongsonIPICommonState::IPICore;
5 | 5 | hide direct access to CPUState::cpu_index here, since the index now comes from
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | At the same time it is logic index with LoongsonIPICommonState::IPICore, |
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 5 | here hide access for CPUState::cpu_index directly, it comes from |
8 | Message-Id: <20230504122810.4094787-42-gaosong@loongson.cn> | 6 | function cpu_by_arch_id(). |
7 | |||
8 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> | ||
9 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> | ||
9 | --- | 10 | --- |
10 | target/loongarch/disas.c | 34 +++++ | 11 | hw/intc/loongarch_ipi.c | 19 +++++++++++++++---- |
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 159 ++++++++++++++++++++ | 12 | hw/intc/loongson_ipi.c | 23 ++++++++++++++++++++++- |
12 | target/loongarch/insns.decode | 36 +++++ | 13 | hw/intc/loongson_ipi_common.c | 21 ++++++++++++--------- |
13 | target/loongarch/translate.c | 10 ++ | 14 | include/hw/intc/loongson_ipi_common.h | 3 ++- |
14 | 4 files changed, 239 insertions(+) | 15 | 4 files changed, 51 insertions(+), 15 deletions(-) |
15 | 16 | ||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | 17 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/loongarch/disas.c | 19 | --- a/hw/intc/loongarch_ipi.c |
19 | +++ b/target/loongarch/disas.c | 20 | +++ b/hw/intc/loongarch_ipi.c |
20 | @@ -XXX,XX +XXX,XX @@ static inline int plus_1(DisasContext *ctx, int x) | 21 | @@ -XXX,XX +XXX,XX @@ static CPUArchId *find_cpu_by_archid(MachineState *ms, uint32_t id) |
21 | return x + 1; | 22 | return found_cpu; |
22 | } | 23 | } |
23 | 24 | ||
24 | +static inline int shl_1(DisasContext *ctx, int x) | 25 | -static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id) |
26 | +static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics, | ||
27 | + int64_t arch_id, int *index, CPUState **pcs) | ||
28 | { | ||
29 | MachineState *machine = MACHINE(qdev_get_machine()); | ||
30 | CPUArchId *archid; | ||
31 | + CPUState *cs; | ||
32 | |||
33 | archid = find_cpu_by_archid(machine, arch_id); | ||
34 | - if (archid) { | ||
35 | - return CPU(archid->cpu); | ||
36 | + if (archid && archid->cpu) { | ||
37 | + cs = archid->cpu; | ||
38 | + if (index) { | ||
39 | + *index = cs->cpu_index; | ||
40 | + } | ||
41 | + | ||
42 | + if (pcs) { | ||
43 | + *pcs = cs; | ||
44 | + } | ||
45 | + | ||
46 | + return MEMTX_OK; | ||
47 | } | ||
48 | |||
49 | - return NULL; | ||
50 | + return MEMTX_ERROR; | ||
51 | } | ||
52 | |||
53 | static void loongarch_ipi_realize(DeviceState *dev, Error **errp) | ||
54 | diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/hw/intc/loongson_ipi.c | ||
57 | +++ b/hw/intc/loongson_ipi.c | ||
58 | @@ -XXX,XX +XXX,XX @@ static AddressSpace *get_iocsr_as(CPUState *cpu) | ||
59 | return NULL; | ||
60 | } | ||
61 | |||
62 | +static int loongson_cpu_by_arch_id(LoongsonIPICommonState *lics, | ||
63 | + int64_t arch_id, int *index, CPUState **pcs) | ||
25 | +{ | 64 | +{ |
26 | + return x << 1; | 65 | + CPUState *cs; |
66 | + | ||
67 | + cs = cpu_by_arch_id(arch_id); | ||
68 | + if (cs == NULL) { | ||
69 | + return MEMTX_ERROR; | ||
70 | + } | ||
71 | + | ||
72 | + if (index) { | ||
73 | + *index = cs->cpu_index; | ||
74 | + } | ||
75 | + | ||
76 | + if (pcs) { | ||
77 | + *pcs = cs; | ||
78 | + } | ||
79 | + | ||
80 | + return MEMTX_OK; | ||
27 | +} | 81 | +} |
28 | + | 82 | + |
29 | static inline int shl_2(DisasContext *ctx, int x) | 83 | static const MemoryRegionOps loongson_ipi_core_ops = { |
30 | { | 84 | .read_with_attrs = loongson_ipi_core_readl, |
31 | return x << 2; | 85 | .write_with_attrs = loongson_ipi_core_writel, |
86 | @@ -XXX,XX +XXX,XX @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data) | ||
87 | &lic->parent_unrealize); | ||
88 | device_class_set_props(dc, loongson_ipi_properties); | ||
89 | licc->get_iocsr_as = get_iocsr_as; | ||
90 | - licc->cpu_by_arch_id = cpu_by_arch_id; | ||
91 | + licc->cpu_by_arch_id = loongson_cpu_by_arch_id; | ||
32 | } | 92 | } |
33 | 93 | ||
34 | +static inline int shl_3(DisasContext *ctx, int x) | 94 | static const TypeInfo loongson_ipi_types[] = { |
35 | +{ | 95 | diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c |
36 | + return x << 3; | 96 | index XXXXXXX..XXXXXXX 100644 |
37 | +} | 97 | --- a/hw/intc/loongson_ipi_common.c |
38 | + | 98 | +++ b/hw/intc/loongson_ipi_common.c |
39 | #define CSR_NAME(REG) \ | 99 | @@ -XXX,XX +XXX,XX @@ static MemTxResult mail_send(LoongsonIPICommonState *ipi, |
40 | [LOONGARCH_CSR_##REG] = (#REG) | 100 | uint32_t cpuid; |
41 | 101 | hwaddr addr; | |
42 | @@ -XXX,XX +XXX,XX @@ static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic) | 102 | CPUState *cs; |
43 | output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm); | 103 | + int cpu, ret; |
104 | |||
105 | cpuid = extract32(val, 16, 10); | ||
106 | - cs = licc->cpu_by_arch_id(cpuid); | ||
107 | - if (cs == NULL) { | ||
108 | + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); | ||
109 | + if (ret != MEMTX_OK) { | ||
110 | return MEMTX_DECODE_ERROR; | ||
111 | } | ||
112 | |||
113 | /* override requester_id */ | ||
114 | addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c); | ||
115 | - attrs.requester_id = cs->cpu_index; | ||
116 | + attrs.requester_id = cpu; | ||
117 | return send_ipi_data(ipi, cs, val, addr, attrs); | ||
44 | } | 118 | } |
45 | 119 | ||
46 | +static void output_vr_ii(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic) | 120 | @@ -XXX,XX +XXX,XX @@ static MemTxResult any_send(LoongsonIPICommonState *ipi, |
47 | +{ | 121 | uint32_t cpuid; |
48 | + output(ctx, mnemonic, "v%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2); | 122 | hwaddr addr; |
49 | +} | 123 | CPUState *cs; |
50 | + | 124 | + int cpu, ret; |
51 | static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic) | 125 | |
52 | { | 126 | cpuid = extract32(val, 16, 10); |
53 | output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj, a->imm); | 127 | - cs = licc->cpu_by_arch_id(cpuid); |
54 | @@ -XXX,XX +XXX,XX @@ static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic) | 128 | - if (cs == NULL) { |
55 | output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk); | 129 | + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); |
130 | + if (ret != MEMTX_OK) { | ||
131 | return MEMTX_DECODE_ERROR; | ||
132 | } | ||
133 | |||
134 | /* override requester_id */ | ||
135 | addr = val & 0xffff; | ||
136 | - attrs.requester_id = cs->cpu_index; | ||
137 | + attrs.requester_id = cpu; | ||
138 | return send_ipi_data(ipi, cs, val, addr, attrs); | ||
56 | } | 139 | } |
57 | 140 | ||
58 | +static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic) | 141 | @@ -XXX,XX +XXX,XX @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val, |
59 | +{ | 142 | uint32_t cpuid; |
60 | + output(ctx, mnemonic, "v%d, r%d, r%d", a->vd, a->rj, a->rk); | 143 | uint8_t vector; |
61 | +} | 144 | CPUState *cs; |
62 | + | 145 | + int cpu, ret; |
63 | INSN_LSX(vadd_b, vvv) | 146 | |
64 | INSN_LSX(vadd_h, vvv) | 147 | addr &= 0xff; |
65 | INSN_LSX(vadd_w, vvv) | 148 | trace_loongson_ipi_write(size, (uint64_t)addr, val); |
66 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vextrins_d, vv_i) | 149 | @@ -XXX,XX +XXX,XX @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val, |
67 | INSN_LSX(vextrins_w, vv_i) | 150 | cpuid = extract32(val, 16, 10); |
68 | INSN_LSX(vextrins_h, vv_i) | 151 | /* IPI status vector */ |
69 | INSN_LSX(vextrins_b, vv_i) | 152 | vector = extract8(val, 0, 5); |
70 | + | 153 | - cs = licc->cpu_by_arch_id(cpuid); |
71 | +INSN_LSX(vld, vr_i) | 154 | - if (cs == NULL || cs->cpu_index >= ipi->num_cpu) { |
72 | +INSN_LSX(vst, vr_i) | 155 | + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); |
73 | +INSN_LSX(vldx, vrr) | 156 | + if (ret != MEMTX_OK || cpu >= ipi->num_cpu) { |
74 | +INSN_LSX(vstx, vrr) | 157 | return MEMTX_DECODE_ERROR; |
75 | + | 158 | } |
76 | +INSN_LSX(vldrepl_d, vr_i) | 159 | - loongson_ipi_core_writel(&ipi->cpu[cs->cpu_index], CORE_SET_OFF, |
77 | +INSN_LSX(vldrepl_w, vr_i) | 160 | + loongson_ipi_core_writel(&ipi->cpu[cpu], CORE_SET_OFF, |
78 | +INSN_LSX(vldrepl_h, vr_i) | 161 | BIT(vector), 4, attrs); |
79 | +INSN_LSX(vldrepl_b, vr_i) | 162 | break; |
80 | +INSN_LSX(vstelm_d, vr_ii) | 163 | default: |
81 | +INSN_LSX(vstelm_w, vr_ii) | 164 | diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h |
82 | +INSN_LSX(vstelm_h, vr_ii) | ||
83 | +INSN_LSX(vstelm_b, vr_ii) | ||
84 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
85 | index XXXXXXX..XXXXXXX 100644 | 165 | index XXXXXXX..XXXXXXX 100644 |
86 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | 166 | --- a/include/hw/intc/loongson_ipi_common.h |
87 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | 167 | +++ b/include/hw/intc/loongson_ipi_common.h |
88 | @@ -XXX,XX +XXX,XX @@ TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b) | 168 | @@ -XXX,XX +XXX,XX @@ struct LoongsonIPICommonClass { |
89 | TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h) | 169 | DeviceRealize parent_realize; |
90 | TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w) | 170 | DeviceUnrealize parent_unrealize; |
91 | TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d) | 171 | AddressSpace *(*get_iocsr_as)(CPUState *cpu); |
92 | + | 172 | - CPUState *(*cpu_by_arch_id)(int64_t id); |
93 | +static bool trans_vld(DisasContext *ctx, arg_vr_i *a) | 173 | + int (*cpu_by_arch_id)(LoongsonIPICommonState *lics, int64_t id, |
94 | +{ | 174 | + int *index, CPUState **pcs); |
95 | + TCGv addr, temp; | 175 | }; |
96 | + TCGv_i64 rl, rh; | 176 | |
97 | + TCGv_i128 val; | 177 | MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data, |
98 | + | ||
99 | + CHECK_SXE; | ||
100 | + | ||
101 | + addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
102 | + val = tcg_temp_new_i128(); | ||
103 | + rl = tcg_temp_new_i64(); | ||
104 | + rh = tcg_temp_new_i64(); | ||
105 | + | ||
106 | + if (a->imm) { | ||
107 | + temp = tcg_temp_new(); | ||
108 | + tcg_gen_addi_tl(temp, addr, a->imm); | ||
109 | + addr = temp; | ||
110 | + } | ||
111 | + | ||
112 | + tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); | ||
113 | + tcg_gen_extr_i128_i64(rl, rh, val); | ||
114 | + set_vreg64(rh, a->vd, 1); | ||
115 | + set_vreg64(rl, a->vd, 0); | ||
116 | + | ||
117 | + return true; | ||
118 | +} | ||
119 | + | ||
120 | +static bool trans_vst(DisasContext *ctx, arg_vr_i *a) | ||
121 | +{ | ||
122 | + TCGv addr, temp; | ||
123 | + TCGv_i128 val; | ||
124 | + TCGv_i64 ah, al; | ||
125 | + | ||
126 | + CHECK_SXE; | ||
127 | + | ||
128 | + addr = gpr_src(ctx, a->rj, EXT_NONE); | ||
129 | + val = tcg_temp_new_i128(); | ||
130 | + ah = tcg_temp_new_i64(); | ||
131 | + al = tcg_temp_new_i64(); | ||
132 | + | ||
133 | + if (a->imm) { | ||
134 | + temp = tcg_temp_new(); | ||
135 | + tcg_gen_addi_tl(temp, addr, a->imm); | ||
136 | + addr = temp; | ||
137 | + } | ||
138 | + | ||
139 | + get_vreg64(ah, a->vd, 1); | ||
140 | + get_vreg64(al, a->vd, 0); | ||
141 | + tcg_gen_concat_i64_i128(val, al, ah); | ||
142 | + tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); | ||
143 | + | ||
144 | + return true; | ||
145 | +} | ||
146 | + | ||
147 | +static bool trans_vldx(DisasContext *ctx, arg_vrr *a) | ||
148 | +{ | ||
149 | + TCGv addr, src1, src2; | ||
150 | + TCGv_i64 rl, rh; | ||
151 | + TCGv_i128 val; | ||
152 | + | ||
153 | + CHECK_SXE; | ||
154 | + | ||
155 | + addr = tcg_temp_new(); | ||
156 | + src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
157 | + src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
158 | + val = tcg_temp_new_i128(); | ||
159 | + rl = tcg_temp_new_i64(); | ||
160 | + rh = tcg_temp_new_i64(); | ||
161 | + | ||
162 | + tcg_gen_add_tl(addr, src1, src2); | ||
163 | + tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); | ||
164 | + tcg_gen_extr_i128_i64(rl, rh, val); | ||
165 | + set_vreg64(rh, a->vd, 1); | ||
166 | + set_vreg64(rl, a->vd, 0); | ||
167 | + | ||
168 | + return true; | ||
169 | +} | ||
170 | + | ||
171 | +static bool trans_vstx(DisasContext *ctx, arg_vrr *a) | ||
172 | +{ | ||
173 | + TCGv addr, src1, src2; | ||
174 | + TCGv_i64 ah, al; | ||
175 | + TCGv_i128 val; | ||
176 | + | ||
177 | + CHECK_SXE; | ||
178 | + | ||
179 | + addr = tcg_temp_new(); | ||
180 | + src1 = gpr_src(ctx, a->rj, EXT_NONE); | ||
181 | + src2 = gpr_src(ctx, a->rk, EXT_NONE); | ||
182 | + val = tcg_temp_new_i128(); | ||
183 | + ah = tcg_temp_new_i64(); | ||
184 | + al = tcg_temp_new_i64(); | ||
185 | + | ||
186 | + tcg_gen_add_tl(addr, src1, src2); | ||
187 | + get_vreg64(ah, a->vd, 1); | ||
188 | + get_vreg64(al, a->vd, 0); | ||
189 | + tcg_gen_concat_i64_i128(val, al, ah); | ||
190 | + tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); | ||
191 | + | ||
192 | + return true; | ||
193 | +} | ||
194 | + | ||
195 | +#define VLDREPL(NAME, MO) \ | ||
196 | +static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \ | ||
197 | +{ \ | ||
198 | + TCGv addr, temp; \ | ||
199 | + TCGv_i64 val; \ | ||
200 | + \ | ||
201 | + CHECK_SXE; \ | ||
202 | + \ | ||
203 | + addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
204 | + val = tcg_temp_new_i64(); \ | ||
205 | + \ | ||
206 | + if (a->imm) { \ | ||
207 | + temp = tcg_temp_new(); \ | ||
208 | + tcg_gen_addi_tl(temp, addr, a->imm); \ | ||
209 | + addr = temp; \ | ||
210 | + } \ | ||
211 | + \ | ||
212 | + tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO); \ | ||
213 | + tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \ | ||
214 | + \ | ||
215 | + return true; \ | ||
216 | +} | ||
217 | + | ||
218 | +VLDREPL(vldrepl_b, MO_8) | ||
219 | +VLDREPL(vldrepl_h, MO_16) | ||
220 | +VLDREPL(vldrepl_w, MO_32) | ||
221 | +VLDREPL(vldrepl_d, MO_64) | ||
222 | + | ||
223 | +#define VSTELM(NAME, MO, E) \ | ||
224 | +static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \ | ||
225 | +{ \ | ||
226 | + TCGv addr, temp; \ | ||
227 | + TCGv_i64 val; \ | ||
228 | + \ | ||
229 | + CHECK_SXE; \ | ||
230 | + \ | ||
231 | + addr = gpr_src(ctx, a->rj, EXT_NONE); \ | ||
232 | + val = tcg_temp_new_i64(); \ | ||
233 | + \ | ||
234 | + if (a->imm) { \ | ||
235 | + temp = tcg_temp_new(); \ | ||
236 | + tcg_gen_addi_tl(temp, addr, a->imm); \ | ||
237 | + addr = temp; \ | ||
238 | + } \ | ||
239 | + \ | ||
240 | + tcg_gen_ld_i64(val, cpu_env, \ | ||
241 | + offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \ | ||
242 | + tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO); \ | ||
243 | + \ | ||
244 | + return true; \ | ||
245 | +} | ||
246 | + | ||
247 | +VSTELM(vstelm_b, MO_8, B) | ||
248 | +VSTELM(vstelm_h, MO_16, H) | ||
249 | +VSTELM(vstelm_w, MO_32, W) | ||
250 | +VSTELM(vstelm_d, MO_64, D) | ||
251 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
252 | index XXXXXXX..XXXXXXX 100644 | ||
253 | --- a/target/loongarch/insns.decode | ||
254 | +++ b/target/loongarch/insns.decode | ||
255 | @@ -XXX,XX +XXX,XX @@ ertn 0000 01100100 10000 01110 00000 00000 @empty | ||
256 | idle 0000 01100100 10001 ............... @i15 | ||
257 | dbcl 0000 00000010 10101 ............... @i15 | ||
258 | |||
259 | +# | ||
260 | +# LSX Fields | ||
261 | +# | ||
262 | + | ||
263 | +%i9s3 10:s9 !function=shl_3 | ||
264 | +%i10s2 10:s10 !function=shl_2 | ||
265 | +%i11s1 10:s11 !function=shl_1 | ||
266 | +%i8s3 10:s8 !function=shl_3 | ||
267 | +%i8s2 10:s8 !function=shl_2 | ||
268 | +%i8s1 10:s8 !function=shl_1 | ||
269 | + | ||
270 | # | ||
271 | # LSX Argument sets | ||
272 | # | ||
273 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
274 | &rv_i rd vj imm | ||
275 | &vr vd rj | ||
276 | &vvr vd vj rk | ||
277 | +&vrr vd rj rk | ||
278 | +&vr_ii vd rj imm imm2 | ||
279 | |||
280 | # | ||
281 | # LSX Formats | ||
282 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
283 | @rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i | ||
284 | @vr .... ........ ..... ..... rj:5 vd:5 &vr | ||
285 | @vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr | ||
286 | +@vr_i9 .... ........ . ......... rj:5 vd:5 &vr_i imm=%i9s3 | ||
287 | +@vr_i10 .... ........ .......... rj:5 vd:5 &vr_i imm=%i10s2 | ||
288 | +@vr_i11 .... ....... ........... rj:5 vd:5 &vr_i imm=%i11s1 | ||
289 | +@vr_i12 .... ...... imm:s12 rj:5 vd:5 &vr_i | ||
290 | +@vr_i8i1 .... ........ . imm2:1 ........ rj:5 vd:5 &vr_ii imm=%i8s3 | ||
291 | +@vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2 | ||
292 | +@vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1 | ||
293 | +@vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii | ||
294 | +@vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr | ||
295 | |||
296 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
297 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
298 | @@ -XXX,XX +XXX,XX @@ vextrins_d 0111 00111000 00 ........ ..... ..... @vv_ui8 | ||
299 | vextrins_w 0111 00111000 01 ........ ..... ..... @vv_ui8 | ||
300 | vextrins_h 0111 00111000 10 ........ ..... ..... @vv_ui8 | ||
301 | vextrins_b 0111 00111000 11 ........ ..... ..... @vv_ui8 | ||
302 | + | ||
303 | +vld 0010 110000 ............ ..... ..... @vr_i12 | ||
304 | +vst 0010 110001 ............ ..... ..... @vr_i12 | ||
305 | +vldx 0011 10000100 00000 ..... ..... ..... @vrr | ||
306 | +vstx 0011 10000100 01000 ..... ..... ..... @vrr | ||
307 | + | ||
308 | +vldrepl_d 0011 00000001 0 ......... ..... ..... @vr_i9 | ||
309 | +vldrepl_w 0011 00000010 .......... ..... ..... @vr_i10 | ||
310 | +vldrepl_h 0011 0000010 ........... ..... ..... @vr_i11 | ||
311 | +vldrepl_b 0011 000010 ............ ..... ..... @vr_i12 | ||
312 | +vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1 | ||
313 | +vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2 | ||
314 | +vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3 | ||
315 | +vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4 | ||
316 | diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c | ||
317 | index XXXXXXX..XXXXXXX 100644 | ||
318 | --- a/target/loongarch/translate.c | ||
319 | +++ b/target/loongarch/translate.c | ||
320 | @@ -XXX,XX +XXX,XX @@ static inline int plus_1(DisasContext *ctx, int x) | ||
321 | return x + 1; | ||
322 | } | ||
323 | |||
324 | +static inline int shl_1(DisasContext *ctx, int x) | ||
325 | +{ | ||
326 | + return x << 1; | ||
327 | +} | ||
328 | + | ||
329 | static inline int shl_2(DisasContext *ctx, int x) | ||
330 | { | ||
331 | return x << 2; | ||
332 | } | ||
333 | |||
334 | +static inline int shl_3(DisasContext *ctx, int x) | ||
335 | +{ | ||
336 | + return x << 3; | ||
337 | +} | ||
338 | + | ||
339 | /* | ||
340 | * LoongArch the upper 32 bits are undefined ("can be any value"). | ||
341 | * QEMU chooses to nanbox, because it is most likely to show guest bugs early. | ||
342 | -- | 178 | -- |
343 | 2.31.1 | 179 | 2.43.5 | diff view generated by jsdifflib |
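Taken together, the mail_send()/any_send() hunks above reduce every caller of the hook to the same shape. The condensed sketch below only restates those hunks (variable names follow them) and is not additional code from the patch:

    CPUState *cs;
    int cpu;

    /* Resolve a physical cpuid to its logical index and its CPUState. */
    if (licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs) != MEMTX_OK) {
        return MEMTX_DECODE_ERROR;
    }
    attrs.requester_id = cpu;   /* logical slot index; no cs->cpu_index access */
    return send_ipi_data(ipi, cs, val, addr, attrs);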
1 | This patch includes: | 1 | The IPICore object holds both an arch_id and a CPUState pointer, so
---|---|---|---|
2 | - VADDI.{B/H/W/D}U; | 2 | cpu_by_arch_id() can be implemented by searching the IPICore array internally,
3 | - VSUBI.{B/H/W/D}U. | 3 | rather than the possible_cpus array.
4 | 4 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Bibo Mao <maobibo@loongson.cn> |
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | 6 | Reviewed-by: Bibo Mao <maobibo@loongson.cn> |
7 | Message-Id: <20230504122810.4094787-6-gaosong@loongson.cn> | ||
8 | --- | 7 | --- |
9 | target/loongarch/disas.c | 14 ++++++++ | 8 | hw/intc/loongarch_ipi.c | 36 +++++++++++------------------------- |
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 37 +++++++++++++++++++++ | 9 | 1 file changed, 11 insertions(+), 25 deletions(-) |
11 | target/loongarch/insns.decode | 11 ++++++ | ||
12 | 3 files changed, 62 insertions(+) | ||
13 | 10 | ||
14 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | 11 | diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c |
15 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/loongarch/disas.c | 13 | --- a/hw/intc/loongarch_ipi.c |
17 | +++ b/target/loongarch/disas.c | 14 | +++ b/hw/intc/loongarch_ipi.c |
18 | @@ -XXX,XX +XXX,XX @@ static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic) | 15 | @@ -XXX,XX +XXX,XX @@ static AddressSpace *get_iocsr_as(CPUState *cpu) |
19 | output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk); | 16 | return LOONGARCH_CPU(cpu)->env.address_space_iocsr; |
20 | } | 17 | } |
21 | 18 | ||
22 | +static void output_vv_i(DisasContext *ctx, arg_vv_i *a, const char *mnemonic) | 19 | -static int archid_cmp(const void *a, const void *b) |
23 | +{ | 20 | +static int loongarch_ipi_cmp(const void *a, const void *b) |
24 | + output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm); | 21 | { |
25 | +} | 22 | - CPUArchId *archid_a = (CPUArchId *)a; |
26 | + | 23 | - CPUArchId *archid_b = (CPUArchId *)b; |
27 | INSN_LSX(vadd_b, vvv) | 24 | + IPICore *ipi_a = (IPICore *)a; |
28 | INSN_LSX(vadd_h, vvv) | 25 | + IPICore *ipi_b = (IPICore *)b; |
29 | INSN_LSX(vadd_w, vvv) | 26 | |
30 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsub_h, vvv) | 27 | - return archid_a->arch_id - archid_b->arch_id; |
31 | INSN_LSX(vsub_w, vvv) | 28 | -} |
32 | INSN_LSX(vsub_d, vvv) | 29 | - |
33 | INSN_LSX(vsub_q, vvv) | 30 | -static CPUArchId *find_cpu_by_archid(MachineState *ms, uint32_t id) |
34 | + | 31 | -{ |
35 | +INSN_LSX(vaddi_bu, vv_i) | 32 | - CPUArchId apic_id, *found_cpu; |
36 | +INSN_LSX(vaddi_hu, vv_i) | 33 | - |
37 | +INSN_LSX(vaddi_wu, vv_i) | 34 | - apic_id.arch_id = id; |
38 | +INSN_LSX(vaddi_du, vv_i) | 35 | - found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, |
39 | +INSN_LSX(vsubi_bu, vv_i) | 36 | - ms->possible_cpus->len, |
40 | +INSN_LSX(vsubi_hu, vv_i) | 37 | - sizeof(*ms->possible_cpus->cpus), |
41 | +INSN_LSX(vsubi_wu, vv_i) | 38 | - archid_cmp); |
42 | +INSN_LSX(vsubi_du, vv_i) | 39 | - |
43 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | 40 | - return found_cpu; |
44 | index XXXXXXX..XXXXXXX 100644 | 41 | + return ipi_a->arch_id - ipi_b->arch_id; |
45 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
46 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
47 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
48 | return true; | ||
49 | } | 42 | } |
50 | 43 | ||
51 | +static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, | 44 | static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics, |
52 | + void (*func)(unsigned, uint32_t, uint32_t, | 45 | int64_t arch_id, int *index, CPUState **pcs) |
53 | + int64_t, uint32_t, uint32_t)) | 46 | { |
54 | +{ | 47 | - MachineState *machine = MACHINE(qdev_get_machine()); |
55 | + uint32_t vd_ofs, vj_ofs; | 48 | - CPUArchId *archid; |
56 | + | 49 | - CPUState *cs; |
57 | + CHECK_SXE; | 50 | + IPICore ipi, *found; |
58 | + | 51 | |
59 | + vd_ofs = vec_full_offset(a->vd); | 52 | - archid = find_cpu_by_archid(machine, arch_id); |
60 | + vj_ofs = vec_full_offset(a->vj); | 53 | - if (archid && archid->cpu) { |
61 | + | 54 | - cs = archid->cpu; |
62 | + func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8); | 55 | + ipi.arch_id = arch_id; |
63 | + return true; | 56 | + found = bsearch(&ipi, lics->cpu, lics->num_cpu, sizeof(IPICore), |
64 | +} | 57 | + loongarch_ipi_cmp); |
65 | + | 58 | + if (found && found->cpu) { |
66 | +static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop) | 59 | if (index) { |
67 | +{ | 60 | - *index = cs->cpu_index; |
68 | + uint32_t vd_ofs, vj_ofs; | 61 | + *index = found - lics->cpu; |
69 | + | 62 | } |
70 | + CHECK_SXE; | 63 | |
71 | + | 64 | if (pcs) { |
72 | + vd_ofs = vec_full_offset(a->vd); | 65 | - *pcs = cs; |
73 | + vj_ofs = vec_full_offset(a->vj); | 66 | + *pcs = found->cpu; |
74 | + | 67 | } |
75 | + tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8); | 68 | |
76 | + return true; | 69 | return MEMTX_OK; |
77 | +} | ||
78 | + | ||
79 | TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add) | ||
80 | TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add) | ||
81 | TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add) | ||
82 | @@ -XXX,XX +XXX,XX @@ TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub) | ||
83 | TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub) | ||
84 | TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub) | ||
85 | TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub) | ||
86 | + | ||
87 | +TRANS(vaddi_bu, gvec_vv_i, MO_8, tcg_gen_gvec_addi) | ||
88 | +TRANS(vaddi_hu, gvec_vv_i, MO_16, tcg_gen_gvec_addi) | ||
89 | +TRANS(vaddi_wu, gvec_vv_i, MO_32, tcg_gen_gvec_addi) | ||
90 | +TRANS(vaddi_du, gvec_vv_i, MO_64, tcg_gen_gvec_addi) | ||
91 | +TRANS(vsubi_bu, gvec_subi, MO_8) | ||
92 | +TRANS(vsubi_hu, gvec_subi, MO_16) | ||
93 | +TRANS(vsubi_wu, gvec_subi, MO_32) | ||
94 | +TRANS(vsubi_du, gvec_subi, MO_64) | ||
95 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/target/loongarch/insns.decode | ||
98 | +++ b/target/loongarch/insns.decode | ||
99 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
100 | # | ||
101 | |||
102 | &vvv vd vj vk | ||
103 | +&vv_i vd vj imm | ||
104 | |||
105 | # | ||
106 | # LSX Formats | ||
107 | # | ||
108 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
109 | +@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
110 | |||
111 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
112 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
113 | @@ -XXX,XX +XXX,XX @@ vsub_h 0111 00000000 11001 ..... ..... ..... @vvv | ||
114 | vsub_w 0111 00000000 11010 ..... ..... ..... @vvv | ||
115 | vsub_d 0111 00000000 11011 ..... ..... ..... @vvv | ||
116 | vsub_q 0111 00010010 11011 ..... ..... ..... @vvv | ||
117 | + | ||
118 | +vaddi_bu 0111 00101000 10100 ..... ..... ..... @vv_ui5 | ||
119 | +vaddi_hu 0111 00101000 10101 ..... ..... ..... @vv_ui5 | ||
120 | +vaddi_wu 0111 00101000 10110 ..... ..... ..... @vv_ui5 | ||
121 | +vaddi_du 0111 00101000 10111 ..... ..... ..... @vv_ui5 | ||
122 | +vsubi_bu 0111 00101000 11000 ..... ..... ..... @vv_ui5 | ||
123 | +vsubi_hu 0111 00101000 11001 ..... ..... ..... @vv_ui5 | ||
124 | +vsubi_wu 0111 00101000 11010 ..... ..... ..... @vv_ui5 | ||
125 | +vsubi_du 0111 00101000 11011 ..... ..... ..... @vv_ui5 | ||
126 | -- | 70 | -- |
127 | 2.31.1 | 71 | 2.43.5 | diff view generated by jsdifflib |
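The lookup added above relies on bsearch(3), which only finds entries if the IPICore array is sorted by arch_id. A small self-contained illustration of the same pattern follows (plain C with a hypothetical Core type, not QEMU code); the comparator returns a sign instead of a subtraction so 64-bit ids are never truncated to int.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        uint64_t arch_id;
        int payload;
    } Core;

    /* Three-way compare on arch_id without narrowing to int. */
    static int core_cmp(const void *a, const void *b)
    {
        const Core *ca = a, *cb = b;
        return (ca->arch_id > cb->arch_id) - (ca->arch_id < cb->arch_id);
    }

    int main(void)
    {
        Core cores[] = { { 0, 10 }, { 1, 11 }, { 4, 14 } }; /* sorted by arch_id */
        Core key = { .arch_id = 4 };
        Core *found = bsearch(&key, cores, 3, sizeof(Core), core_cmp);

        printf("%d\n", found ? found->payload : -1);        /* prints 14 */
        return 0;
    }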
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VNEG.{B/H/W/D}. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-7-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 10 ++++++++++ | ||
9 | target/loongarch/insn_trans/trans_lsx.c.inc | 20 ++++++++++++++++++++ | ||
10 | target/loongarch/insns.decode | 7 +++++++ | ||
11 | 3 files changed, 37 insertions(+) | ||
12 | |||
13 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/disas.c | ||
16 | +++ b/target/loongarch/disas.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void output_vv_i(DisasContext *ctx, arg_vv_i *a, const char *mnemonic) | ||
18 | output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm); | ||
19 | } | ||
20 | |||
21 | +static void output_vv(DisasContext *ctx, arg_vv *a, const char *mnemonic) | ||
22 | +{ | ||
23 | + output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj); | ||
24 | +} | ||
25 | + | ||
26 | INSN_LSX(vadd_b, vvv) | ||
27 | INSN_LSX(vadd_h, vvv) | ||
28 | INSN_LSX(vadd_w, vvv) | ||
29 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsubi_bu, vv_i) | ||
30 | INSN_LSX(vsubi_hu, vv_i) | ||
31 | INSN_LSX(vsubi_wu, vv_i) | ||
32 | INSN_LSX(vsubi_du, vv_i) | ||
33 | + | ||
34 | +INSN_LSX(vneg_b, vv) | ||
35 | +INSN_LSX(vneg_h, vv) | ||
36 | +INSN_LSX(vneg_w, vv) | ||
37 | +INSN_LSX(vneg_d, vv) | ||
38 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
41 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
43 | return true; | ||
44 | } | ||
45 | |||
46 | +static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop, | ||
47 | + void (*func)(unsigned, uint32_t, uint32_t, | ||
48 | + uint32_t, uint32_t)) | ||
49 | +{ | ||
50 | + uint32_t vd_ofs, vj_ofs; | ||
51 | + | ||
52 | + CHECK_SXE; | ||
53 | + | ||
54 | + vd_ofs = vec_full_offset(a->vd); | ||
55 | + vj_ofs = vec_full_offset(a->vj); | ||
56 | + | ||
57 | + func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8); | ||
58 | + return true; | ||
59 | +} | ||
60 | + | ||
61 | static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, | ||
62 | void (*func)(unsigned, uint32_t, uint32_t, | ||
63 | int64_t, uint32_t, uint32_t)) | ||
64 | @@ -XXX,XX +XXX,XX @@ TRANS(vsubi_bu, gvec_subi, MO_8) | ||
65 | TRANS(vsubi_hu, gvec_subi, MO_16) | ||
66 | TRANS(vsubi_wu, gvec_subi, MO_32) | ||
67 | TRANS(vsubi_du, gvec_subi, MO_64) | ||
68 | + | ||
69 | +TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg) | ||
70 | +TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg) | ||
71 | +TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg) | ||
72 | +TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg) | ||
73 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/target/loongarch/insns.decode | ||
76 | +++ b/target/loongarch/insns.decode | ||
77 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
78 | # LSX Argument sets | ||
79 | # | ||
80 | |||
81 | +&vv vd vj | ||
82 | &vvv vd vj vk | ||
83 | &vv_i vd vj imm | ||
84 | |||
85 | # | ||
86 | # LSX Formats | ||
87 | # | ||
88 | +@vv .... ........ ..... ..... vj:5 vd:5 &vv | ||
89 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
90 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ vsubi_bu 0111 00101000 11000 ..... ..... ..... @vv_ui5 | ||
93 | vsubi_hu 0111 00101000 11001 ..... ..... ..... @vv_ui5 | ||
94 | vsubi_wu 0111 00101000 11010 ..... ..... ..... @vv_ui5 | ||
95 | vsubi_du 0111 00101000 11011 ..... ..... ..... @vv_ui5 | ||
96 | + | ||
97 | +vneg_b 0111 00101001 11000 01100 ..... ..... @vv | ||
98 | +vneg_h 0111 00101001 11000 01101 ..... ..... @vv | ||
99 | +vneg_w 0111 00101001 11000 01110 ..... ..... @vv | ||
100 | +vneg_d 0111 00101001 11000 01111 ..... ..... @vv | ||
101 | -- | ||
102 | 2.31.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSADD.{B/H/W/D}[U]; | ||
3 | - VSSUB.{B/H/W/D}[U]. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-8-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 17 +++++++++++++++++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 17 +++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 17 +++++++++++++++++ | ||
12 | 3 files changed, 51 insertions(+) | ||
13 | |||
14 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/loongarch/disas.c | ||
17 | +++ b/target/loongarch/disas.c | ||
18 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vneg_b, vv) | ||
19 | INSN_LSX(vneg_h, vv) | ||
20 | INSN_LSX(vneg_w, vv) | ||
21 | INSN_LSX(vneg_d, vv) | ||
22 | + | ||
23 | +INSN_LSX(vsadd_b, vvv) | ||
24 | +INSN_LSX(vsadd_h, vvv) | ||
25 | +INSN_LSX(vsadd_w, vvv) | ||
26 | +INSN_LSX(vsadd_d, vvv) | ||
27 | +INSN_LSX(vsadd_bu, vvv) | ||
28 | +INSN_LSX(vsadd_hu, vvv) | ||
29 | +INSN_LSX(vsadd_wu, vvv) | ||
30 | +INSN_LSX(vsadd_du, vvv) | ||
31 | +INSN_LSX(vssub_b, vvv) | ||
32 | +INSN_LSX(vssub_h, vvv) | ||
33 | +INSN_LSX(vssub_w, vvv) | ||
34 | +INSN_LSX(vssub_d, vvv) | ||
35 | +INSN_LSX(vssub_bu, vvv) | ||
36 | +INSN_LSX(vssub_hu, vvv) | ||
37 | +INSN_LSX(vssub_wu, vvv) | ||
38 | +INSN_LSX(vssub_du, vvv) | ||
39 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
43 | @@ -XXX,XX +XXX,XX @@ TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg) | ||
44 | TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg) | ||
45 | TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg) | ||
46 | TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg) | ||
47 | + | ||
48 | +TRANS(vsadd_b, gvec_vvv, MO_8, tcg_gen_gvec_ssadd) | ||
49 | +TRANS(vsadd_h, gvec_vvv, MO_16, tcg_gen_gvec_ssadd) | ||
50 | +TRANS(vsadd_w, gvec_vvv, MO_32, tcg_gen_gvec_ssadd) | ||
51 | +TRANS(vsadd_d, gvec_vvv, MO_64, tcg_gen_gvec_ssadd) | ||
52 | +TRANS(vsadd_bu, gvec_vvv, MO_8, tcg_gen_gvec_usadd) | ||
53 | +TRANS(vsadd_hu, gvec_vvv, MO_16, tcg_gen_gvec_usadd) | ||
54 | +TRANS(vsadd_wu, gvec_vvv, MO_32, tcg_gen_gvec_usadd) | ||
55 | +TRANS(vsadd_du, gvec_vvv, MO_64, tcg_gen_gvec_usadd) | ||
56 | +TRANS(vssub_b, gvec_vvv, MO_8, tcg_gen_gvec_sssub) | ||
57 | +TRANS(vssub_h, gvec_vvv, MO_16, tcg_gen_gvec_sssub) | ||
58 | +TRANS(vssub_w, gvec_vvv, MO_32, tcg_gen_gvec_sssub) | ||
59 | +TRANS(vssub_d, gvec_vvv, MO_64, tcg_gen_gvec_sssub) | ||
60 | +TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub) | ||
61 | +TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub) | ||
62 | +TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub) | ||
63 | +TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub) | ||
64 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/target/loongarch/insns.decode | ||
67 | +++ b/target/loongarch/insns.decode | ||
68 | @@ -XXX,XX +XXX,XX @@ vneg_b 0111 00101001 11000 01100 ..... ..... @vv | ||
69 | vneg_h 0111 00101001 11000 01101 ..... ..... @vv | ||
70 | vneg_w 0111 00101001 11000 01110 ..... ..... @vv | ||
71 | vneg_d 0111 00101001 11000 01111 ..... ..... @vv | ||
72 | + | ||
73 | +vsadd_b 0111 00000100 01100 ..... ..... ..... @vvv | ||
74 | +vsadd_h 0111 00000100 01101 ..... ..... ..... @vvv | ||
75 | +vsadd_w 0111 00000100 01110 ..... ..... ..... @vvv | ||
76 | +vsadd_d 0111 00000100 01111 ..... ..... ..... @vvv | ||
77 | +vsadd_bu 0111 00000100 10100 ..... ..... ..... @vvv | ||
78 | +vsadd_hu 0111 00000100 10101 ..... ..... ..... @vvv | ||
79 | +vsadd_wu 0111 00000100 10110 ..... ..... ..... @vvv | ||
80 | +vsadd_du 0111 00000100 10111 ..... ..... ..... @vvv | ||
81 | +vssub_b 0111 00000100 10000 ..... ..... ..... @vvv | ||
82 | +vssub_h 0111 00000100 10001 ..... ..... ..... @vvv | ||
83 | +vssub_w 0111 00000100 10010 ..... ..... ..... @vvv | ||
84 | +vssub_d 0111 00000100 10011 ..... ..... ..... @vvv | ||
85 | +vssub_bu 0111 00000100 11000 ..... ..... ..... @vvv | ||
86 | +vssub_hu 0111 00000100 11001 ..... ..... ..... @vvv | ||
87 | +vssub_wu 0111 00000100 11010 ..... ..... ..... @vvv | ||
88 | +vssub_du 0111 00000100 11011 ..... ..... ..... @vvv | ||
89 | -- | ||
90 | 2.31.1 | diff view generated by jsdifflib |
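The patch above maps VSADD/VSSUB straight onto the generic gvec saturating expanders. As a reminder of what saturation means per lane, here is a scalar stand-in for one 8-bit element (an illustration only, not the tcg_gen_gvec_* implementation):

    #include <stdint.h>
    #include <stdio.h>

    /* Signed saturating add, one 8-bit lane (vsadd.b semantics). */
    static int8_t ssadd8(int8_t a, int8_t b)
    {
        int r = a + b;
        if (r > INT8_MAX) {
            r = INT8_MAX;
        } else if (r < INT8_MIN) {
            r = INT8_MIN;
        }
        return (int8_t)r;
    }

    /* Unsigned saturating subtract, one 8-bit lane (vssub.bu semantics). */
    static uint8_t ussub8(uint8_t a, uint8_t b)
    {
        return a > b ? (uint8_t)(a - b) : 0;
    }

    int main(void)
    {
        printf("%d\n", ssadd8(100, 50)); /* 127: clamps instead of wrapping to -106 */
        printf("%u\n", ussub8(10, 20));  /* 0: clamps instead of wrapping to 246 */
        return 0;
    }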
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}; | ||
3 | - VHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-9-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 17 +++++ | ||
10 | target/loongarch/helper.h | 18 +++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 17 +++++ | ||
12 | target/loongarch/insns.decode | 17 +++++ | ||
13 | target/loongarch/lsx_helper.c | 81 +++++++++++++++++++++ | ||
14 | 5 files changed, 150 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vssub_bu, vvv) | ||
21 | INSN_LSX(vssub_hu, vvv) | ||
22 | INSN_LSX(vssub_wu, vvv) | ||
23 | INSN_LSX(vssub_du, vvv) | ||
24 | + | ||
25 | +INSN_LSX(vhaddw_h_b, vvv) | ||
26 | +INSN_LSX(vhaddw_w_h, vvv) | ||
27 | +INSN_LSX(vhaddw_d_w, vvv) | ||
28 | +INSN_LSX(vhaddw_q_d, vvv) | ||
29 | +INSN_LSX(vhaddw_hu_bu, vvv) | ||
30 | +INSN_LSX(vhaddw_wu_hu, vvv) | ||
31 | +INSN_LSX(vhaddw_du_wu, vvv) | ||
32 | +INSN_LSX(vhaddw_qu_du, vvv) | ||
33 | +INSN_LSX(vhsubw_h_b, vvv) | ||
34 | +INSN_LSX(vhsubw_w_h, vvv) | ||
35 | +INSN_LSX(vhsubw_d_w, vvv) | ||
36 | +INSN_LSX(vhsubw_q_d, vvv) | ||
37 | +INSN_LSX(vhsubw_hu_bu, vvv) | ||
38 | +INSN_LSX(vhsubw_wu_hu, vvv) | ||
39 | +INSN_LSX(vhsubw_du_wu, vvv) | ||
40 | +INSN_LSX(vhsubw_qu_du, vvv) | ||
41 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/helper.h | ||
44 | +++ b/target/loongarch/helper.h | ||
45 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(ldpte, void, env, tl, tl, i32) | ||
46 | DEF_HELPER_1(ertn, void, env) | ||
47 | DEF_HELPER_1(idle, void, env) | ||
48 | #endif | ||
49 | + | ||
50 | +/* LoongArch LSX */ | ||
51 | +DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32) | ||
52 | +DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32) | ||
53 | +DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32) | ||
55 | +DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32) | ||
56 | +DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32) | ||
57 | +DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32) | ||
58 | +DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32) | ||
59 | +DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32) | ||
60 | +DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32) | ||
61 | +DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32) | ||
66 | +DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32) | ||
67 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
70 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
71 | @@ -XXX,XX +XXX,XX @@ TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub) | ||
72 | TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub) | ||
73 | TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub) | ||
74 | TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub) | ||
75 | + | ||
76 | +TRANS(vhaddw_h_b, gen_vvv, gen_helper_vhaddw_h_b) | ||
77 | +TRANS(vhaddw_w_h, gen_vvv, gen_helper_vhaddw_w_h) | ||
78 | +TRANS(vhaddw_d_w, gen_vvv, gen_helper_vhaddw_d_w) | ||
79 | +TRANS(vhaddw_q_d, gen_vvv, gen_helper_vhaddw_q_d) | ||
80 | +TRANS(vhaddw_hu_bu, gen_vvv, gen_helper_vhaddw_hu_bu) | ||
81 | +TRANS(vhaddw_wu_hu, gen_vvv, gen_helper_vhaddw_wu_hu) | ||
82 | +TRANS(vhaddw_du_wu, gen_vvv, gen_helper_vhaddw_du_wu) | ||
83 | +TRANS(vhaddw_qu_du, gen_vvv, gen_helper_vhaddw_qu_du) | ||
84 | +TRANS(vhsubw_h_b, gen_vvv, gen_helper_vhsubw_h_b) | ||
85 | +TRANS(vhsubw_w_h, gen_vvv, gen_helper_vhsubw_w_h) | ||
86 | +TRANS(vhsubw_d_w, gen_vvv, gen_helper_vhsubw_d_w) | ||
87 | +TRANS(vhsubw_q_d, gen_vvv, gen_helper_vhsubw_q_d) | ||
88 | +TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu) | ||
89 | +TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu) | ||
90 | +TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu) | ||
91 | +TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du) | ||
92 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/target/loongarch/insns.decode | ||
95 | +++ b/target/loongarch/insns.decode | ||
96 | @@ -XXX,XX +XXX,XX @@ vssub_bu 0111 00000100 11000 ..... ..... ..... @vvv | ||
97 | vssub_hu 0111 00000100 11001 ..... ..... ..... @vvv | ||
98 | vssub_wu 0111 00000100 11010 ..... ..... ..... @vvv | ||
99 | vssub_du 0111 00000100 11011 ..... ..... ..... @vvv | ||
100 | + | ||
101 | +vhaddw_h_b 0111 00000101 01000 ..... ..... ..... @vvv | ||
102 | +vhaddw_w_h 0111 00000101 01001 ..... ..... ..... @vvv | ||
103 | +vhaddw_d_w 0111 00000101 01010 ..... ..... ..... @vvv | ||
104 | +vhaddw_q_d 0111 00000101 01011 ..... ..... ..... @vvv | ||
105 | +vhaddw_hu_bu 0111 00000101 10000 ..... ..... ..... @vvv | ||
106 | +vhaddw_wu_hu 0111 00000101 10001 ..... ..... ..... @vvv | ||
107 | +vhaddw_du_wu 0111 00000101 10010 ..... ..... ..... @vvv | ||
108 | +vhaddw_qu_du 0111 00000101 10011 ..... ..... ..... @vvv | ||
109 | +vhsubw_h_b 0111 00000101 01100 ..... ..... ..... @vvv | ||
110 | +vhsubw_w_h 0111 00000101 01101 ..... ..... ..... @vvv | ||
111 | +vhsubw_d_w 0111 00000101 01110 ..... ..... ..... @vvv | ||
112 | +vhsubw_q_d 0111 00000101 01111 ..... ..... ..... @vvv | ||
113 | +vhsubw_hu_bu 0111 00000101 10100 ..... ..... ..... @vvv | ||
114 | +vhsubw_wu_hu 0111 00000101 10101 ..... ..... ..... @vvv | ||
115 | +vhsubw_du_wu 0111 00000101 10110 ..... ..... ..... @vvv | ||
116 | +vhsubw_qu_du 0111 00000101 10111 ..... ..... ..... @vvv | ||
117 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/target/loongarch/lsx_helper.c | ||
120 | +++ b/target/loongarch/lsx_helper.c | ||
121 | @@ -XXX,XX +XXX,XX @@ | ||
122 | * | ||
123 | * Copyright (c) 2022-2023 Loongson Technology Corporation Limited | ||
124 | */ | ||
125 | + | ||
126 | +#include "qemu/osdep.h" | ||
127 | +#include "cpu.h" | ||
128 | +#include "exec/exec-all.h" | ||
129 | +#include "exec/helper-proto.h" | ||
130 | + | ||
131 | +#define DO_ADD(a, b) (a + b) | ||
132 | +#define DO_SUB(a, b) (a - b) | ||
133 | + | ||
134 | +#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
135 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
136 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
137 | +{ \ | ||
138 | + int i; \ | ||
139 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
140 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
141 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
142 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
143 | + \ | ||
144 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
145 | + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ | ||
146 | + } \ | ||
147 | +} | ||
148 | + | ||
149 | +DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) | ||
150 | +DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) | ||
151 | +DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) | ||
152 | + | ||
153 | +void HELPER(vhaddw_q_d)(CPULoongArchState *env, | ||
154 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
155 | +{ | ||
156 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
157 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
158 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
159 | + | ||
160 | + Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
161 | +} | ||
162 | + | ||
163 | +DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) | ||
164 | +DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) | ||
165 | +DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) | ||
166 | + | ||
167 | +void HELPER(vhsubw_q_d)(CPULoongArchState *env, | ||
168 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
169 | +{ | ||
170 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
171 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
172 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
173 | + | ||
174 | + Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); | ||
175 | +} | ||
176 | + | ||
177 | +DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) | ||
178 | +DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) | ||
179 | +DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) | ||
180 | + | ||
181 | +void HELPER(vhaddw_qu_du)(CPULoongArchState *env, | ||
182 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
183 | +{ | ||
184 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
185 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
186 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
187 | + | ||
188 | + Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
189 | + int128_make64((uint64_t)Vk->D(0))); | ||
190 | +} | ||
191 | + | ||
192 | +DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) | ||
193 | +DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) | ||
194 | +DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) | ||
195 | + | ||
196 | +void HELPER(vhsubw_qu_du)(CPULoongArchState *env, | ||
197 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
198 | +{ | ||
199 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
200 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
201 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
202 | + | ||
203 | + Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
204 | + int128_make64((uint64_t)Vk->D(0))); | ||
205 | +} | ||
206 | -- | ||
207 | 2.31.1 | diff view generated by jsdifflib |
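The DO_ODD_EVEN macro above pairs the odd-indexed element of Vj with the even-indexed element of Vk and widens both before applying the operation. A scalar illustration of one vhaddw.h.b step follows (toy four-byte arrays, not the helper itself):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int8_t vj[4] = { 1, -2, 3, -4 };
        int8_t vk[4] = { 10, 20, 30, 40 };
        int16_t vd[2];

        /* vd[i] = (int16_t)vj[2*i + 1] + (int16_t)vk[2*i], as in DO_ODD_EVEN. */
        for (int i = 0; i < 2; i++) {
            vd[i] = (int16_t)vj[2 * i + 1] + (int16_t)vk[2 * i];
        }
        printf("%d %d\n", vd[0], vd[1]);  /* 8 26: (-2 + 10) and (-4 + 30) */
        return 0;
    }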
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
3 | - VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
4 | - VADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-10-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 43 ++ | ||
11 | target/loongarch/helper.h | 45 ++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 795 ++++++++++++++++++++ | ||
13 | target/loongarch/insns.decode | 43 ++ | ||
14 | target/loongarch/lsx_helper.c | 190 +++++ | ||
15 | 5 files changed, 1116 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/disas.c | ||
20 | +++ b/target/loongarch/disas.c | ||
21 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vhsubw_hu_bu, vvv) | ||
22 | INSN_LSX(vhsubw_wu_hu, vvv) | ||
23 | INSN_LSX(vhsubw_du_wu, vvv) | ||
24 | INSN_LSX(vhsubw_qu_du, vvv) | ||
25 | + | ||
26 | +INSN_LSX(vaddwev_h_b, vvv) | ||
27 | +INSN_LSX(vaddwev_w_h, vvv) | ||
28 | +INSN_LSX(vaddwev_d_w, vvv) | ||
29 | +INSN_LSX(vaddwev_q_d, vvv) | ||
30 | +INSN_LSX(vaddwod_h_b, vvv) | ||
31 | +INSN_LSX(vaddwod_w_h, vvv) | ||
32 | +INSN_LSX(vaddwod_d_w, vvv) | ||
33 | +INSN_LSX(vaddwod_q_d, vvv) | ||
34 | +INSN_LSX(vsubwev_h_b, vvv) | ||
35 | +INSN_LSX(vsubwev_w_h, vvv) | ||
36 | +INSN_LSX(vsubwev_d_w, vvv) | ||
37 | +INSN_LSX(vsubwev_q_d, vvv) | ||
38 | +INSN_LSX(vsubwod_h_b, vvv) | ||
39 | +INSN_LSX(vsubwod_w_h, vvv) | ||
40 | +INSN_LSX(vsubwod_d_w, vvv) | ||
41 | +INSN_LSX(vsubwod_q_d, vvv) | ||
42 | + | ||
43 | +INSN_LSX(vaddwev_h_bu, vvv) | ||
44 | +INSN_LSX(vaddwev_w_hu, vvv) | ||
45 | +INSN_LSX(vaddwev_d_wu, vvv) | ||
46 | +INSN_LSX(vaddwev_q_du, vvv) | ||
47 | +INSN_LSX(vaddwod_h_bu, vvv) | ||
48 | +INSN_LSX(vaddwod_w_hu, vvv) | ||
49 | +INSN_LSX(vaddwod_d_wu, vvv) | ||
50 | +INSN_LSX(vaddwod_q_du, vvv) | ||
51 | +INSN_LSX(vsubwev_h_bu, vvv) | ||
52 | +INSN_LSX(vsubwev_w_hu, vvv) | ||
53 | +INSN_LSX(vsubwev_d_wu, vvv) | ||
54 | +INSN_LSX(vsubwev_q_du, vvv) | ||
55 | +INSN_LSX(vsubwod_h_bu, vvv) | ||
56 | +INSN_LSX(vsubwod_w_hu, vvv) | ||
57 | +INSN_LSX(vsubwod_d_wu, vvv) | ||
58 | +INSN_LSX(vsubwod_q_du, vvv) | ||
59 | + | ||
60 | +INSN_LSX(vaddwev_h_bu_b, vvv) | ||
61 | +INSN_LSX(vaddwev_w_hu_h, vvv) | ||
62 | +INSN_LSX(vaddwev_d_wu_w, vvv) | ||
63 | +INSN_LSX(vaddwev_q_du_d, vvv) | ||
64 | +INSN_LSX(vaddwod_h_bu_b, vvv) | ||
65 | +INSN_LSX(vaddwod_w_hu_h, vvv) | ||
66 | +INSN_LSX(vaddwod_d_wu_w, vvv) | ||
67 | +INSN_LSX(vaddwod_q_du_d, vvv) | ||
68 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/helper.h | ||
71 | +++ b/target/loongarch/helper.h | ||
72 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32) | ||
73 | DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32) | ||
74 | DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32) | ||
75 | DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32) | ||
76 | + | ||
77 | +DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vaddwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
81 | +DEF_HELPER_FLAGS_4(vaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
82 | +DEF_HELPER_FLAGS_4(vaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
84 | +DEF_HELPER_FLAGS_4(vaddwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
85 | + | ||
86 | +DEF_HELPER_FLAGS_4(vsubwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
87 | +DEF_HELPER_FLAGS_4(vsubwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
88 | +DEF_HELPER_FLAGS_4(vsubwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
89 | +DEF_HELPER_FLAGS_4(vsubwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
90 | +DEF_HELPER_FLAGS_4(vsubwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
91 | +DEF_HELPER_FLAGS_4(vsubwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
92 | +DEF_HELPER_FLAGS_4(vsubwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
93 | +DEF_HELPER_FLAGS_4(vsubwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
94 | + | ||
95 | +DEF_HELPER_FLAGS_4(vaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
96 | +DEF_HELPER_FLAGS_4(vaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
98 | +DEF_HELPER_FLAGS_4(vaddwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
99 | +DEF_HELPER_FLAGS_4(vaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
100 | +DEF_HELPER_FLAGS_4(vaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
101 | +DEF_HELPER_FLAGS_4(vaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
102 | +DEF_HELPER_FLAGS_4(vaddwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
103 | + | ||
104 | +DEF_HELPER_FLAGS_4(vsubwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
105 | +DEF_HELPER_FLAGS_4(vsubwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
106 | +DEF_HELPER_FLAGS_4(vsubwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
107 | +DEF_HELPER_FLAGS_4(vsubwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
108 | +DEF_HELPER_FLAGS_4(vsubwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
109 | +DEF_HELPER_FLAGS_4(vsubwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
110 | +DEF_HELPER_FLAGS_4(vsubwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
111 | +DEF_HELPER_FLAGS_4(vsubwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
112 | + | ||
113 | +DEF_HELPER_FLAGS_4(vaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
114 | +DEF_HELPER_FLAGS_4(vaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
115 | +DEF_HELPER_FLAGS_4(vaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
116 | +DEF_HELPER_FLAGS_4(vaddwev_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
117 | +DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
118 | +DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
119 | +DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
120 | +DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
121 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
124 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
125 | @@ -XXX,XX +XXX,XX @@ TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu) | ||
126 | TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu) | ||
127 | TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu) | ||
128 | TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du) | ||
129 | + | ||
130 | +static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
131 | +{ | ||
132 | + TCGv_vec t1, t2; | ||
133 | + | ||
134 | + int halfbits = 4 << vece; | ||
135 | + | ||
136 | + t1 = tcg_temp_new_vec_matching(a); | ||
137 | + t2 = tcg_temp_new_vec_matching(b); | ||
138 | + | ||
139 | + /* Sign-extend the even elements from a */ | ||
140 | + tcg_gen_shli_vec(vece, t1, a, halfbits); | ||
141 | + tcg_gen_sari_vec(vece, t1, t1, halfbits); | ||
142 | + | ||
143 | + /* Sign-extend the even elements from b */ | ||
144 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
145 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
146 | + | ||
147 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
148 | +} | ||
149 | + | ||
150 | +static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
151 | +{ | ||
152 | + TCGv_i32 t1, t2; | ||
153 | + | ||
154 | + t1 = tcg_temp_new_i32(); | ||
155 | + t2 = tcg_temp_new_i32(); | ||
156 | + tcg_gen_ext16s_i32(t1, a); | ||
157 | + tcg_gen_ext16s_i32(t2, b); | ||
158 | + tcg_gen_add_i32(t, t1, t2); | ||
159 | +} | ||
160 | + | ||
161 | +static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
162 | +{ | ||
163 | + TCGv_i64 t1, t2; | ||
164 | + | ||
165 | + t1 = tcg_temp_new_i64(); | ||
166 | + t2 = tcg_temp_new_i64(); | ||
167 | + tcg_gen_ext32s_i64(t1, a); | ||
168 | + tcg_gen_ext32s_i64(t2, b); | ||
169 | + tcg_gen_add_i64(t, t1, t2); | ||
170 | +} | ||
171 | + | ||
172 | +static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
173 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
174 | +{ | ||
175 | + static const TCGOpcode vecop_list[] = { | ||
176 | + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
177 | + }; | ||
178 | + static const GVecGen3 op[4] = { | ||
179 | + { | ||
180 | + .fniv = gen_vaddwev_s, | ||
181 | + .fno = gen_helper_vaddwev_h_b, | ||
182 | + .opt_opc = vecop_list, | ||
183 | + .vece = MO_16 | ||
184 | + }, | ||
185 | + { | ||
186 | + .fni4 = gen_vaddwev_w_h, | ||
187 | + .fniv = gen_vaddwev_s, | ||
188 | + .fno = gen_helper_vaddwev_w_h, | ||
189 | + .opt_opc = vecop_list, | ||
190 | + .vece = MO_32 | ||
191 | + }, | ||
192 | + { | ||
193 | + .fni8 = gen_vaddwev_d_w, | ||
194 | + .fniv = gen_vaddwev_s, | ||
195 | + .fno = gen_helper_vaddwev_d_w, | ||
196 | + .opt_opc = vecop_list, | ||
197 | + .vece = MO_64 | ||
198 | + }, | ||
199 | + { | ||
200 | + .fno = gen_helper_vaddwev_q_d, | ||
201 | + .vece = MO_128 | ||
202 | + }, | ||
203 | + }; | ||
204 | + | ||
205 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
206 | +} | ||
207 | + | ||
208 | +TRANS(vaddwev_h_b, gvec_vvv, MO_8, do_vaddwev_s) | ||
209 | +TRANS(vaddwev_w_h, gvec_vvv, MO_16, do_vaddwev_s) | ||
210 | +TRANS(vaddwev_d_w, gvec_vvv, MO_32, do_vaddwev_s) | ||
211 | +TRANS(vaddwev_q_d, gvec_vvv, MO_64, do_vaddwev_s) | ||
212 | + | ||
213 | +static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
214 | +{ | ||
215 | + TCGv_i32 t1, t2; | ||
216 | + | ||
217 | + t1 = tcg_temp_new_i32(); | ||
218 | + t2 = tcg_temp_new_i32(); | ||
219 | + tcg_gen_sari_i32(t1, a, 16); | ||
220 | + tcg_gen_sari_i32(t2, b, 16); | ||
221 | + tcg_gen_add_i32(t, t1, t2); | ||
222 | +} | ||
223 | + | ||
224 | +static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
225 | +{ | ||
226 | + TCGv_i64 t1, t2; | ||
227 | + | ||
228 | + t1 = tcg_temp_new_i64(); | ||
229 | + t2 = tcg_temp_new_i64(); | ||
230 | + tcg_gen_sari_i64(t1, a, 32); | ||
231 | + tcg_gen_sari_i64(t2, b, 32); | ||
232 | + tcg_gen_add_i64(t, t1, t2); | ||
233 | +} | ||
234 | + | ||
235 | +static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
236 | +{ | ||
237 | + TCGv_vec t1, t2; | ||
238 | + | ||
239 | + int halfbits = 4 << vece; | ||
240 | + | ||
241 | + t1 = tcg_temp_new_vec_matching(a); | ||
242 | + t2 = tcg_temp_new_vec_matching(b); | ||
243 | + | ||
244 | + /* Sign-extend the odd elements from a and b */ | ||
245 | + tcg_gen_sari_vec(vece, t1, a, halfbits); | ||
246 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
247 | + | ||
248 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
249 | +} | ||
250 | + | ||
251 | +static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
252 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
253 | +{ | ||
254 | + static const TCGOpcode vecop_list[] = { | ||
255 | + INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
256 | + }; | ||
257 | + static const GVecGen3 op[4] = { | ||
258 | + { | ||
259 | + .fniv = gen_vaddwod_s, | ||
260 | + .fno = gen_helper_vaddwod_h_b, | ||
261 | + .opt_opc = vecop_list, | ||
262 | + .vece = MO_16 | ||
263 | + }, | ||
264 | + { | ||
265 | + .fni4 = gen_vaddwod_w_h, | ||
266 | + .fniv = gen_vaddwod_s, | ||
267 | + .fno = gen_helper_vaddwod_w_h, | ||
268 | + .opt_opc = vecop_list, | ||
269 | + .vece = MO_32 | ||
270 | + }, | ||
271 | + { | ||
272 | + .fni8 = gen_vaddwod_d_w, | ||
273 | + .fniv = gen_vaddwod_s, | ||
274 | + .fno = gen_helper_vaddwod_d_w, | ||
275 | + .opt_opc = vecop_list, | ||
276 | + .vece = MO_64 | ||
277 | + }, | ||
278 | + { | ||
279 | + .fno = gen_helper_vaddwod_q_d, | ||
280 | + .vece = MO_128 | ||
281 | + }, | ||
282 | + }; | ||
283 | + | ||
284 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
285 | +} | ||
286 | + | ||
287 | +TRANS(vaddwod_h_b, gvec_vvv, MO_8, do_vaddwod_s) | ||
288 | +TRANS(vaddwod_w_h, gvec_vvv, MO_16, do_vaddwod_s) | ||
289 | +TRANS(vaddwod_d_w, gvec_vvv, MO_32, do_vaddwod_s) | ||
290 | +TRANS(vaddwod_q_d, gvec_vvv, MO_64, do_vaddwod_s) | ||
291 | + | ||
292 | +static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
293 | +{ | ||
294 | + TCGv_vec t1, t2; | ||
295 | + | ||
296 | + int halfbits = 4 << vece; | ||
297 | + | ||
298 | + t1 = tcg_temp_new_vec_matching(a); | ||
299 | + t2 = tcg_temp_new_vec_matching(b); | ||
300 | + | ||
301 | + /* Sign-extend the even elements from a */ | ||
302 | + tcg_gen_shli_vec(vece, t1, a, halfbits); | ||
303 | + tcg_gen_sari_vec(vece, t1, t1, halfbits); | ||
304 | + | ||
305 | + /* Sign-extend the even elements from b */ | ||
306 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
307 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
308 | + | ||
309 | + tcg_gen_sub_vec(vece, t, t1, t2); | ||
310 | +} | ||
311 | + | ||
312 | +static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
313 | +{ | ||
314 | + TCGv_i32 t1, t2; | ||
315 | + | ||
316 | + t1 = tcg_temp_new_i32(); | ||
317 | + t2 = tcg_temp_new_i32(); | ||
318 | + tcg_gen_ext16s_i32(t1, a); | ||
319 | + tcg_gen_ext16s_i32(t2, b); | ||
320 | + tcg_gen_sub_i32(t, t1, t2); | ||
321 | +} | ||
322 | + | ||
323 | +static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
324 | +{ | ||
325 | + TCGv_i64 t1, t2; | ||
326 | + | ||
327 | + t1 = tcg_temp_new_i64(); | ||
328 | + t2 = tcg_temp_new_i64(); | ||
329 | + tcg_gen_ext32s_i64(t1, a); | ||
330 | + tcg_gen_ext32s_i64(t2, b); | ||
331 | + tcg_gen_sub_i64(t, t1, t2); | ||
332 | +} | ||
333 | + | ||
334 | +static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
335 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
336 | +{ | ||
337 | + static const TCGOpcode vecop_list[] = { | ||
338 | + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0 | ||
339 | + }; | ||
340 | + static const GVecGen3 op[4] = { | ||
341 | + { | ||
342 | + .fniv = gen_vsubwev_s, | ||
343 | + .fno = gen_helper_vsubwev_h_b, | ||
344 | + .opt_opc = vecop_list, | ||
345 | + .vece = MO_16 | ||
346 | + }, | ||
347 | + { | ||
348 | + .fni4 = gen_vsubwev_w_h, | ||
349 | + .fniv = gen_vsubwev_s, | ||
350 | + .fno = gen_helper_vsubwev_w_h, | ||
351 | + .opt_opc = vecop_list, | ||
352 | + .vece = MO_32 | ||
353 | + }, | ||
354 | + { | ||
355 | + .fni8 = gen_vsubwev_d_w, | ||
356 | + .fniv = gen_vsubwev_s, | ||
357 | + .fno = gen_helper_vsubwev_d_w, | ||
358 | + .opt_opc = vecop_list, | ||
359 | + .vece = MO_64 | ||
360 | + }, | ||
361 | + { | ||
362 | + .fno = gen_helper_vsubwev_q_d, | ||
363 | + .vece = MO_128 | ||
364 | + }, | ||
365 | + }; | ||
366 | + | ||
367 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
368 | +} | ||
369 | + | ||
370 | +TRANS(vsubwev_h_b, gvec_vvv, MO_8, do_vsubwev_s) | ||
371 | +TRANS(vsubwev_w_h, gvec_vvv, MO_16, do_vsubwev_s) | ||
372 | +TRANS(vsubwev_d_w, gvec_vvv, MO_32, do_vsubwev_s) | ||
373 | +TRANS(vsubwev_q_d, gvec_vvv, MO_64, do_vsubwev_s) | ||
374 | + | ||
375 | +static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
376 | +{ | ||
377 | + TCGv_vec t1, t2; | ||
378 | + | ||
379 | + int halfbits = 4 << vece; | ||
380 | + | ||
381 | + t1 = tcg_temp_new_vec_matching(a); | ||
382 | + t2 = tcg_temp_new_vec_matching(b); | ||
383 | + | ||
384 | + /* Sign-extend the odd elements from a and b */ | ||
385 | + tcg_gen_sari_vec(vece, t1, a, halfbits); | ||
386 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
387 | + | ||
388 | + tcg_gen_sub_vec(vece, t, t1, t2); | ||
389 | +} | ||
390 | + | ||
391 | +static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
392 | +{ | ||
393 | + TCGv_i32 t1, t2; | ||
394 | + | ||
395 | + t1 = tcg_temp_new_i32(); | ||
396 | + t2 = tcg_temp_new_i32(); | ||
397 | + tcg_gen_sari_i32(t1, a, 16); | ||
398 | + tcg_gen_sari_i32(t2, b, 16); | ||
399 | + tcg_gen_sub_i32(t, t1, t2); | ||
400 | +} | ||
401 | + | ||
402 | +static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
403 | +{ | ||
404 | + TCGv_i64 t1, t2; | ||
405 | + | ||
406 | + t1 = tcg_temp_new_i64(); | ||
407 | + t2 = tcg_temp_new_i64(); | ||
408 | + tcg_gen_sari_i64(t1, a, 32); | ||
409 | + tcg_gen_sari_i64(t2, b, 32); | ||
410 | + tcg_gen_sub_i64(t, t1, t2); | ||
411 | +} | ||
412 | + | ||
413 | +static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
414 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
415 | +{ | ||
416 | + static const TCGOpcode vecop_list[] = { | ||
417 | + INDEX_op_sari_vec, INDEX_op_sub_vec, 0 | ||
418 | + }; | ||
419 | + static const GVecGen3 op[4] = { | ||
420 | + { | ||
421 | + .fniv = gen_vsubwod_s, | ||
422 | + .fno = gen_helper_vsubwod_h_b, | ||
423 | + .opt_opc = vecop_list, | ||
424 | + .vece = MO_16 | ||
425 | + }, | ||
426 | + { | ||
427 | + .fni4 = gen_vsubwod_w_h, | ||
428 | + .fniv = gen_vsubwod_s, | ||
429 | + .fno = gen_helper_vsubwod_w_h, | ||
430 | + .opt_opc = vecop_list, | ||
431 | + .vece = MO_32 | ||
432 | + }, | ||
433 | + { | ||
434 | + .fni8 = gen_vsubwod_d_w, | ||
435 | + .fniv = gen_vsubwod_s, | ||
436 | + .fno = gen_helper_vsubwod_d_w, | ||
437 | + .opt_opc = vecop_list, | ||
438 | + .vece = MO_64 | ||
439 | + }, | ||
440 | + { | ||
441 | + .fno = gen_helper_vsubwod_q_d, | ||
442 | + .vece = MO_128 | ||
443 | + }, | ||
444 | + }; | ||
445 | + | ||
446 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
447 | +} | ||
448 | + | ||
449 | +TRANS(vsubwod_h_b, gvec_vvv, MO_8, do_vsubwod_s) | ||
450 | +TRANS(vsubwod_w_h, gvec_vvv, MO_16, do_vsubwod_s) | ||
451 | +TRANS(vsubwod_d_w, gvec_vvv, MO_32, do_vsubwod_s) | ||
452 | +TRANS(vsubwod_q_d, gvec_vvv, MO_64, do_vsubwod_s) | ||
453 | + | ||
454 | +static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
455 | +{ | ||
456 | + TCGv_vec t1, t2, t3; | ||
457 | + | ||
458 | + t1 = tcg_temp_new_vec_matching(a); | ||
459 | + t2 = tcg_temp_new_vec_matching(b); | ||
460 | + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
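+ /* Zero-extend the even elements from a and b */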
461 | + tcg_gen_and_vec(vece, t1, a, t3); | ||
462 | + tcg_gen_and_vec(vece, t2, b, t3); | ||
463 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
464 | +} | ||
465 | + | ||
466 | +static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
467 | +{ | ||
468 | + TCGv_i32 t1, t2; | ||
469 | + | ||
470 | + t1 = tcg_temp_new_i32(); | ||
471 | + t2 = tcg_temp_new_i32(); | ||
472 | + tcg_gen_ext16u_i32(t1, a); | ||
473 | + tcg_gen_ext16u_i32(t2, b); | ||
474 | + tcg_gen_add_i32(t, t1, t2); | ||
475 | +} | ||
476 | + | ||
477 | +static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
478 | +{ | ||
479 | + TCGv_i64 t1, t2; | ||
480 | + | ||
481 | + t1 = tcg_temp_new_i64(); | ||
482 | + t2 = tcg_temp_new_i64(); | ||
483 | + tcg_gen_ext32u_i64(t1, a); | ||
484 | + tcg_gen_ext32u_i64(t2, b); | ||
485 | + tcg_gen_add_i64(t, t1, t2); | ||
486 | +} | ||
487 | + | ||
488 | +static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
489 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
490 | +{ | ||
491 | + static const TCGOpcode vecop_list[] = { | ||
492 | + INDEX_op_add_vec, 0 | ||
493 | + }; | ||
494 | + static const GVecGen3 op[4] = { | ||
495 | + { | ||
496 | + .fniv = gen_vaddwev_u, | ||
497 | + .fno = gen_helper_vaddwev_h_bu, | ||
498 | + .opt_opc = vecop_list, | ||
499 | + .vece = MO_16 | ||
500 | + }, | ||
501 | + { | ||
502 | + .fni4 = gen_vaddwev_w_hu, | ||
503 | + .fniv = gen_vaddwev_u, | ||
504 | + .fno = gen_helper_vaddwev_w_hu, | ||
505 | + .opt_opc = vecop_list, | ||
506 | + .vece = MO_32 | ||
507 | + }, | ||
508 | + { | ||
509 | + .fni8 = gen_vaddwev_d_wu, | ||
510 | + .fniv = gen_vaddwev_u, | ||
511 | + .fno = gen_helper_vaddwev_d_wu, | ||
512 | + .opt_opc = vecop_list, | ||
513 | + .vece = MO_64 | ||
514 | + }, | ||
515 | + { | ||
516 | + .fno = gen_helper_vaddwev_q_du, | ||
517 | + .vece = MO_128 | ||
518 | + }, | ||
519 | + }; | ||
520 | + | ||
521 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
522 | +} | ||
523 | + | ||
524 | +TRANS(vaddwev_h_bu, gvec_vvv, MO_8, do_vaddwev_u) | ||
525 | +TRANS(vaddwev_w_hu, gvec_vvv, MO_16, do_vaddwev_u) | ||
526 | +TRANS(vaddwev_d_wu, gvec_vvv, MO_32, do_vaddwev_u) | ||
527 | +TRANS(vaddwev_q_du, gvec_vvv, MO_64, do_vaddwev_u) | ||
528 | + | ||
529 | +static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
530 | +{ | ||
531 | + TCGv_vec t1, t2; | ||
532 | + | ||
533 | + int halfbits = 4 << vece; | ||
534 | + | ||
535 | + t1 = tcg_temp_new_vec_matching(a); | ||
536 | + t2 = tcg_temp_new_vec_matching(b); | ||
537 | + | ||
538 | + /* Zero-extend the odd elements from a and b */ | ||
539 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
540 | + tcg_gen_shri_vec(vece, t2, b, halfbits); | ||
541 | + | ||
542 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
543 | +} | ||
544 | + | ||
545 | +static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
546 | +{ | ||
547 | + TCGv_i32 t1, t2; | ||
548 | + | ||
549 | + t1 = tcg_temp_new_i32(); | ||
550 | + t2 = tcg_temp_new_i32(); | ||
551 | + tcg_gen_shri_i32(t1, a, 16); | ||
552 | + tcg_gen_shri_i32(t2, b, 16); | ||
553 | + tcg_gen_add_i32(t, t1, t2); | ||
554 | +} | ||
555 | + | ||
556 | +static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
557 | +{ | ||
558 | + TCGv_i64 t1, t2; | ||
559 | + | ||
560 | + t1 = tcg_temp_new_i64(); | ||
561 | + t2 = tcg_temp_new_i64(); | ||
562 | + tcg_gen_shri_i64(t1, a, 32); | ||
563 | + tcg_gen_shri_i64(t2, b, 32); | ||
564 | + tcg_gen_add_i64(t, t1, t2); | ||
565 | +} | ||
566 | + | ||
567 | +static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
568 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
569 | +{ | ||
570 | + static const TCGOpcode vecop_list[] = { | ||
571 | + INDEX_op_shri_vec, INDEX_op_add_vec, 0 | ||
572 | + }; | ||
573 | + static const GVecGen3 op[4] = { | ||
574 | + { | ||
575 | + .fniv = gen_vaddwod_u, | ||
576 | + .fno = gen_helper_vaddwod_h_bu, | ||
577 | + .opt_opc = vecop_list, | ||
578 | + .vece = MO_16 | ||
579 | + }, | ||
580 | + { | ||
581 | + .fni4 = gen_vaddwod_w_hu, | ||
582 | + .fniv = gen_vaddwod_u, | ||
583 | + .fno = gen_helper_vaddwod_w_hu, | ||
584 | + .opt_opc = vecop_list, | ||
585 | + .vece = MO_32 | ||
586 | + }, | ||
587 | + { | ||
588 | + .fni8 = gen_vaddwod_d_wu, | ||
589 | + .fniv = gen_vaddwod_u, | ||
590 | + .fno = gen_helper_vaddwod_d_wu, | ||
591 | + .opt_opc = vecop_list, | ||
592 | + .vece = MO_64 | ||
593 | + }, | ||
594 | + { | ||
595 | + .fno = gen_helper_vaddwod_q_du, | ||
596 | + .vece = MO_128 | ||
597 | + }, | ||
598 | + }; | ||
599 | + | ||
600 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
601 | +} | ||
602 | + | ||
603 | +TRANS(vaddwod_h_bu, gvec_vvv, MO_8, do_vaddwod_u) | ||
604 | +TRANS(vaddwod_w_hu, gvec_vvv, MO_16, do_vaddwod_u) | ||
605 | +TRANS(vaddwod_d_wu, gvec_vvv, MO_32, do_vaddwod_u) | ||
606 | +TRANS(vaddwod_q_du, gvec_vvv, MO_64, do_vaddwod_u) | ||
607 | + | ||
608 | +static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
609 | +{ | ||
610 | + TCGv_vec t1, t2, t3; | ||
611 | + | ||
612 | + t1 = tcg_temp_new_vec_matching(a); | ||
613 | + t2 = tcg_temp_new_vec_matching(b); | ||
614 | + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
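+ /* Zero-extend the even elements from a and b */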
615 | + tcg_gen_and_vec(vece, t1, a, t3); | ||
616 | + tcg_gen_and_vec(vece, t2, b, t3); | ||
617 | + tcg_gen_sub_vec(vece, t, t1, t2); | ||
618 | +} | ||
619 | + | ||
620 | +static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
621 | +{ | ||
622 | + TCGv_i32 t1, t2; | ||
623 | + | ||
624 | + t1 = tcg_temp_new_i32(); | ||
625 | + t2 = tcg_temp_new_i32(); | ||
626 | + tcg_gen_ext16u_i32(t1, a); | ||
627 | + tcg_gen_ext16u_i32(t2, b); | ||
628 | + tcg_gen_sub_i32(t, t1, t2); | ||
629 | +} | ||
630 | + | ||
631 | +static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
632 | +{ | ||
633 | + TCGv_i64 t1, t2; | ||
634 | + | ||
635 | + t1 = tcg_temp_new_i64(); | ||
636 | + t2 = tcg_temp_new_i64(); | ||
637 | + tcg_gen_ext32u_i64(t1, a); | ||
638 | + tcg_gen_ext32u_i64(t2, b); | ||
639 | + tcg_gen_sub_i64(t, t1, t2); | ||
640 | +} | ||
641 | + | ||
642 | +static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
643 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
644 | +{ | ||
645 | + static const TCGOpcode vecop_list[] = { | ||
646 | + INDEX_op_sub_vec, 0 | ||
647 | + }; | ||
648 | + static const GVecGen3 op[4] = { | ||
649 | + { | ||
650 | + .fniv = gen_vsubwev_u, | ||
651 | + .fno = gen_helper_vsubwev_h_bu, | ||
652 | + .opt_opc = vecop_list, | ||
653 | + .vece = MO_16 | ||
654 | + }, | ||
655 | + { | ||
656 | + .fni4 = gen_vsubwev_w_hu, | ||
657 | + .fniv = gen_vsubwev_u, | ||
658 | + .fno = gen_helper_vsubwev_w_hu, | ||
659 | + .opt_opc = vecop_list, | ||
660 | + .vece = MO_32 | ||
661 | + }, | ||
662 | + { | ||
663 | + .fni8 = gen_vsubwev_d_wu, | ||
664 | + .fniv = gen_vsubwev_u, | ||
665 | + .fno = gen_helper_vsubwev_d_wu, | ||
666 | + .opt_opc = vecop_list, | ||
667 | + .vece = MO_64 | ||
668 | + }, | ||
669 | + { | ||
670 | + .fno = gen_helper_vsubwev_q_du, | ||
671 | + .vece = MO_128 | ||
672 | + }, | ||
673 | + }; | ||
674 | + | ||
675 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
676 | +} | ||
677 | + | ||
678 | +TRANS(vsubwev_h_bu, gvec_vvv, MO_8, do_vsubwev_u) | ||
679 | +TRANS(vsubwev_w_hu, gvec_vvv, MO_16, do_vsubwev_u) | ||
680 | +TRANS(vsubwev_d_wu, gvec_vvv, MO_32, do_vsubwev_u) | ||
681 | +TRANS(vsubwev_q_du, gvec_vvv, MO_64, do_vsubwev_u) | ||
682 | + | ||
683 | +static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
684 | +{ | ||
685 | + TCGv_vec t1, t2; | ||
686 | + | ||
687 | + int halfbits = 4 << vece; | ||
688 | + | ||
689 | + t1 = tcg_temp_new_vec_matching(a); | ||
690 | + t2 = tcg_temp_new_vec_matching(b); | ||
691 | + | ||
692 | + /* Zero-extend the odd elements from a and b */ | ||
693 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
694 | + tcg_gen_shri_vec(vece, t2, b, halfbits); | ||
695 | + | ||
696 | + tcg_gen_sub_vec(vece, t, t1, t2); | ||
697 | +} | ||
698 | + | ||
699 | +static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
700 | +{ | ||
701 | + TCGv_i32 t1, t2; | ||
702 | + | ||
703 | + t1 = tcg_temp_new_i32(); | ||
704 | + t2 = tcg_temp_new_i32(); | ||
705 | + tcg_gen_shri_i32(t1, a, 16); | ||
706 | + tcg_gen_shri_i32(t2, b, 16); | ||
707 | + tcg_gen_sub_i32(t, t1, t2); | ||
708 | +} | ||
709 | + | ||
710 | +static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
711 | +{ | ||
712 | + TCGv_i64 t1, t2; | ||
713 | + | ||
714 | + t1 = tcg_temp_new_i64(); | ||
715 | + t2 = tcg_temp_new_i64(); | ||
716 | + tcg_gen_shri_i64(t1, a, 32); | ||
717 | + tcg_gen_shri_i64(t2, b, 32); | ||
718 | + tcg_gen_sub_i64(t, t1, t2); | ||
719 | +} | ||
720 | + | ||
721 | +static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
722 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
723 | +{ | ||
724 | + static const TCGOpcode vecop_list[] = { | ||
725 | + INDEX_op_shri_vec, INDEX_op_sub_vec, 0 | ||
726 | + }; | ||
727 | + static const GVecGen3 op[4] = { | ||
728 | + { | ||
729 | + .fniv = gen_vsubwod_u, | ||
730 | + .fno = gen_helper_vsubwod_h_bu, | ||
731 | + .opt_opc = vecop_list, | ||
732 | + .vece = MO_16 | ||
733 | + }, | ||
734 | + { | ||
735 | + .fni4 = gen_vsubwod_w_hu, | ||
736 | + .fniv = gen_vsubwod_u, | ||
737 | + .fno = gen_helper_vsubwod_w_hu, | ||
738 | + .opt_opc = vecop_list, | ||
739 | + .vece = MO_32 | ||
740 | + }, | ||
741 | + { | ||
742 | + .fni8 = gen_vsubwod_d_wu, | ||
743 | + .fniv = gen_vsubwod_u, | ||
744 | + .fno = gen_helper_vsubwod_d_wu, | ||
745 | + .opt_opc = vecop_list, | ||
746 | + .vece = MO_64 | ||
747 | + }, | ||
748 | + { | ||
749 | + .fno = gen_helper_vsubwod_q_du, | ||
750 | + .vece = MO_128 | ||
751 | + }, | ||
752 | + }; | ||
753 | + | ||
754 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
755 | +} | ||
756 | + | ||
757 | +TRANS(vsubwod_h_bu, gvec_vvv, MO_8, do_vsubwod_u) | ||
758 | +TRANS(vsubwod_w_hu, gvec_vvv, MO_16, do_vsubwod_u) | ||
759 | +TRANS(vsubwod_d_wu, gvec_vvv, MO_32, do_vsubwod_u) | ||
760 | +TRANS(vsubwod_q_du, gvec_vvv, MO_64, do_vsubwod_u) | ||
761 | + | ||
762 | +static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
763 | +{ | ||
764 | + TCGv_vec t1, t2, t3; | ||
765 | + | ||
766 | + int halfbits = 4 << vece; | ||
767 | + | ||
768 | + t1 = tcg_temp_new_vec_matching(a); | ||
769 | + t2 = tcg_temp_new_vec_matching(b); | ||
770 | + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits)); | ||
771 | + | ||
772 | + /* Zero-extend the even elements from a */ | ||
773 | + tcg_gen_and_vec(vece, t1, a, t3); | ||
774 | + | ||
775 | + /* Sign-extend the even elements from b */ | ||
776 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
777 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
778 | + | ||
779 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
780 | +} | ||
781 | + | ||
782 | +static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
783 | +{ | ||
784 | + TCGv_i32 t1, t2; | ||
785 | + | ||
786 | + t1 = tcg_temp_new_i32(); | ||
787 | + t2 = tcg_temp_new_i32(); | ||
788 | + tcg_gen_ext16u_i32(t1, a); | ||
789 | + tcg_gen_ext16s_i32(t2, b); | ||
790 | + tcg_gen_add_i32(t, t1, t2); | ||
791 | +} | ||
792 | + | ||
793 | +static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
794 | +{ | ||
795 | + TCGv_i64 t1, t2; | ||
796 | + | ||
797 | + t1 = tcg_temp_new_i64(); | ||
798 | + t2 = tcg_temp_new_i64(); | ||
799 | + tcg_gen_ext32u_i64(t1, a); | ||
800 | + tcg_gen_ext32s_i64(t2, b); | ||
801 | + tcg_gen_add_i64(t, t1, t2); | ||
802 | +} | ||
803 | + | ||
804 | +static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
805 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
806 | +{ | ||
807 | + static const TCGOpcode vecop_list[] = { | ||
808 | + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
809 | + }; | ||
810 | + static const GVecGen3 op[4] = { | ||
811 | + { | ||
812 | + .fniv = gen_vaddwev_u_s, | ||
813 | + .fno = gen_helper_vaddwev_h_bu_b, | ||
814 | + .opt_opc = vecop_list, | ||
815 | + .vece = MO_16 | ||
816 | + }, | ||
817 | + { | ||
818 | + .fni4 = gen_vaddwev_w_hu_h, | ||
819 | + .fniv = gen_vaddwev_u_s, | ||
820 | + .fno = gen_helper_vaddwev_w_hu_h, | ||
821 | + .opt_opc = vecop_list, | ||
822 | + .vece = MO_32 | ||
823 | + }, | ||
824 | + { | ||
825 | + .fni8 = gen_vaddwev_d_wu_w, | ||
826 | + .fniv = gen_vaddwev_u_s, | ||
827 | + .fno = gen_helper_vaddwev_d_wu_w, | ||
828 | + .opt_opc = vecop_list, | ||
829 | + .vece = MO_64 | ||
830 | + }, | ||
831 | + { | ||
832 | + .fno = gen_helper_vaddwev_q_du_d, | ||
833 | + .vece = MO_128 | ||
834 | + }, | ||
835 | + }; | ||
836 | + | ||
837 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
838 | +} | ||
839 | + | ||
840 | +TRANS(vaddwev_h_bu_b, gvec_vvv, MO_8, do_vaddwev_u_s) | ||
841 | +TRANS(vaddwev_w_hu_h, gvec_vvv, MO_16, do_vaddwev_u_s) | ||
842 | +TRANS(vaddwev_d_wu_w, gvec_vvv, MO_32, do_vaddwev_u_s) | ||
843 | +TRANS(vaddwev_q_du_d, gvec_vvv, MO_64, do_vaddwev_u_s) | ||
844 | + | ||
845 | +static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
846 | +{ | ||
847 | + TCGv_vec t1, t2; | ||
848 | + | ||
849 | + int halfbits = 4 << vece; | ||
850 | + | ||
851 | + t1 = tcg_temp_new_vec_matching(a); | ||
852 | + t2 = tcg_temp_new_vec_matching(b); | ||
853 | + | ||
854 | + /* Zero-extend the odd elements from a */ | ||
855 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
856 | + /* Sign-extend the odd elements from b */ | ||
857 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
858 | + | ||
859 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
860 | +} | ||
861 | + | ||
862 | +static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
863 | +{ | ||
864 | + TCGv_i32 t1, t2; | ||
865 | + | ||
866 | + t1 = tcg_temp_new_i32(); | ||
867 | + t2 = tcg_temp_new_i32(); | ||
868 | + tcg_gen_shri_i32(t1, a, 16); | ||
869 | + tcg_gen_sari_i32(t2, b, 16); | ||
870 | + tcg_gen_add_i32(t, t1, t2); | ||
871 | +} | ||
872 | + | ||
873 | +static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
874 | +{ | ||
875 | + TCGv_i64 t1, t2; | ||
876 | + | ||
877 | + t1 = tcg_temp_new_i64(); | ||
878 | + t2 = tcg_temp_new_i64(); | ||
879 | + tcg_gen_shri_i64(t1, a, 32); | ||
880 | + tcg_gen_sari_i64(t2, b, 32); | ||
881 | + tcg_gen_add_i64(t, t1, t2); | ||
882 | +} | ||
883 | + | ||
884 | +static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
885 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
886 | +{ | ||
887 | + static const TCGOpcode vecop_list[] = { | ||
888 | + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
889 | + }; | ||
890 | + static const GVecGen3 op[4] = { | ||
891 | + { | ||
892 | + .fniv = gen_vaddwod_u_s, | ||
893 | + .fno = gen_helper_vaddwod_h_bu_b, | ||
894 | + .opt_opc = vecop_list, | ||
895 | + .vece = MO_16 | ||
896 | + }, | ||
897 | + { | ||
898 | + .fni4 = gen_vaddwod_w_hu_h, | ||
899 | + .fniv = gen_vaddwod_u_s, | ||
900 | + .fno = gen_helper_vaddwod_w_hu_h, | ||
901 | + .opt_opc = vecop_list, | ||
902 | + .vece = MO_32 | ||
903 | + }, | ||
904 | + { | ||
905 | + .fni8 = gen_vaddwod_d_wu_w, | ||
906 | + .fniv = gen_vaddwod_u_s, | ||
907 | + .fno = gen_helper_vaddwod_d_wu_w, | ||
908 | + .opt_opc = vecop_list, | ||
909 | + .vece = MO_64 | ||
910 | + }, | ||
911 | + { | ||
912 | + .fno = gen_helper_vaddwod_q_du_d, | ||
913 | + .vece = MO_128 | ||
914 | + }, | ||
915 | + }; | ||
916 | + | ||
917 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
918 | +} | ||
919 | + | ||
920 | +TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s) | ||
921 | +TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s) | ||
922 | +TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s) | ||
923 | +TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s) | ||
924 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
925 | index XXXXXXX..XXXXXXX 100644 | ||
926 | --- a/target/loongarch/insns.decode | ||
927 | +++ b/target/loongarch/insns.decode | ||
928 | @@ -XXX,XX +XXX,XX @@ vhsubw_hu_bu 0111 00000101 10100 ..... ..... ..... @vvv | ||
929 | vhsubw_wu_hu 0111 00000101 10101 ..... ..... ..... @vvv | ||
930 | vhsubw_du_wu 0111 00000101 10110 ..... ..... ..... @vvv | ||
931 | vhsubw_qu_du 0111 00000101 10111 ..... ..... ..... @vvv | ||
932 | + | ||
933 | +vaddwev_h_b 0111 00000001 11100 ..... ..... ..... @vvv | ||
934 | +vaddwev_w_h 0111 00000001 11101 ..... ..... ..... @vvv | ||
935 | +vaddwev_d_w 0111 00000001 11110 ..... ..... ..... @vvv | ||
936 | +vaddwev_q_d 0111 00000001 11111 ..... ..... ..... @vvv | ||
937 | +vaddwod_h_b 0111 00000010 00100 ..... ..... ..... @vvv | ||
938 | +vaddwod_w_h 0111 00000010 00101 ..... ..... ..... @vvv | ||
939 | +vaddwod_d_w 0111 00000010 00110 ..... ..... ..... @vvv | ||
940 | +vaddwod_q_d 0111 00000010 00111 ..... ..... ..... @vvv | ||
941 | +vsubwev_h_b 0111 00000010 00000 ..... ..... ..... @vvv | ||
942 | +vsubwev_w_h 0111 00000010 00001 ..... ..... ..... @vvv | ||
943 | +vsubwev_d_w 0111 00000010 00010 ..... ..... ..... @vvv | ||
944 | +vsubwev_q_d 0111 00000010 00011 ..... ..... ..... @vvv | ||
945 | +vsubwod_h_b 0111 00000010 01000 ..... ..... ..... @vvv | ||
946 | +vsubwod_w_h 0111 00000010 01001 ..... ..... ..... @vvv | ||
947 | +vsubwod_d_w 0111 00000010 01010 ..... ..... ..... @vvv | ||
948 | +vsubwod_q_d 0111 00000010 01011 ..... ..... ..... @vvv | ||
949 | + | ||
950 | +vaddwev_h_bu 0111 00000010 11100 ..... ..... ..... @vvv | ||
951 | +vaddwev_w_hu 0111 00000010 11101 ..... ..... ..... @vvv | ||
952 | +vaddwev_d_wu 0111 00000010 11110 ..... ..... ..... @vvv | ||
953 | +vaddwev_q_du 0111 00000010 11111 ..... ..... ..... @vvv | ||
954 | +vaddwod_h_bu 0111 00000011 00100 ..... ..... ..... @vvv | ||
955 | +vaddwod_w_hu 0111 00000011 00101 ..... ..... ..... @vvv | ||
956 | +vaddwod_d_wu 0111 00000011 00110 ..... ..... ..... @vvv | ||
957 | +vaddwod_q_du 0111 00000011 00111 ..... ..... ..... @vvv | ||
958 | +vsubwev_h_bu 0111 00000011 00000 ..... ..... ..... @vvv | ||
959 | +vsubwev_w_hu 0111 00000011 00001 ..... ..... ..... @vvv | ||
960 | +vsubwev_d_wu 0111 00000011 00010 ..... ..... ..... @vvv | ||
961 | +vsubwev_q_du 0111 00000011 00011 ..... ..... ..... @vvv | ||
962 | +vsubwod_h_bu 0111 00000011 01000 ..... ..... ..... @vvv | ||
963 | +vsubwod_w_hu 0111 00000011 01001 ..... ..... ..... @vvv | ||
964 | +vsubwod_d_wu 0111 00000011 01010 ..... ..... ..... @vvv | ||
965 | +vsubwod_q_du 0111 00000011 01011 ..... ..... ..... @vvv | ||
966 | + | ||
967 | +vaddwev_h_bu_b 0111 00000011 11100 ..... ..... ..... @vvv | ||
968 | +vaddwev_w_hu_h 0111 00000011 11101 ..... ..... ..... @vvv | ||
969 | +vaddwev_d_wu_w 0111 00000011 11110 ..... ..... ..... @vvv | ||
970 | +vaddwev_q_du_d 0111 00000011 11111 ..... ..... ..... @vvv | ||
971 | +vaddwod_h_bu_b 0111 00000100 00000 ..... ..... ..... @vvv | ||
972 | +vaddwod_w_hu_h 0111 00000100 00001 ..... ..... ..... @vvv | ||
973 | +vaddwod_d_wu_w 0111 00000100 00010 ..... ..... ..... @vvv | ||
974 | +vaddwod_q_du_d 0111 00000100 00011 ..... ..... ..... @vvv | ||
975 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
976 | index XXXXXXX..XXXXXXX 100644 | ||
977 | --- a/target/loongarch/lsx_helper.c | ||
978 | +++ b/target/loongarch/lsx_helper.c | ||
979 | @@ -XXX,XX +XXX,XX @@ void HELPER(vhsubw_qu_du)(CPULoongArchState *env, | ||
980 | Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
981 | int128_make64((uint64_t)Vk->D(0))); | ||
982 | } | ||
983 | + | ||
984 | +#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ | ||
985 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
986 | +{ \ | ||
987 | + int i; \ | ||
988 | + VReg *Vd = (VReg *)vd; \ | ||
989 | + VReg *Vj = (VReg *)vj; \ | ||
990 | + VReg *Vk = (VReg *)vk; \ | ||
991 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
992 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
993 | + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ | ||
994 | + } \ | ||
995 | +} | ||
996 | + | ||
997 | +#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ | ||
998 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
999 | +{ \ | ||
1000 | + int i; \ | ||
1001 | + VReg *Vd = (VReg *)vd; \ | ||
1002 | + VReg *Vj = (VReg *)vj; \ | ||
1003 | + VReg *Vk = (VReg *)vk; \ | ||
1004 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
1005 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
1006 | + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ | ||
1007 | + } \ | ||
1008 | +} | ||
1009 | + | ||
1010 | +void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1011 | +{ | ||
1012 | + VReg *Vd = (VReg *)vd; | ||
1013 | + VReg *Vj = (VReg *)vj; | ||
1014 | + VReg *Vk = (VReg *)vk; | ||
1015 | + | ||
1016 | + Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); | ||
1017 | +} | ||
1018 | + | ||
1019 | +DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) | ||
1020 | +DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) | ||
1021 | +DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) | ||
1022 | + | ||
1023 | +void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1024 | +{ | ||
1025 | + VReg *Vd = (VReg *)vd; | ||
1026 | + VReg *Vj = (VReg *)vj; | ||
1027 | + VReg *Vk = (VReg *)vk; | ||
1028 | + | ||
1029 | + Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); | ||
1030 | +} | ||
1031 | + | ||
1032 | +DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) | ||
1033 | +DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) | ||
1034 | +DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) | ||
1035 | + | ||
1036 | +void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1037 | +{ | ||
1038 | + VReg *Vd = (VReg *)vd; | ||
1039 | + VReg *Vj = (VReg *)vj; | ||
1040 | + VReg *Vk = (VReg *)vk; | ||
1041 | + | ||
1042 | + Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); | ||
1043 | +} | ||
1044 | + | ||
1045 | +DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) | ||
1046 | +DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) | ||
1047 | +DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) | ||
1048 | + | ||
1049 | +void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1050 | +{ | ||
1051 | + VReg *Vd = (VReg *)vd; | ||
1052 | + VReg *Vj = (VReg *)vj; | ||
1053 | + VReg *Vk = (VReg *)vk; | ||
1054 | + | ||
1055 | + Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); | ||
1056 | +} | ||
1057 | + | ||
1058 | +DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) | ||
1059 | +DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) | ||
1060 | +DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) | ||
1061 | + | ||
1062 | +void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
1063 | +{ | ||
1064 | + VReg *Vd = (VReg *)vd; | ||
1065 | + VReg *Vj = (VReg *)vj; | ||
1066 | + VReg *Vk = (VReg *)vk; | ||
1067 | + | ||
1068 | + Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), | ||
1069 | + int128_make64((uint64_t)Vk->D(0))); | ||
1070 | +} | ||
1071 | + | ||
1072 | +DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) | ||
1073 | +DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) | ||
1074 | +DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) | ||
1075 | + | ||
1076 | +void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
1077 | +{ | ||
1078 | + VReg *Vd = (VReg *)vd; | ||
1079 | + VReg *Vj = (VReg *)vj; | ||
1080 | + VReg *Vk = (VReg *)vk; | ||
1081 | + | ||
1082 | + Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
1083 | + int128_make64((uint64_t)Vk->D(1))); | ||
1084 | +} | ||
1085 | + | ||
1086 | +DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) | ||
1087 | +DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) | ||
1088 | +DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) | ||
1089 | + | ||
1090 | +void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
1091 | +{ | ||
1092 | + VReg *Vd = (VReg *)vd; | ||
1093 | + VReg *Vj = (VReg *)vj; | ||
1094 | + VReg *Vk = (VReg *)vk; | ||
1095 | + | ||
1096 | + Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)), | ||
1097 | + int128_make64((uint64_t)Vk->D(0))); | ||
1098 | +} | ||
1099 | + | ||
1100 | +DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) | ||
1101 | +DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) | ||
1102 | +DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) | ||
1103 | + | ||
1104 | +void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
1105 | +{ | ||
1106 | + VReg *Vd = (VReg *)vd; | ||
1107 | + VReg *Vj = (VReg *)vj; | ||
1108 | + VReg *Vk = (VReg *)vk; | ||
1109 | + | ||
1110 | + Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), | ||
1111 | + int128_make64((uint64_t)Vk->D(1))); | ||
1112 | +} | ||
1113 | + | ||
1114 | +DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) | ||
1115 | +DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) | ||
1116 | +DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) | ||
1117 | + | ||
1118 | +#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
1119 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
1120 | +{ \ | ||
1121 | + int i; \ | ||
1122 | + VReg *Vd = (VReg *)vd; \ | ||
1123 | + VReg *Vj = (VReg *)vj; \ | ||
1124 | + VReg *Vk = (VReg *)vk; \ | ||
1125 | + typedef __typeof(Vd->ES1(0)) TDS; \ | ||
1126 | + typedef __typeof(Vd->EU1(0)) TDU; \ | ||
1127 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
1128 | + Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i), (TDS)Vk->ES2(2 * i)); \ | ||
1129 | + } \ | ||
1130 | +} | ||
1131 | + | ||
1132 | +#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
1133 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
1134 | +{ \ | ||
1135 | + int i; \ | ||
1136 | + VReg *Vd = (VReg *)vd; \ | ||
1137 | + VReg *Vj = (VReg *)vj; \ | ||
1138 | + VReg *Vk = (VReg *)vk; \ | ||
1139 | + typedef __typeof(Vd->ES1(0)) TDS; \ | ||
1140 | + typedef __typeof(Vd->EU1(0)) TDU; \ | ||
1141 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
1142 | + Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ | ||
1143 | + } \ | ||
1144 | +} | ||
1145 | + | ||
1146 | +void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1147 | +{ | ||
1148 | + VReg *Vd = (VReg *)vd; | ||
1149 | + VReg *Vj = (VReg *)vj; | ||
1150 | + VReg *Vk = (VReg *)vk; | ||
1151 | + | ||
1152 | + Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), | ||
1153 | + int128_makes64(Vk->D(0))); | ||
1154 | +} | ||
1155 | + | ||
1156 | +DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
1157 | +DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) | ||
1158 | +DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
1159 | + | ||
1160 | +void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
1161 | +{ | ||
1162 | + VReg *Vd = (VReg *)vd; | ||
1163 | + VReg *Vj = (VReg *)vj; | ||
1164 | + VReg *Vk = (VReg *)vk; | ||
1165 | + | ||
1166 | + Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), | ||
1167 | + int128_makes64(Vk->D(1))); | ||
1168 | +} | ||
1169 | + | ||
1170 | +DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
1171 | +DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) | ||
1172 | +DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
1173 | -- | ||
1174 | 2.31.1
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VAVG.{B/H/W/D}[U]; | ||
3 | - VAVGR.{B/H/W/D}[U]. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-11-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 17 ++ | ||
10 | target/loongarch/helper.h | 18 ++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 197 ++++++++++++++++++++ | ||
12 | target/loongarch/insns.decode | 17 ++ | ||
13 | target/loongarch/lsx_helper.c | 32 ++++ | ||
14 | 5 files changed, 281 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vaddwod_h_bu_b, vvv) | ||
21 | INSN_LSX(vaddwod_w_hu_h, vvv) | ||
22 | INSN_LSX(vaddwod_d_wu_w, vvv) | ||
23 | INSN_LSX(vaddwod_q_du_d, vvv) | ||
24 | + | ||
25 | +INSN_LSX(vavg_b, vvv) | ||
26 | +INSN_LSX(vavg_h, vvv) | ||
27 | +INSN_LSX(vavg_w, vvv) | ||
28 | +INSN_LSX(vavg_d, vvv) | ||
29 | +INSN_LSX(vavg_bu, vvv) | ||
30 | +INSN_LSX(vavg_hu, vvv) | ||
31 | +INSN_LSX(vavg_wu, vvv) | ||
32 | +INSN_LSX(vavg_du, vvv) | ||
33 | +INSN_LSX(vavgr_b, vvv) | ||
34 | +INSN_LSX(vavgr_h, vvv) | ||
35 | +INSN_LSX(vavgr_w, vvv) | ||
36 | +INSN_LSX(vavgr_d, vvv) | ||
37 | +INSN_LSX(vavgr_bu, vvv) | ||
38 | +INSN_LSX(vavgr_hu, vvv) | ||
39 | +INSN_LSX(vavgr_wu, vvv) | ||
40 | +INSN_LSX(vavgr_du, vvv) | ||
41 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/helper.h | ||
44 | +++ b/target/loongarch/helper.h | ||
45 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
46 | DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
47 | DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
48 | DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
49 | + | ||
50 | +DEF_HELPER_FLAGS_4(vavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
51 | +DEF_HELPER_FLAGS_4(vavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
52 | +DEF_HELPER_FLAGS_4(vavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
53 | +DEF_HELPER_FLAGS_4(vavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
54 | +DEF_HELPER_FLAGS_4(vavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
55 | +DEF_HELPER_FLAGS_4(vavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
56 | +DEF_HELPER_FLAGS_4(vavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
57 | +DEF_HELPER_FLAGS_4(vavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
58 | + | ||
59 | +DEF_HELPER_FLAGS_4(vavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
60 | +DEF_HELPER_FLAGS_4(vavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
61 | +DEF_HELPER_FLAGS_4(vavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
62 | +DEF_HELPER_FLAGS_4(vavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
63 | +DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
64 | +DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
65 | +DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
66 | +DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
67 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
70 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
71 | @@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s) | ||
72 | TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s) | ||
73 | TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s) | ||
74 | TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s) | ||
75 | + | ||
76 | +static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, | ||
77 | + void (*gen_shr_vec)(unsigned, TCGv_vec, | ||
78 | + TCGv_vec, int64_t), | ||
79 | + void (*gen_round_vec)(unsigned, TCGv_vec, | ||
80 | + TCGv_vec, TCGv_vec)) | ||
81 | +{ | ||
82 | + TCGv_vec tmp = tcg_temp_new_vec_matching(t); | ||
83 | + gen_round_vec(vece, tmp, a, b); | ||
84 | + tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1)); | ||
85 | + gen_shr_vec(vece, a, a, 1); | ||
86 | + gen_shr_vec(vece, b, b, 1); | ||
87 | + tcg_gen_add_vec(vece, t, a, b); | ||
88 | + tcg_gen_add_vec(vece, t, t, tmp); | ||
89 | +} | ||
90 | + | ||
91 | +static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
92 | +{ | ||
93 | + do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec); | ||
94 | +} | ||
95 | + | ||
96 | +static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
97 | +{ | ||
98 | + do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec); | ||
99 | +} | ||
100 | + | ||
101 | +static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
102 | +{ | ||
103 | + do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec); | ||
104 | +} | ||
105 | + | ||
106 | +static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
107 | +{ | ||
108 | + do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec); | ||
109 | +} | ||
110 | + | ||
111 | +static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
112 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
113 | +{ | ||
114 | + static const TCGOpcode vecop_list[] = { | ||
115 | + INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
116 | + }; | ||
117 | + static const GVecGen3 op[4] = { | ||
118 | + { | ||
119 | + .fniv = gen_vavg_s, | ||
120 | + .fno = gen_helper_vavg_b, | ||
121 | + .opt_opc = vecop_list, | ||
122 | + .vece = MO_8 | ||
123 | + }, | ||
124 | + { | ||
125 | + .fniv = gen_vavg_s, | ||
126 | + .fno = gen_helper_vavg_h, | ||
127 | + .opt_opc = vecop_list, | ||
128 | + .vece = MO_16 | ||
129 | + }, | ||
130 | + { | ||
131 | + .fniv = gen_vavg_s, | ||
132 | + .fno = gen_helper_vavg_w, | ||
133 | + .opt_opc = vecop_list, | ||
134 | + .vece = MO_32 | ||
135 | + }, | ||
136 | + { | ||
137 | + .fniv = gen_vavg_s, | ||
138 | + .fno = gen_helper_vavg_d, | ||
139 | + .opt_opc = vecop_list, | ||
140 | + .vece = MO_64 | ||
141 | + }, | ||
142 | + }; | ||
143 | + | ||
144 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
145 | +} | ||
146 | + | ||
147 | +static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
148 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
149 | +{ | ||
150 | + static const TCGOpcode vecop_list[] = { | ||
151 | + INDEX_op_shri_vec, INDEX_op_add_vec, 0 | ||
152 | + }; | ||
153 | + static const GVecGen3 op[4] = { | ||
154 | + { | ||
155 | + .fniv = gen_vavg_u, | ||
156 | + .fno = gen_helper_vavg_bu, | ||
157 | + .opt_opc = vecop_list, | ||
158 | + .vece = MO_8 | ||
159 | + }, | ||
160 | + { | ||
161 | + .fniv = gen_vavg_u, | ||
162 | + .fno = gen_helper_vavg_hu, | ||
163 | + .opt_opc = vecop_list, | ||
164 | + .vece = MO_16 | ||
165 | + }, | ||
166 | + { | ||
167 | + .fniv = gen_vavg_u, | ||
168 | + .fno = gen_helper_vavg_wu, | ||
169 | + .opt_opc = vecop_list, | ||
170 | + .vece = MO_32 | ||
171 | + }, | ||
172 | + { | ||
173 | + .fniv = gen_vavg_u, | ||
174 | + .fno = gen_helper_vavg_du, | ||
175 | + .opt_opc = vecop_list, | ||
176 | + .vece = MO_64 | ||
177 | + }, | ||
178 | + }; | ||
179 | + | ||
180 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
181 | +} | ||
182 | + | ||
183 | +TRANS(vavg_b, gvec_vvv, MO_8, do_vavg_s) | ||
184 | +TRANS(vavg_h, gvec_vvv, MO_16, do_vavg_s) | ||
185 | +TRANS(vavg_w, gvec_vvv, MO_32, do_vavg_s) | ||
186 | +TRANS(vavg_d, gvec_vvv, MO_64, do_vavg_s) | ||
187 | +TRANS(vavg_bu, gvec_vvv, MO_8, do_vavg_u) | ||
188 | +TRANS(vavg_hu, gvec_vvv, MO_16, do_vavg_u) | ||
189 | +TRANS(vavg_wu, gvec_vvv, MO_32, do_vavg_u) | ||
190 | +TRANS(vavg_du, gvec_vvv, MO_64, do_vavg_u) | ||
191 | + | ||
192 | +static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
193 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
194 | +{ | ||
195 | + static const TCGOpcode vecop_list[] = { | ||
196 | + INDEX_op_sari_vec, INDEX_op_add_vec, 0 | ||
197 | + }; | ||
198 | + static const GVecGen3 op[4] = { | ||
199 | + { | ||
200 | + .fniv = gen_vavgr_s, | ||
201 | + .fno = gen_helper_vavgr_b, | ||
202 | + .opt_opc = vecop_list, | ||
203 | + .vece = MO_8 | ||
204 | + }, | ||
205 | + { | ||
206 | + .fniv = gen_vavgr_s, | ||
207 | + .fno = gen_helper_vavgr_h, | ||
208 | + .opt_opc = vecop_list, | ||
209 | + .vece = MO_16 | ||
210 | + }, | ||
211 | + { | ||
212 | + .fniv = gen_vavgr_s, | ||
213 | + .fno = gen_helper_vavgr_w, | ||
214 | + .opt_opc = vecop_list, | ||
215 | + .vece = MO_32 | ||
216 | + }, | ||
217 | + { | ||
218 | + .fniv = gen_vavgr_s, | ||
219 | + .fno = gen_helper_vavgr_d, | ||
220 | + .opt_opc = vecop_list, | ||
221 | + .vece = MO_64 | ||
222 | + }, | ||
223 | + }; | ||
224 | + | ||
225 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
226 | +} | ||
227 | + | ||
228 | +static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
229 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
230 | +{ | ||
231 | + static const TCGOpcode vecop_list[] = { | ||
232 | + INDEX_op_shri_vec, INDEX_op_add_vec, 0 | ||
233 | + }; | ||
234 | + static const GVecGen3 op[4] = { | ||
235 | + { | ||
236 | + .fniv = gen_vavgr_u, | ||
237 | + .fno = gen_helper_vavgr_bu, | ||
238 | + .opt_opc = vecop_list, | ||
239 | + .vece = MO_8 | ||
240 | + }, | ||
241 | + { | ||
242 | + .fniv = gen_vavgr_u, | ||
243 | + .fno = gen_helper_vavgr_hu, | ||
244 | + .opt_opc = vecop_list, | ||
245 | + .vece = MO_16 | ||
246 | + }, | ||
247 | + { | ||
248 | + .fniv = gen_vavgr_u, | ||
249 | + .fno = gen_helper_vavgr_wu, | ||
250 | + .opt_opc = vecop_list, | ||
251 | + .vece = MO_32 | ||
252 | + }, | ||
253 | + { | ||
254 | + .fniv = gen_vavgr_u, | ||
255 | + .fno = gen_helper_vavgr_du, | ||
256 | + .opt_opc = vecop_list, | ||
257 | + .vece = MO_64 | ||
258 | + }, | ||
259 | + }; | ||
260 | + | ||
261 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
262 | +} | ||
263 | + | ||
264 | +TRANS(vavgr_b, gvec_vvv, MO_8, do_vavgr_s) | ||
265 | +TRANS(vavgr_h, gvec_vvv, MO_16, do_vavgr_s) | ||
266 | +TRANS(vavgr_w, gvec_vvv, MO_32, do_vavgr_s) | ||
267 | +TRANS(vavgr_d, gvec_vvv, MO_64, do_vavgr_s) | ||
268 | +TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u) | ||
269 | +TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u) | ||
270 | +TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u) | ||
271 | +TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u) | ||
272 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
273 | index XXXXXXX..XXXXXXX 100644 | ||
274 | --- a/target/loongarch/insns.decode | ||
275 | +++ b/target/loongarch/insns.decode | ||
276 | @@ -XXX,XX +XXX,XX @@ vaddwod_h_bu_b 0111 00000100 00000 ..... ..... ..... @vvv | ||
277 | vaddwod_w_hu_h 0111 00000100 00001 ..... ..... ..... @vvv | ||
278 | vaddwod_d_wu_w 0111 00000100 00010 ..... ..... ..... @vvv | ||
279 | vaddwod_q_du_d 0111 00000100 00011 ..... ..... ..... @vvv | ||
280 | + | ||
281 | +vavg_b 0111 00000110 01000 ..... ..... ..... @vvv | ||
282 | +vavg_h 0111 00000110 01001 ..... ..... ..... @vvv | ||
283 | +vavg_w 0111 00000110 01010 ..... ..... ..... @vvv | ||
284 | +vavg_d 0111 00000110 01011 ..... ..... ..... @vvv | ||
285 | +vavg_bu 0111 00000110 01100 ..... ..... ..... @vvv | ||
286 | +vavg_hu 0111 00000110 01101 ..... ..... ..... @vvv | ||
287 | +vavg_wu 0111 00000110 01110 ..... ..... ..... @vvv | ||
288 | +vavg_du 0111 00000110 01111 ..... ..... ..... @vvv | ||
289 | +vavgr_b 0111 00000110 10000 ..... ..... ..... @vvv | ||
290 | +vavgr_h 0111 00000110 10001 ..... ..... ..... @vvv | ||
291 | +vavgr_w 0111 00000110 10010 ..... ..... ..... @vvv | ||
292 | +vavgr_d 0111 00000110 10011 ..... ..... ..... @vvv | ||
293 | +vavgr_bu 0111 00000110 10100 ..... ..... ..... @vvv | ||
294 | +vavgr_hu 0111 00000110 10101 ..... ..... ..... @vvv | ||
295 | +vavgr_wu 0111 00000110 10110 ..... ..... ..... @vvv | ||
296 | +vavgr_du 0111 00000110 10111 ..... ..... ..... @vvv | ||
297 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
298 | index XXXXXXX..XXXXXXX 100644 | ||
299 | --- a/target/loongarch/lsx_helper.c | ||
300 | +++ b/target/loongarch/lsx_helper.c | ||
301 | @@ -XXX,XX +XXX,XX @@ void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
302 | DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) | ||
303 | DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) | ||
304 | DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) | ||
305 | + | ||
306 | +#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) | ||
307 | +#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) | ||
308 | + | ||
309 | +#define DO_3OP(NAME, BIT, E, DO_OP) \ | ||
310 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
311 | +{ \ | ||
312 | + int i; \ | ||
313 | + VReg *Vd = (VReg *)vd; \ | ||
314 | + VReg *Vj = (VReg *)vj; \ | ||
315 | + VReg *Vk = (VReg *)vk; \ | ||
316 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
317 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
318 | + } \ | ||
319 | +} | ||
320 | + | ||
321 | +DO_3OP(vavg_b, 8, B, DO_VAVG) | ||
322 | +DO_3OP(vavg_h, 16, H, DO_VAVG) | ||
323 | +DO_3OP(vavg_w, 32, W, DO_VAVG) | ||
324 | +DO_3OP(vavg_d, 64, D, DO_VAVG) | ||
325 | +DO_3OP(vavgr_b, 8, B, DO_VAVGR) | ||
326 | +DO_3OP(vavgr_h, 16, H, DO_VAVGR) | ||
327 | +DO_3OP(vavgr_w, 32, W, DO_VAVGR) | ||
328 | +DO_3OP(vavgr_d, 64, D, DO_VAVGR) | ||
329 | +DO_3OP(vavg_bu, 8, UB, DO_VAVG) | ||
330 | +DO_3OP(vavg_hu, 16, UH, DO_VAVG) | ||
331 | +DO_3OP(vavg_wu, 32, UW, DO_VAVG) | ||
332 | +DO_3OP(vavg_du, 64, UD, DO_VAVG) | ||
333 | +DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) | ||
334 | +DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) | ||
335 | +DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) | ||
336 | +DO_3OP(vavgr_du, 64, UD, DO_VAVGR) | ||
337 | -- | ||
338 | 2.31.1
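(Aside, not part of the patch series.) The DO_VAVG/DO_VAVGR macros above compute the per-element average without ever forming the full sum, so nothing overflows the element type: (a >> 1) + (b >> 1) drops both low bits, a & b & 1 restores the lost carry (truncating average), and (a | b) & 1 instead rounds up whenever either low bit is set. A minimal, self-contained C check of those identities against a widened reference, for one int8_t lane (illustration only, not QEMU code):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * The same formulas as DO_VAVG/DO_VAVGR, specialised to one int8_t lane.
     * Like the helpers, this relies on arithmetic right shift of negatives.
     */
    static int avg_floor(int8_t a, int8_t b) { return (a >> 1) + (b >> 1) + (a & b & 1); }
    static int avg_round(int8_t a, int8_t b) { return (a >> 1) + (b >> 1) + ((a | b) & 1); }

    int main(void)
    {
        for (int a = -128; a <= 127; a++) {
            for (int b = -128; b <= 127; b++) {
                int sum = a + b;                              /* widened reference */
                assert(avg_floor(a, b) == (sum >> 1));        /* floor((a + b) / 2) */
                assert(avg_round(a, b) == ((sum + 1) >> 1));  /* rounds halves up  */
            }
        }
        puts("DO_VAVG/DO_VAVGR match the widened reference for all byte pairs");
        return 0;
    }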
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VABSD.{B/H/W/D}[U]. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-12-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 9 ++ | ||
9 | target/loongarch/helper.h | 9 ++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 95 +++++++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 9 ++ | ||
12 | target/loongarch/lsx_helper.c | 11 +++ | ||
13 | 5 files changed, 133 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vavgr_bu, vvv) | ||
20 | INSN_LSX(vavgr_hu, vvv) | ||
21 | INSN_LSX(vavgr_wu, vvv) | ||
22 | INSN_LSX(vavgr_du, vvv) | ||
23 | + | ||
24 | +INSN_LSX(vabsd_b, vvv) | ||
25 | +INSN_LSX(vabsd_h, vvv) | ||
26 | +INSN_LSX(vabsd_w, vvv) | ||
27 | +INSN_LSX(vabsd_d, vvv) | ||
28 | +INSN_LSX(vabsd_bu, vvv) | ||
29 | +INSN_LSX(vabsd_hu, vvv) | ||
30 | +INSN_LSX(vabsd_wu, vvv) | ||
31 | +INSN_LSX(vabsd_du, vvv) | ||
32 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/loongarch/helper.h | ||
35 | +++ b/target/loongarch/helper.h | ||
36 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
37 | DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
38 | DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_4(vabsd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
42 | +DEF_HELPER_FLAGS_4(vabsd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
43 | +DEF_HELPER_FLAGS_4(vabsd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
44 | +DEF_HELPER_FLAGS_4(vabsd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
45 | +DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
46 | +DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
47 | +DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
48 | +DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
49 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
52 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
53 | @@ -XXX,XX +XXX,XX @@ TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u) | ||
54 | TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u) | ||
55 | TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u) | ||
56 | TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u) | ||
57 | + | ||
58 | +static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
59 | +{ | ||
60 | + tcg_gen_smax_vec(vece, t, a, b); | ||
61 | + tcg_gen_smin_vec(vece, a, a, b); | ||
62 | + tcg_gen_sub_vec(vece, t, t, a); | ||
63 | +} | ||
64 | + | ||
65 | +static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
66 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
67 | +{ | ||
68 | + static const TCGOpcode vecop_list[] = { | ||
69 | + INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0 | ||
70 | + }; | ||
71 | + static const GVecGen3 op[4] = { | ||
72 | + { | ||
73 | + .fniv = gen_vabsd_s, | ||
74 | + .fno = gen_helper_vabsd_b, | ||
75 | + .opt_opc = vecop_list, | ||
76 | + .vece = MO_8 | ||
77 | + }, | ||
78 | + { | ||
79 | + .fniv = gen_vabsd_s, | ||
80 | + .fno = gen_helper_vabsd_h, | ||
81 | + .opt_opc = vecop_list, | ||
82 | + .vece = MO_16 | ||
83 | + }, | ||
84 | + { | ||
85 | + .fniv = gen_vabsd_s, | ||
86 | + .fno = gen_helper_vabsd_w, | ||
87 | + .opt_opc = vecop_list, | ||
88 | + .vece = MO_32 | ||
89 | + }, | ||
90 | + { | ||
91 | + .fniv = gen_vabsd_s, | ||
92 | + .fno = gen_helper_vabsd_d, | ||
93 | + .opt_opc = vecop_list, | ||
94 | + .vece = MO_64 | ||
95 | + }, | ||
96 | + }; | ||
97 | + | ||
98 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
99 | +} | ||
100 | + | ||
101 | +static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
102 | +{ | ||
103 | + tcg_gen_umax_vec(vece, t, a, b); | ||
104 | + tcg_gen_umin_vec(vece, a, a, b); | ||
105 | + tcg_gen_sub_vec(vece, t, t, a); | ||
106 | +} | ||
107 | + | ||
108 | +static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
109 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
110 | +{ | ||
111 | + static const TCGOpcode vecop_list[] = { | ||
112 | + INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0 | ||
113 | + }; | ||
114 | + static const GVecGen3 op[4] = { | ||
115 | + { | ||
116 | + .fniv = gen_vabsd_u, | ||
117 | + .fno = gen_helper_vabsd_bu, | ||
118 | + .opt_opc = vecop_list, | ||
119 | + .vece = MO_8 | ||
120 | + }, | ||
121 | + { | ||
122 | + .fniv = gen_vabsd_u, | ||
123 | + .fno = gen_helper_vabsd_hu, | ||
124 | + .opt_opc = vecop_list, | ||
125 | + .vece = MO_16 | ||
126 | + }, | ||
127 | + { | ||
128 | + .fniv = gen_vabsd_u, | ||
129 | + .fno = gen_helper_vabsd_wu, | ||
130 | + .opt_opc = vecop_list, | ||
131 | + .vece = MO_32 | ||
132 | + }, | ||
133 | + { | ||
134 | + .fniv = gen_vabsd_u, | ||
135 | + .fno = gen_helper_vabsd_du, | ||
136 | + .opt_opc = vecop_list, | ||
137 | + .vece = MO_64 | ||
138 | + }, | ||
139 | + }; | ||
140 | + | ||
141 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
142 | +} | ||
143 | + | ||
144 | +TRANS(vabsd_b, gvec_vvv, MO_8, do_vabsd_s) | ||
145 | +TRANS(vabsd_h, gvec_vvv, MO_16, do_vabsd_s) | ||
146 | +TRANS(vabsd_w, gvec_vvv, MO_32, do_vabsd_s) | ||
147 | +TRANS(vabsd_d, gvec_vvv, MO_64, do_vabsd_s) | ||
148 | +TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u) | ||
149 | +TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u) | ||
150 | +TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u) | ||
151 | +TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u) | ||
152 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/target/loongarch/insns.decode | ||
155 | +++ b/target/loongarch/insns.decode | ||
156 | @@ -XXX,XX +XXX,XX @@ vavgr_bu 0111 00000110 10100 ..... ..... ..... @vvv | ||
157 | vavgr_hu 0111 00000110 10101 ..... ..... ..... @vvv | ||
158 | vavgr_wu 0111 00000110 10110 ..... ..... ..... @vvv | ||
159 | vavgr_du 0111 00000110 10111 ..... ..... ..... @vvv | ||
160 | + | ||
161 | +vabsd_b 0111 00000110 00000 ..... ..... ..... @vvv | ||
162 | +vabsd_h 0111 00000110 00001 ..... ..... ..... @vvv | ||
163 | +vabsd_w 0111 00000110 00010 ..... ..... ..... @vvv | ||
164 | +vabsd_d 0111 00000110 00011 ..... ..... ..... @vvv | ||
165 | +vabsd_bu 0111 00000110 00100 ..... ..... ..... @vvv | ||
166 | +vabsd_hu 0111 00000110 00101 ..... ..... ..... @vvv | ||
167 | +vabsd_wu 0111 00000110 00110 ..... ..... ..... @vvv | ||
168 | +vabsd_du 0111 00000110 00111 ..... ..... ..... @vvv | ||
169 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
170 | index XXXXXXX..XXXXXXX 100644 | ||
171 | --- a/target/loongarch/lsx_helper.c | ||
172 | +++ b/target/loongarch/lsx_helper.c | ||
173 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) | ||
174 | DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) | ||
175 | DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) | ||
176 | DO_3OP(vavgr_du, 64, UD, DO_VAVGR) | ||
177 | + | ||
178 | +#define DO_VABSD(a, b) ((a > b) ? (a - b) : (b - a)) | ||
179 | + | ||
180 | +DO_3OP(vabsd_b, 8, B, DO_VABSD) | ||
181 | +DO_3OP(vabsd_h, 16, H, DO_VABSD) | ||
182 | +DO_3OP(vabsd_w, 32, W, DO_VABSD) | ||
183 | +DO_3OP(vabsd_d, 64, D, DO_VABSD) | ||
184 | +DO_3OP(vabsd_bu, 8, UB, DO_VABSD) | ||
185 | +DO_3OP(vabsd_hu, 16, UH, DO_VABSD) | ||
186 | +DO_3OP(vabsd_wu, 32, UW, DO_VABSD) | ||
187 | +DO_3OP(vabsd_du, 64, UD, DO_VABSD) | ||
188 | -- | ||
189 | 2.31.1
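(Aside, not part of the patch series.) The gen_vabsd_s/gen_vabsd_u expansions above avoid a per-lane conditional by computing max(a, b) - min(a, b), which yields the same element bits as the scalar (a > b) ? a - b : b - a used by the out-of-line DO_VABSD helper. A small C sketch comparing the two forms over every signed byte pair (illustration only, not QEMU code):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Scalar form, as in the DO_VABSD helper macro. */
    static uint8_t absd_ref(int8_t a, int8_t b)
    {
        return (uint8_t)((a > b) ? (a - b) : (b - a));
    }

    /* Form mirroring the vector expansion: max, min, then subtract. */
    static uint8_t absd_maxmin(int8_t a, int8_t b)
    {
        int8_t hi = a > b ? a : b;    /* tcg_gen_smax_vec */
        int8_t lo = a > b ? b : a;    /* tcg_gen_smin_vec */
        return (uint8_t)(hi - lo);    /* tcg_gen_sub_vec; |a - b| always fits in 8 bits */
    }

    int main(void)
    {
        for (int a = -128; a <= 127; a++) {
            for (int b = -128; b <= 127; b++) {
                assert(absd_ref(a, b) == absd_maxmin(a, b));
            }
        }
        puts("max - min matches the conditional form for all signed byte pairs");
        return 0;
    }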
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VADDA.{B/H/W/D}. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-13-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 5 ++ | ||
9 | target/loongarch/helper.h | 5 ++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 53 +++++++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 5 ++ | ||
12 | target/loongarch/lsx_helper.c | 19 ++++++++ | ||
13 | 5 files changed, 87 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vabsd_bu, vvv) | ||
20 | INSN_LSX(vabsd_hu, vvv) | ||
21 | INSN_LSX(vabsd_wu, vvv) | ||
22 | INSN_LSX(vabsd_du, vvv) | ||
23 | + | ||
24 | +INSN_LSX(vadda_b, vvv) | ||
25 | +INSN_LSX(vadda_h, vvv) | ||
26 | +INSN_LSX(vadda_w, vvv) | ||
27 | +INSN_LSX(vadda_d, vvv) | ||
28 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/loongarch/helper.h | ||
31 | +++ b/target/loongarch/helper.h | ||
32 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
33 | DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
34 | DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
35 | DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
36 | + | ||
37 | +DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
38 | +DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | +DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | +DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
41 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
44 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
45 | @@ -XXX,XX +XXX,XX @@ TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u) | ||
46 | TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u) | ||
47 | TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u) | ||
48 | TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u) | ||
49 | + | ||
50 | +static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
51 | +{ | ||
52 | + TCGv_vec t1, t2; | ||
53 | + | ||
54 | + t1 = tcg_temp_new_vec_matching(a); | ||
55 | + t2 = tcg_temp_new_vec_matching(b); | ||
56 | + | ||
57 | + tcg_gen_abs_vec(vece, t1, a); | ||
58 | + tcg_gen_abs_vec(vece, t2, b); | ||
59 | + tcg_gen_add_vec(vece, t, t1, t2); | ||
60 | +} | ||
61 | + | ||
62 | +static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
63 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
64 | +{ | ||
65 | + static const TCGOpcode vecop_list[] = { | ||
66 | + INDEX_op_abs_vec, INDEX_op_add_vec, 0 | ||
67 | + }; | ||
68 | + static const GVecGen3 op[4] = { | ||
69 | + { | ||
70 | + .fniv = gen_vadda, | ||
71 | + .fno = gen_helper_vadda_b, | ||
72 | + .opt_opc = vecop_list, | ||
73 | + .vece = MO_8 | ||
74 | + }, | ||
75 | + { | ||
76 | + .fniv = gen_vadda, | ||
77 | + .fno = gen_helper_vadda_h, | ||
78 | + .opt_opc = vecop_list, | ||
79 | + .vece = MO_16 | ||
80 | + }, | ||
81 | + { | ||
82 | + .fniv = gen_vadda, | ||
83 | + .fno = gen_helper_vadda_w, | ||
84 | + .opt_opc = vecop_list, | ||
85 | + .vece = MO_32 | ||
86 | + }, | ||
87 | + { | ||
88 | + .fniv = gen_vadda, | ||
89 | + .fno = gen_helper_vadda_d, | ||
90 | + .opt_opc = vecop_list, | ||
91 | + .vece = MO_64 | ||
92 | + }, | ||
93 | + }; | ||
94 | + | ||
95 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
96 | +} | ||
97 | + | ||
98 | +TRANS(vadda_b, gvec_vvv, MO_8, do_vadda) | ||
99 | +TRANS(vadda_h, gvec_vvv, MO_16, do_vadda) | ||
100 | +TRANS(vadda_w, gvec_vvv, MO_32, do_vadda) | ||
101 | +TRANS(vadda_d, gvec_vvv, MO_64, do_vadda) | ||
102 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/loongarch/insns.decode | ||
105 | +++ b/target/loongarch/insns.decode | ||
106 | @@ -XXX,XX +XXX,XX @@ vabsd_bu 0111 00000110 00100 ..... ..... ..... @vvv | ||
107 | vabsd_hu 0111 00000110 00101 ..... ..... ..... @vvv | ||
108 | vabsd_wu 0111 00000110 00110 ..... ..... ..... @vvv | ||
109 | vabsd_du 0111 00000110 00111 ..... ..... ..... @vvv | ||
110 | + | ||
111 | +vadda_b 0111 00000101 11000 ..... ..... ..... @vvv | ||
112 | +vadda_h 0111 00000101 11001 ..... ..... ..... @vvv | ||
113 | +vadda_w 0111 00000101 11010 ..... ..... ..... @vvv | ||
114 | +vadda_d 0111 00000101 11011 ..... ..... ..... @vvv | ||
115 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/target/loongarch/lsx_helper.c | ||
118 | +++ b/target/loongarch/lsx_helper.c | ||
119 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_bu, 8, UB, DO_VABSD) | ||
120 | DO_3OP(vabsd_hu, 16, UH, DO_VABSD) | ||
121 | DO_3OP(vabsd_wu, 32, UW, DO_VABSD) | ||
122 | DO_3OP(vabsd_du, 64, UD, DO_VABSD) | ||
123 | + | ||
124 | +#define DO_VABS(a) ((a < 0) ? (-a) : (a)) | ||
125 | + | ||
126 | +#define DO_VADDA(NAME, BIT, E, DO_OP) \ | ||
127 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
128 | +{ \ | ||
129 | + int i; \ | ||
130 | + VReg *Vd = (VReg *)vd; \ | ||
131 | + VReg *Vj = (VReg *)vj; \ | ||
132 | + VReg *Vk = (VReg *)vk; \ | ||
133 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
134 | + Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i)); \ | ||
135 | + } \ | ||
136 | +} | ||
137 | + | ||
138 | +DO_VADDA(vadda_b, 8, B, DO_VABS) | ||
139 | +DO_VADDA(vadda_h, 16, H, DO_VABS) | ||
140 | +DO_VADDA(vadda_w, 32, W, DO_VABS) | ||
141 | +DO_VADDA(vadda_d, 64, D, DO_VABS) | ||
142 | -- | ||
143 | 2.31.1
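(Aside, not part of the patch series; this is a reading of the helper above, not a statement from the ISA manual.) Each vadda element is |Vj[i]| + |Vk[i]|, with the absolute values and the sum evaluated in a wider type and then truncated back to the element width on store, so the most negative value stays as-is and large sums wrap. A one-lane sketch for bytes:

    #include <stdint.h>
    #include <stdio.h>

    /* One int8_t lane of vadda.b, mirroring DO_VABS/DO_VADDA. */
    static int8_t vadda_b_lane(int8_t a, int8_t b)
    {
        int abs_a = a < 0 ? -a : a;        /* promoted to int, like the macro */
        int abs_b = b < 0 ? -b : b;
        return (int8_t)(abs_a + abs_b);    /* store truncates to the element width */
    }

    int main(void)
    {
        printf("%d\n", vadda_b_lane(-3, 5));      /* 8 */
        printf("%d\n", vadda_b_lane(-128, 0));    /* -128: |INT8_MIN| wraps back to itself */
        printf("%d\n", vadda_b_lane(100, 100));   /* -56: 200 truncated to a signed byte */
        return 0;
    }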
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VMAX[I].{B/H/W/D}[U]; | ||
3 | - VMIN[I].{B/H/W/D}[U]. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-14-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 33 ++++ | ||
10 | target/loongarch/helper.h | 18 ++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 200 ++++++++++++++++++++ | ||
12 | target/loongarch/insns.decode | 35 ++++ | ||
13 | target/loongarch/lsx_helper.c | 33 ++++ | ||
14 | 5 files changed, 319 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vadda_b, vvv) | ||
21 | INSN_LSX(vadda_h, vvv) | ||
22 | INSN_LSX(vadda_w, vvv) | ||
23 | INSN_LSX(vadda_d, vvv) | ||
24 | + | ||
25 | +INSN_LSX(vmax_b, vvv) | ||
26 | +INSN_LSX(vmax_h, vvv) | ||
27 | +INSN_LSX(vmax_w, vvv) | ||
28 | +INSN_LSX(vmax_d, vvv) | ||
29 | +INSN_LSX(vmin_b, vvv) | ||
30 | +INSN_LSX(vmin_h, vvv) | ||
31 | +INSN_LSX(vmin_w, vvv) | ||
32 | +INSN_LSX(vmin_d, vvv) | ||
33 | +INSN_LSX(vmax_bu, vvv) | ||
34 | +INSN_LSX(vmax_hu, vvv) | ||
35 | +INSN_LSX(vmax_wu, vvv) | ||
36 | +INSN_LSX(vmax_du, vvv) | ||
37 | +INSN_LSX(vmin_bu, vvv) | ||
38 | +INSN_LSX(vmin_hu, vvv) | ||
39 | +INSN_LSX(vmin_wu, vvv) | ||
40 | +INSN_LSX(vmin_du, vvv) | ||
41 | +INSN_LSX(vmaxi_b, vv_i) | ||
42 | +INSN_LSX(vmaxi_h, vv_i) | ||
43 | +INSN_LSX(vmaxi_w, vv_i) | ||
44 | +INSN_LSX(vmaxi_d, vv_i) | ||
45 | +INSN_LSX(vmini_b, vv_i) | ||
46 | +INSN_LSX(vmini_h, vv_i) | ||
47 | +INSN_LSX(vmini_w, vv_i) | ||
48 | +INSN_LSX(vmini_d, vv_i) | ||
49 | +INSN_LSX(vmaxi_bu, vv_i) | ||
50 | +INSN_LSX(vmaxi_hu, vv_i) | ||
51 | +INSN_LSX(vmaxi_wu, vv_i) | ||
52 | +INSN_LSX(vmaxi_du, vv_i) | ||
53 | +INSN_LSX(vmini_bu, vv_i) | ||
54 | +INSN_LSX(vmini_hu, vv_i) | ||
55 | +INSN_LSX(vmini_wu, vv_i) | ||
56 | +INSN_LSX(vmini_du, vv_i) | ||
57 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/loongarch/helper.h | ||
60 | +++ b/target/loongarch/helper.h | ||
61 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
62 | DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
63 | DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
64 | DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
65 | + | ||
66 | +DEF_HELPER_FLAGS_4(vmini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
67 | +DEF_HELPER_FLAGS_4(vmini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
68 | +DEF_HELPER_FLAGS_4(vmini_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
69 | +DEF_HELPER_FLAGS_4(vmini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
70 | +DEF_HELPER_FLAGS_4(vmini_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
71 | +DEF_HELPER_FLAGS_4(vmini_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
72 | +DEF_HELPER_FLAGS_4(vmini_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
73 | +DEF_HELPER_FLAGS_4(vmini_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
74 | + | ||
75 | +DEF_HELPER_FLAGS_4(vmaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
76 | +DEF_HELPER_FLAGS_4(vmaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
77 | +DEF_HELPER_FLAGS_4(vmaxi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vmaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
81 | +DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
82 | +DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
83 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
86 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
87 | @@ -XXX,XX +XXX,XX @@ TRANS(vadda_b, gvec_vvv, MO_8, do_vadda) | ||
88 | TRANS(vadda_h, gvec_vvv, MO_16, do_vadda) | ||
89 | TRANS(vadda_w, gvec_vvv, MO_32, do_vadda) | ||
90 | TRANS(vadda_d, gvec_vvv, MO_64, do_vadda) | ||
91 | + | ||
92 | +TRANS(vmax_b, gvec_vvv, MO_8, tcg_gen_gvec_smax) | ||
93 | +TRANS(vmax_h, gvec_vvv, MO_16, tcg_gen_gvec_smax) | ||
94 | +TRANS(vmax_w, gvec_vvv, MO_32, tcg_gen_gvec_smax) | ||
95 | +TRANS(vmax_d, gvec_vvv, MO_64, tcg_gen_gvec_smax) | ||
96 | +TRANS(vmax_bu, gvec_vvv, MO_8, tcg_gen_gvec_umax) | ||
97 | +TRANS(vmax_hu, gvec_vvv, MO_16, tcg_gen_gvec_umax) | ||
98 | +TRANS(vmax_wu, gvec_vvv, MO_32, tcg_gen_gvec_umax) | ||
99 | +TRANS(vmax_du, gvec_vvv, MO_64, tcg_gen_gvec_umax) | ||
100 | + | ||
101 | +TRANS(vmin_b, gvec_vvv, MO_8, tcg_gen_gvec_smin) | ||
102 | +TRANS(vmin_h, gvec_vvv, MO_16, tcg_gen_gvec_smin) | ||
103 | +TRANS(vmin_w, gvec_vvv, MO_32, tcg_gen_gvec_smin) | ||
104 | +TRANS(vmin_d, gvec_vvv, MO_64, tcg_gen_gvec_smin) | ||
105 | +TRANS(vmin_bu, gvec_vvv, MO_8, tcg_gen_gvec_umin) | ||
106 | +TRANS(vmin_hu, gvec_vvv, MO_16, tcg_gen_gvec_umin) | ||
107 | +TRANS(vmin_wu, gvec_vvv, MO_32, tcg_gen_gvec_umin) | ||
108 | +TRANS(vmin_du, gvec_vvv, MO_64, tcg_gen_gvec_umin) | ||
109 | + | ||
110 | +static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
111 | +{ | ||
112 | + tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
113 | +} | ||
114 | + | ||
115 | +static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
116 | +{ | ||
117 | + tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
118 | +} | ||
119 | + | ||
120 | +static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
121 | +{ | ||
122 | + tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
123 | +} | ||
124 | + | ||
125 | +static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
126 | +{ | ||
127 | + tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
128 | +} | ||
129 | + | ||
130 | +static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
131 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
132 | +{ | ||
133 | + static const TCGOpcode vecop_list[] = { | ||
134 | + INDEX_op_smin_vec, 0 | ||
135 | + }; | ||
136 | + static const GVecGen2i op[4] = { | ||
137 | + { | ||
138 | + .fniv = gen_vmini_s, | ||
139 | + .fnoi = gen_helper_vmini_b, | ||
140 | + .opt_opc = vecop_list, | ||
141 | + .vece = MO_8 | ||
142 | + }, | ||
143 | + { | ||
144 | + .fniv = gen_vmini_s, | ||
145 | + .fnoi = gen_helper_vmini_h, | ||
146 | + .opt_opc = vecop_list, | ||
147 | + .vece = MO_16 | ||
148 | + }, | ||
149 | + { | ||
150 | + .fniv = gen_vmini_s, | ||
151 | + .fnoi = gen_helper_vmini_w, | ||
152 | + .opt_opc = vecop_list, | ||
153 | + .vece = MO_32 | ||
154 | + }, | ||
155 | + { | ||
156 | + .fniv = gen_vmini_s, | ||
157 | + .fnoi = gen_helper_vmini_d, | ||
158 | + .opt_opc = vecop_list, | ||
159 | + .vece = MO_64 | ||
160 | + }, | ||
161 | + }; | ||
162 | + | ||
163 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
164 | +} | ||
165 | + | ||
166 | +static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
167 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
168 | +{ | ||
169 | + static const TCGOpcode vecop_list[] = { | ||
170 | + INDEX_op_umin_vec, 0 | ||
171 | + }; | ||
172 | + static const GVecGen2i op[4] = { | ||
173 | + { | ||
174 | + .fniv = gen_vmini_u, | ||
175 | + .fnoi = gen_helper_vmini_bu, | ||
176 | + .opt_opc = vecop_list, | ||
177 | + .vece = MO_8 | ||
178 | + }, | ||
179 | + { | ||
180 | + .fniv = gen_vmini_u, | ||
181 | + .fnoi = gen_helper_vmini_hu, | ||
182 | + .opt_opc = vecop_list, | ||
183 | + .vece = MO_16 | ||
184 | + }, | ||
185 | + { | ||
186 | + .fniv = gen_vmini_u, | ||
187 | + .fnoi = gen_helper_vmini_wu, | ||
188 | + .opt_opc = vecop_list, | ||
189 | + .vece = MO_32 | ||
190 | + }, | ||
191 | + { | ||
192 | + .fniv = gen_vmini_u, | ||
193 | + .fnoi = gen_helper_vmini_du, | ||
194 | + .opt_opc = vecop_list, | ||
195 | + .vece = MO_64 | ||
196 | + }, | ||
197 | + }; | ||
198 | + | ||
199 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
200 | +} | ||
201 | + | ||
202 | +TRANS(vmini_b, gvec_vv_i, MO_8, do_vmini_s) | ||
203 | +TRANS(vmini_h, gvec_vv_i, MO_16, do_vmini_s) | ||
204 | +TRANS(vmini_w, gvec_vv_i, MO_32, do_vmini_s) | ||
205 | +TRANS(vmini_d, gvec_vv_i, MO_64, do_vmini_s) | ||
206 | +TRANS(vmini_bu, gvec_vv_i, MO_8, do_vmini_u) | ||
207 | +TRANS(vmini_hu, gvec_vv_i, MO_16, do_vmini_u) | ||
208 | +TRANS(vmini_wu, gvec_vv_i, MO_32, do_vmini_u) | ||
209 | +TRANS(vmini_du, gvec_vv_i, MO_64, do_vmini_u) | ||
210 | + | ||
211 | +static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
212 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
213 | +{ | ||
214 | + static const TCGOpcode vecop_list[] = { | ||
215 | + INDEX_op_smax_vec, 0 | ||
216 | + }; | ||
217 | + static const GVecGen2i op[4] = { | ||
218 | + { | ||
219 | + .fniv = gen_vmaxi_s, | ||
220 | + .fnoi = gen_helper_vmaxi_b, | ||
221 | + .opt_opc = vecop_list, | ||
222 | + .vece = MO_8 | ||
223 | + }, | ||
224 | + { | ||
225 | + .fniv = gen_vmaxi_s, | ||
226 | + .fnoi = gen_helper_vmaxi_h, | ||
227 | + .opt_opc = vecop_list, | ||
228 | + .vece = MO_16 | ||
229 | + }, | ||
230 | + { | ||
231 | + .fniv = gen_vmaxi_s, | ||
232 | + .fnoi = gen_helper_vmaxi_w, | ||
233 | + .opt_opc = vecop_list, | ||
234 | + .vece = MO_32 | ||
235 | + }, | ||
236 | + { | ||
237 | + .fniv = gen_vmaxi_s, | ||
238 | + .fnoi = gen_helper_vmaxi_d, | ||
239 | + .opt_opc = vecop_list, | ||
240 | + .vece = MO_64 | ||
241 | + }, | ||
242 | + }; | ||
243 | + | ||
244 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
245 | +} | ||
246 | + | ||
247 | +static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
248 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
249 | +{ | ||
250 | + static const TCGOpcode vecop_list[] = { | ||
251 | + INDEX_op_umax_vec, 0 | ||
252 | + }; | ||
253 | + static const GVecGen2i op[4] = { | ||
254 | + { | ||
255 | + .fniv = gen_vmaxi_u, | ||
256 | + .fnoi = gen_helper_vmaxi_bu, | ||
257 | + .opt_opc = vecop_list, | ||
258 | + .vece = MO_8 | ||
259 | + }, | ||
260 | + { | ||
261 | + .fniv = gen_vmaxi_u, | ||
262 | + .fnoi = gen_helper_vmaxi_hu, | ||
263 | + .opt_opc = vecop_list, | ||
264 | + .vece = MO_16 | ||
265 | + }, | ||
266 | + { | ||
267 | + .fniv = gen_vmaxi_u, | ||
268 | + .fnoi = gen_helper_vmaxi_wu, | ||
269 | + .opt_opc = vecop_list, | ||
270 | + .vece = MO_32 | ||
271 | + }, | ||
272 | + { | ||
273 | + .fniv = gen_vmaxi_u, | ||
274 | + .fnoi = gen_helper_vmaxi_du, | ||
275 | + .opt_opc = vecop_list, | ||
276 | + .vece = MO_64 | ||
277 | + }, | ||
278 | + }; | ||
279 | + | ||
280 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
281 | +} | ||
282 | + | ||
283 | +TRANS(vmaxi_b, gvec_vv_i, MO_8, do_vmaxi_s) | ||
284 | +TRANS(vmaxi_h, gvec_vv_i, MO_16, do_vmaxi_s) | ||
285 | +TRANS(vmaxi_w, gvec_vv_i, MO_32, do_vmaxi_s) | ||
286 | +TRANS(vmaxi_d, gvec_vv_i, MO_64, do_vmaxi_s) | ||
287 | +TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u) | ||
288 | +TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u) | ||
289 | +TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u) | ||
290 | +TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u) | ||
291 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
292 | index XXXXXXX..XXXXXXX 100644 | ||
293 | --- a/target/loongarch/insns.decode | ||
294 | +++ b/target/loongarch/insns.decode | ||
295 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
296 | @vv .... ........ ..... ..... vj:5 vd:5 &vv | ||
297 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
298 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
299 | +@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
300 | |||
301 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
302 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
303 | @@ -XXX,XX +XXX,XX @@ vadda_b 0111 00000101 11000 ..... ..... ..... @vvv | ||
304 | vadda_h 0111 00000101 11001 ..... ..... ..... @vvv | ||
305 | vadda_w 0111 00000101 11010 ..... ..... ..... @vvv | ||
306 | vadda_d 0111 00000101 11011 ..... ..... ..... @vvv | ||
307 | + | ||
308 | +vmax_b 0111 00000111 00000 ..... ..... ..... @vvv | ||
309 | +vmax_h 0111 00000111 00001 ..... ..... ..... @vvv | ||
310 | +vmax_w 0111 00000111 00010 ..... ..... ..... @vvv | ||
311 | +vmax_d 0111 00000111 00011 ..... ..... ..... @vvv | ||
312 | +vmaxi_b 0111 00101001 00000 ..... ..... ..... @vv_i5 | ||
313 | +vmaxi_h 0111 00101001 00001 ..... ..... ..... @vv_i5 | ||
314 | +vmaxi_w 0111 00101001 00010 ..... ..... ..... @vv_i5 | ||
315 | +vmaxi_d 0111 00101001 00011 ..... ..... ..... @vv_i5 | ||
316 | +vmax_bu 0111 00000111 01000 ..... ..... ..... @vvv | ||
317 | +vmax_hu 0111 00000111 01001 ..... ..... ..... @vvv | ||
318 | +vmax_wu 0111 00000111 01010 ..... ..... ..... @vvv | ||
319 | +vmax_du 0111 00000111 01011 ..... ..... ..... @vvv | ||
320 | +vmaxi_bu 0111 00101001 01000 ..... ..... ..... @vv_ui5 | ||
321 | +vmaxi_hu 0111 00101001 01001 ..... ..... ..... @vv_ui5 | ||
322 | +vmaxi_wu 0111 00101001 01010 ..... ..... ..... @vv_ui5 | ||
323 | +vmaxi_du 0111 00101001 01011 ..... ..... ..... @vv_ui5 | ||
324 | + | ||
325 | +vmin_b 0111 00000111 00100 ..... ..... ..... @vvv | ||
326 | +vmin_h 0111 00000111 00101 ..... ..... ..... @vvv | ||
327 | +vmin_w 0111 00000111 00110 ..... ..... ..... @vvv | ||
328 | +vmin_d 0111 00000111 00111 ..... ..... ..... @vvv | ||
329 | +vmini_b 0111 00101001 00100 ..... ..... ..... @vv_i5 | ||
330 | +vmini_h 0111 00101001 00101 ..... ..... ..... @vv_i5 | ||
331 | +vmini_w 0111 00101001 00110 ..... ..... ..... @vv_i5 | ||
332 | +vmini_d 0111 00101001 00111 ..... ..... ..... @vv_i5 | ||
333 | +vmin_bu 0111 00000111 01100 ..... ..... ..... @vvv | ||
334 | +vmin_hu 0111 00000111 01101 ..... ..... ..... @vvv | ||
335 | +vmin_wu 0111 00000111 01110 ..... ..... ..... @vvv | ||
336 | +vmin_du 0111 00000111 01111 ..... ..... ..... @vvv | ||
337 | +vmini_bu 0111 00101001 01100 ..... ..... ..... @vv_ui5 | ||
338 | +vmini_hu 0111 00101001 01101 ..... ..... ..... @vv_ui5 | ||
339 | +vmini_wu 0111 00101001 01110 ..... ..... ..... @vv_ui5 | ||
340 | +vmini_du 0111 00101001 01111 ..... ..... ..... @vv_ui5 | ||
341 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
342 | index XXXXXXX..XXXXXXX 100644 | ||
343 | --- a/target/loongarch/lsx_helper.c | ||
344 | +++ b/target/loongarch/lsx_helper.c | ||
345 | @@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_b, 8, B, DO_VABS) | ||
346 | DO_VADDA(vadda_h, 16, H, DO_VABS) | ||
347 | DO_VADDA(vadda_w, 32, W, DO_VABS) | ||
348 | DO_VADDA(vadda_d, 64, D, DO_VABS) | ||
349 | + | ||
350 | +#define DO_MIN(a, b) (a < b ? a : b) | ||
351 | +#define DO_MAX(a, b) (a > b ? a : b) | ||
352 | + | ||
353 | +#define VMINMAXI(NAME, BIT, E, DO_OP) \ | ||
354 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
355 | +{ \ | ||
356 | + int i; \ | ||
357 | + VReg *Vd = (VReg *)vd; \ | ||
358 | + VReg *Vj = (VReg *)vj; \ | ||
359 | + typedef __typeof(Vd->E(0)) TD; \ | ||
360 | + \ | ||
361 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
362 | + Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
363 | + } \ | ||
364 | +} | ||
365 | + | ||
366 | +VMINMAXI(vmini_b, 8, B, DO_MIN) | ||
367 | +VMINMAXI(vmini_h, 16, H, DO_MIN) | ||
368 | +VMINMAXI(vmini_w, 32, W, DO_MIN) | ||
369 | +VMINMAXI(vmini_d, 64, D, DO_MIN) | ||
370 | +VMINMAXI(vmaxi_b, 8, B, DO_MAX) | ||
371 | +VMINMAXI(vmaxi_h, 16, H, DO_MAX) | ||
372 | +VMINMAXI(vmaxi_w, 32, W, DO_MAX) | ||
373 | +VMINMAXI(vmaxi_d, 64, D, DO_MAX) | ||
374 | +VMINMAXI(vmini_bu, 8, UB, DO_MIN) | ||
375 | +VMINMAXI(vmini_hu, 16, UH, DO_MIN) | ||
376 | +VMINMAXI(vmini_wu, 32, UW, DO_MIN) | ||
377 | +VMINMAXI(vmini_du, 64, UD, DO_MIN) | ||
378 | +VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) | ||
379 | +VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) | ||
380 | +VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) | ||
381 | +VMINMAXI(vmaxi_du, 64, UD, DO_MAX) | ||
382 | -- | ||
383 | 2.31.1
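(Aside, not part of the patch series.) A note on the (TD)imm cast in the VMINMAXI helper above: the immediate reaches the helper as a uint64_t, and for the signed forms the imm:s5 decode field is sign-extended, so casting it back to the element type is what makes the DO_MIN/DO_MAX comparison use the right signedness. A scalar illustration of what would go wrong without the cast (it assumes the helper really does receive a sign-extended value, which is how imm:s5 is declared):

    #include <stdint.h>
    #include <stdio.h>

    #define DO_MIN(a, b) (a < b ? a : b)

    int main(void)
    {
        uint64_t imm = UINT64_MAX;    /* vmini.b with si5 = -1, sign-extended */
        int8_t elem = 3;

        /* Without the cast, elem is converted to uint64_t and the comparison
         * is unsigned, so min(3, -1) wrongly comes out as 3. */
        long long without_cast = (long long)DO_MIN(elem, imm);

        /* With the element-type cast, as (TD)imm does, the comparison is signed. */
        int8_t with_cast = DO_MIN(elem, (int8_t)imm);

        printf("without cast: %lld\n", without_cast);   /* 3  */
        printf("with cast:    %d\n", with_cast);        /* -1 */
        return 0;
    }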
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VMUL.{B/H/W/D}; | ||
3 | - VMUH.{B/H/W/D}[U]; | ||
4 | - VMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
5 | - VMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
6 | 1 | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230504122810.4094787-15-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/disas.c | 38 ++ | ||
12 | target/loongarch/helper.h | 30 ++ | ||
13 | target/loongarch/insn_trans/trans_lsx.c.inc | 550 ++++++++++++++++++++ | ||
14 | target/loongarch/insns.decode | 38 ++ | ||
15 | target/loongarch/lsx_helper.c | 76 +++ | ||
16 | 5 files changed, 732 insertions(+) | ||
17 | |||
18 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/loongarch/disas.c | ||
21 | +++ b/target/loongarch/disas.c | ||
22 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmini_bu, vv_i) | ||
23 | INSN_LSX(vmini_hu, vv_i) | ||
24 | INSN_LSX(vmini_wu, vv_i) | ||
25 | INSN_LSX(vmini_du, vv_i) | ||
26 | + | ||
27 | +INSN_LSX(vmul_b, vvv) | ||
28 | +INSN_LSX(vmul_h, vvv) | ||
29 | +INSN_LSX(vmul_w, vvv) | ||
30 | +INSN_LSX(vmul_d, vvv) | ||
31 | +INSN_LSX(vmuh_b, vvv) | ||
32 | +INSN_LSX(vmuh_h, vvv) | ||
33 | +INSN_LSX(vmuh_w, vvv) | ||
34 | +INSN_LSX(vmuh_d, vvv) | ||
35 | +INSN_LSX(vmuh_bu, vvv) | ||
36 | +INSN_LSX(vmuh_hu, vvv) | ||
37 | +INSN_LSX(vmuh_wu, vvv) | ||
38 | +INSN_LSX(vmuh_du, vvv) | ||
39 | + | ||
40 | +INSN_LSX(vmulwev_h_b, vvv) | ||
41 | +INSN_LSX(vmulwev_w_h, vvv) | ||
42 | +INSN_LSX(vmulwev_d_w, vvv) | ||
43 | +INSN_LSX(vmulwev_q_d, vvv) | ||
44 | +INSN_LSX(vmulwod_h_b, vvv) | ||
45 | +INSN_LSX(vmulwod_w_h, vvv) | ||
46 | +INSN_LSX(vmulwod_d_w, vvv) | ||
47 | +INSN_LSX(vmulwod_q_d, vvv) | ||
48 | +INSN_LSX(vmulwev_h_bu, vvv) | ||
49 | +INSN_LSX(vmulwev_w_hu, vvv) | ||
50 | +INSN_LSX(vmulwev_d_wu, vvv) | ||
51 | +INSN_LSX(vmulwev_q_du, vvv) | ||
52 | +INSN_LSX(vmulwod_h_bu, vvv) | ||
53 | +INSN_LSX(vmulwod_w_hu, vvv) | ||
54 | +INSN_LSX(vmulwod_d_wu, vvv) | ||
55 | +INSN_LSX(vmulwod_q_du, vvv) | ||
56 | +INSN_LSX(vmulwev_h_bu_b, vvv) | ||
57 | +INSN_LSX(vmulwev_w_hu_h, vvv) | ||
58 | +INSN_LSX(vmulwev_d_wu_w, vvv) | ||
59 | +INSN_LSX(vmulwev_q_du_d, vvv) | ||
60 | +INSN_LSX(vmulwod_h_bu_b, vvv) | ||
61 | +INSN_LSX(vmulwod_w_hu_h, vvv) | ||
62 | +INSN_LSX(vmulwod_d_wu_w, vvv) | ||
63 | +INSN_LSX(vmulwod_q_du_d, vvv) | ||
64 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/target/loongarch/helper.h | ||
67 | +++ b/target/loongarch/helper.h | ||
68 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
69 | DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
70 | DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
71 | DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
72 | + | ||
73 | +DEF_HELPER_FLAGS_4(vmuh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
74 | +DEF_HELPER_FLAGS_4(vmuh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
75 | +DEF_HELPER_FLAGS_4(vmuh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
76 | +DEF_HELPER_FLAGS_4(vmuh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
77 | +DEF_HELPER_FLAGS_4(vmuh_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vmuh_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vmuh_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vmuh_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
81 | + | ||
82 | +DEF_HELPER_FLAGS_4(vmulwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vmulwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
84 | +DEF_HELPER_FLAGS_4(vmulwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
85 | +DEF_HELPER_FLAGS_4(vmulwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
86 | +DEF_HELPER_FLAGS_4(vmulwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
87 | +DEF_HELPER_FLAGS_4(vmulwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
88 | + | ||
89 | +DEF_HELPER_FLAGS_4(vmulwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
90 | +DEF_HELPER_FLAGS_4(vmulwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
91 | +DEF_HELPER_FLAGS_4(vmulwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
92 | +DEF_HELPER_FLAGS_4(vmulwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
93 | +DEF_HELPER_FLAGS_4(vmulwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
94 | +DEF_HELPER_FLAGS_4(vmulwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
95 | + | ||
96 | +DEF_HELPER_FLAGS_4(vmulwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vmulwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
98 | +DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
99 | +DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
100 | +DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
101 | +DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
102 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
105 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
106 | @@ -XXX,XX +XXX,XX @@ TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u) | ||
107 | TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u) | ||
108 | TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u) | ||
109 | TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u) | ||
110 | + | ||
111 | +TRANS(vmul_b, gvec_vvv, MO_8, tcg_gen_gvec_mul) | ||
112 | +TRANS(vmul_h, gvec_vvv, MO_16, tcg_gen_gvec_mul) | ||
113 | +TRANS(vmul_w, gvec_vvv, MO_32, tcg_gen_gvec_mul) | ||
114 | +TRANS(vmul_d, gvec_vvv, MO_64, tcg_gen_gvec_mul) | ||
115 | + | ||
116 | +static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
117 | +{ | ||
118 | + TCGv_i32 discard = tcg_temp_new_i32(); | ||
119 | + tcg_gen_muls2_i32(discard, t, a, b); | ||
120 | +} | ||
121 | + | ||
122 | +static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
123 | +{ | ||
124 | + TCGv_i64 discard = tcg_temp_new_i64(); | ||
125 | + tcg_gen_muls2_i64(discard, t, a, b); | ||
126 | +} | ||
127 | + | ||
128 | +static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
129 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
130 | +{ | ||
131 | + static const GVecGen3 op[4] = { | ||
132 | + { | ||
133 | + .fno = gen_helper_vmuh_b, | ||
134 | + .vece = MO_8 | ||
135 | + }, | ||
136 | + { | ||
137 | + .fno = gen_helper_vmuh_h, | ||
138 | + .vece = MO_16 | ||
139 | + }, | ||
140 | + { | ||
141 | + .fni4 = gen_vmuh_w, | ||
142 | + .fno = gen_helper_vmuh_w, | ||
143 | + .vece = MO_32 | ||
144 | + }, | ||
145 | + { | ||
146 | + .fni8 = gen_vmuh_d, | ||
147 | + .fno = gen_helper_vmuh_d, | ||
148 | + .vece = MO_64 | ||
149 | + }, | ||
150 | + }; | ||
151 | + | ||
152 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
153 | +} | ||
154 | + | ||
155 | +TRANS(vmuh_b, gvec_vvv, MO_8, do_vmuh_s) | ||
156 | +TRANS(vmuh_h, gvec_vvv, MO_16, do_vmuh_s) | ||
157 | +TRANS(vmuh_w, gvec_vvv, MO_32, do_vmuh_s) | ||
158 | +TRANS(vmuh_d, gvec_vvv, MO_64, do_vmuh_s) | ||
159 | + | ||
160 | +static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
161 | +{ | ||
162 | + TCGv_i32 discard = tcg_temp_new_i32(); | ||
163 | + tcg_gen_mulu2_i32(discard, t, a, b); | ||
164 | +} | ||
165 | + | ||
166 | +static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
167 | +{ | ||
168 | + TCGv_i64 discard = tcg_temp_new_i64(); | ||
169 | + tcg_gen_mulu2_i64(discard, t, a, b); | ||
170 | +} | ||
171 | + | ||
172 | +static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
173 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
174 | +{ | ||
175 | + static const GVecGen3 op[4] = { | ||
176 | + { | ||
177 | + .fno = gen_helper_vmuh_bu, | ||
178 | + .vece = MO_8 | ||
179 | + }, | ||
180 | + { | ||
181 | + .fno = gen_helper_vmuh_hu, | ||
182 | + .vece = MO_16 | ||
183 | + }, | ||
184 | + { | ||
185 | + .fni4 = gen_vmuh_wu, | ||
186 | + .fno = gen_helper_vmuh_wu, | ||
187 | + .vece = MO_32 | ||
188 | + }, | ||
189 | + { | ||
190 | + .fni8 = gen_vmuh_du, | ||
191 | + .fno = gen_helper_vmuh_du, | ||
192 | + .vece = MO_64 | ||
193 | + }, | ||
194 | + }; | ||
195 | + | ||
196 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
197 | +} | ||
198 | + | ||
199 | +TRANS(vmuh_bu, gvec_vvv, MO_8, do_vmuh_u) | ||
200 | +TRANS(vmuh_hu, gvec_vvv, MO_16, do_vmuh_u) | ||
201 | +TRANS(vmuh_wu, gvec_vvv, MO_32, do_vmuh_u) | ||
202 | +TRANS(vmuh_du, gvec_vvv, MO_64, do_vmuh_u) | ||
203 | + | ||
204 | +static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
205 | +{ | ||
206 | + TCGv_vec t1, t2; | ||
207 | + int halfbits = 4 << vece; | ||
208 | + | ||
209 | + t1 = tcg_temp_new_vec_matching(a); | ||
210 | + t2 = tcg_temp_new_vec_matching(b); | ||
211 | + tcg_gen_shli_vec(vece, t1, a, halfbits); | ||
212 | + tcg_gen_sari_vec(vece, t1, t1, halfbits); | ||
213 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
214 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
215 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
216 | +} | ||
217 | + | ||
218 | +static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
219 | +{ | ||
220 | + TCGv_i32 t1, t2; | ||
221 | + | ||
222 | + t1 = tcg_temp_new_i32(); | ||
223 | + t2 = tcg_temp_new_i32(); | ||
224 | + tcg_gen_ext16s_i32(t1, a); | ||
225 | + tcg_gen_ext16s_i32(t2, b); | ||
226 | + tcg_gen_mul_i32(t, t1, t2); | ||
227 | +} | ||
228 | + | ||
229 | +static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
230 | +{ | ||
231 | + TCGv_i64 t1, t2; | ||
232 | + | ||
233 | + t1 = tcg_temp_new_i64(); | ||
234 | + t2 = tcg_temp_new_i64(); | ||
235 | + tcg_gen_ext32s_i64(t1, a); | ||
236 | + tcg_gen_ext32s_i64(t2, b); | ||
237 | + tcg_gen_mul_i64(t, t1, t2); | ||
238 | +} | ||
239 | + | ||
240 | +static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
241 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
242 | +{ | ||
243 | + static const TCGOpcode vecop_list[] = { | ||
244 | + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 | ||
245 | + }; | ||
246 | + static const GVecGen3 op[3] = { | ||
247 | + { | ||
248 | + .fniv = gen_vmulwev_s, | ||
249 | + .fno = gen_helper_vmulwev_h_b, | ||
250 | + .opt_opc = vecop_list, | ||
251 | + .vece = MO_16 | ||
252 | + }, | ||
253 | + { | ||
254 | + .fni4 = gen_vmulwev_w_h, | ||
255 | + .fniv = gen_vmulwev_s, | ||
256 | + .fno = gen_helper_vmulwev_w_h, | ||
257 | + .opt_opc = vecop_list, | ||
258 | + .vece = MO_32 | ||
259 | + }, | ||
260 | + { | ||
261 | + .fni8 = gen_vmulwev_d_w, | ||
262 | + .fniv = gen_vmulwev_s, | ||
263 | + .fno = gen_helper_vmulwev_d_w, | ||
264 | + .opt_opc = vecop_list, | ||
265 | + .vece = MO_64 | ||
266 | + }, | ||
267 | + }; | ||
268 | + | ||
269 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
270 | +} | ||
271 | + | ||
272 | +TRANS(vmulwev_h_b, gvec_vvv, MO_8, do_vmulwev_s) | ||
273 | +TRANS(vmulwev_w_h, gvec_vvv, MO_16, do_vmulwev_s) | ||
274 | +TRANS(vmulwev_d_w, gvec_vvv, MO_32, do_vmulwev_s) | ||
275 | + | ||
276 | +static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh, | ||
277 | + TCGv_i64 arg1, TCGv_i64 arg2) | ||
278 | +{ | ||
279 | + tcg_gen_mulsu2_i64(rl, rh, arg2, arg1); | ||
280 | +} | ||
281 | + | ||
282 | +#define VMUL_Q(NAME, FN, idx1, idx2) \ | ||
283 | +static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \ | ||
284 | +{ \ | ||
285 | + TCGv_i64 rh, rl, arg1, arg2; \ | ||
286 | + \ | ||
287 | + rh = tcg_temp_new_i64(); \ | ||
288 | + rl = tcg_temp_new_i64(); \ | ||
289 | + arg1 = tcg_temp_new_i64(); \ | ||
290 | + arg2 = tcg_temp_new_i64(); \ | ||
291 | + \ | ||
292 | + get_vreg64(arg1, a->vj, idx1); \ | ||
293 | + get_vreg64(arg2, a->vk, idx2); \ | ||
294 | + \ | ||
295 | + tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \ | ||
296 | + \ | ||
297 | + set_vreg64(rh, a->vd, 1); \ | ||
298 | + set_vreg64(rl, a->vd, 0); \ | ||
299 | + \ | ||
300 | + return true; \ | ||
301 | +} | ||
302 | + | ||
303 | +VMUL_Q(vmulwev_q_d, muls2, 0, 0) | ||
304 | +VMUL_Q(vmulwod_q_d, muls2, 1, 1) | ||
305 | +VMUL_Q(vmulwev_q_du, mulu2, 0, 0) | ||
306 | +VMUL_Q(vmulwod_q_du, mulu2, 1, 1) | ||
307 | +VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0) | ||
308 | +VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1) | ||
309 | + | ||
310 | +static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
311 | +{ | ||
312 | + TCGv_vec t1, t2; | ||
313 | + int halfbits = 4 << vece; | ||
314 | + | ||
315 | + t1 = tcg_temp_new_vec_matching(a); | ||
316 | + t2 = tcg_temp_new_vec_matching(b); | ||
317 | + tcg_gen_sari_vec(vece, t1, a, halfbits); | ||
318 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
319 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
320 | +} | ||
321 | + | ||
322 | +static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
323 | +{ | ||
324 | + TCGv_i32 t1, t2; | ||
325 | + | ||
326 | + t1 = tcg_temp_new_i32(); | ||
327 | + t2 = tcg_temp_new_i32(); | ||
328 | + tcg_gen_sari_i32(t1, a, 16); | ||
329 | + tcg_gen_sari_i32(t2, b, 16); | ||
330 | + tcg_gen_mul_i32(t, t1, t2); | ||
331 | +} | ||
332 | + | ||
333 | +static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
334 | +{ | ||
335 | + TCGv_i64 t1, t2; | ||
336 | + | ||
337 | + t1 = tcg_temp_new_i64(); | ||
338 | + t2 = tcg_temp_new_i64(); | ||
339 | + tcg_gen_sari_i64(t1, a, 32); | ||
340 | + tcg_gen_sari_i64(t2, b, 32); | ||
341 | + tcg_gen_mul_i64(t, t1, t2); | ||
342 | +} | ||
343 | + | ||
344 | +static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
345 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
346 | +{ | ||
347 | + static const TCGOpcode vecop_list[] = { | ||
348 | + INDEX_op_sari_vec, INDEX_op_mul_vec, 0 | ||
349 | + }; | ||
350 | + static const GVecGen3 op[3] = { | ||
351 | + { | ||
352 | + .fniv = gen_vmulwod_s, | ||
353 | + .fno = gen_helper_vmulwod_h_b, | ||
354 | + .opt_opc = vecop_list, | ||
355 | + .vece = MO_16 | ||
356 | + }, | ||
357 | + { | ||
358 | + .fni4 = gen_vmulwod_w_h, | ||
359 | + .fniv = gen_vmulwod_s, | ||
360 | + .fno = gen_helper_vmulwod_w_h, | ||
361 | + .opt_opc = vecop_list, | ||
362 | + .vece = MO_32 | ||
363 | + }, | ||
364 | + { | ||
365 | + .fni8 = gen_vmulwod_d_w, | ||
366 | + .fniv = gen_vmulwod_s, | ||
367 | + .fno = gen_helper_vmulwod_d_w, | ||
368 | + .opt_opc = vecop_list, | ||
369 | + .vece = MO_64 | ||
370 | + }, | ||
371 | + }; | ||
372 | + | ||
373 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
374 | +} | ||
375 | + | ||
376 | +TRANS(vmulwod_h_b, gvec_vvv, MO_8, do_vmulwod_s) | ||
377 | +TRANS(vmulwod_w_h, gvec_vvv, MO_16, do_vmulwod_s) | ||
378 | +TRANS(vmulwod_d_w, gvec_vvv, MO_32, do_vmulwod_s) | ||
379 | + | ||
380 | +static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
381 | +{ | ||
382 | + TCGv_vec t1, t2, mask; | ||
383 | + | ||
384 | + t1 = tcg_temp_new_vec_matching(a); | ||
385 | + t2 = tcg_temp_new_vec_matching(b); | ||
386 | + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
387 | + tcg_gen_and_vec(vece, t1, a, mask); | ||
388 | + tcg_gen_and_vec(vece, t2, b, mask); | ||
389 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
390 | +} | ||
391 | + | ||
392 | +static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
393 | +{ | ||
394 | + TCGv_i32 t1, t2; | ||
395 | + | ||
396 | + t1 = tcg_temp_new_i32(); | ||
397 | + t2 = tcg_temp_new_i32(); | ||
398 | + tcg_gen_ext16u_i32(t1, a); | ||
399 | + tcg_gen_ext16u_i32(t2, b); | ||
400 | + tcg_gen_mul_i32(t, t1, t2); | ||
401 | +} | ||
402 | + | ||
403 | +static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
404 | +{ | ||
405 | + TCGv_i64 t1, t2; | ||
406 | + | ||
407 | + t1 = tcg_temp_new_i64(); | ||
408 | + t2 = tcg_temp_new_i64(); | ||
409 | + tcg_gen_ext32u_i64(t1, a); | ||
410 | + tcg_gen_ext32u_i64(t2, b); | ||
411 | + tcg_gen_mul_i64(t, t1, t2); | ||
412 | +} | ||
413 | + | ||
414 | +static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
415 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
416 | +{ | ||
417 | + static const TCGOpcode vecop_list[] = { | ||
418 | + INDEX_op_mul_vec, 0 | ||
419 | + }; | ||
420 | + static const GVecGen3 op[3] = { | ||
421 | + { | ||
422 | + .fniv = gen_vmulwev_u, | ||
423 | + .fno = gen_helper_vmulwev_h_bu, | ||
424 | + .opt_opc = vecop_list, | ||
425 | + .vece = MO_16 | ||
426 | + }, | ||
427 | + { | ||
428 | + .fni4 = gen_vmulwev_w_hu, | ||
429 | + .fniv = gen_vmulwev_u, | ||
430 | + .fno = gen_helper_vmulwev_w_hu, | ||
431 | + .opt_opc = vecop_list, | ||
432 | + .vece = MO_32 | ||
433 | + }, | ||
434 | + { | ||
435 | + .fni8 = gen_vmulwev_d_wu, | ||
436 | + .fniv = gen_vmulwev_u, | ||
437 | + .fno = gen_helper_vmulwev_d_wu, | ||
438 | + .opt_opc = vecop_list, | ||
439 | + .vece = MO_64 | ||
440 | + }, | ||
441 | + }; | ||
442 | + | ||
443 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
444 | +} | ||
445 | + | ||
446 | +TRANS(vmulwev_h_bu, gvec_vvv, MO_8, do_vmulwev_u) | ||
447 | +TRANS(vmulwev_w_hu, gvec_vvv, MO_16, do_vmulwev_u) | ||
448 | +TRANS(vmulwev_d_wu, gvec_vvv, MO_32, do_vmulwev_u) | ||
449 | + | ||
450 | +static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
451 | +{ | ||
452 | + TCGv_vec t1, t2; | ||
453 | + int halfbits = 4 << vece; | ||
454 | + | ||
455 | + t1 = tcg_temp_new_vec_matching(a); | ||
456 | + t2 = tcg_temp_new_vec_matching(b); | ||
457 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
458 | + tcg_gen_shri_vec(vece, t2, b, halfbits); | ||
459 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
460 | +} | ||
461 | + | ||
462 | +static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
463 | +{ | ||
464 | + TCGv_i32 t1, t2; | ||
465 | + | ||
466 | + t1 = tcg_temp_new_i32(); | ||
467 | + t2 = tcg_temp_new_i32(); | ||
468 | + tcg_gen_shri_i32(t1, a, 16); | ||
469 | + tcg_gen_shri_i32(t2, b, 16); | ||
470 | + tcg_gen_mul_i32(t, t1, t2); | ||
471 | +} | ||
472 | + | ||
473 | +static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
474 | +{ | ||
475 | + TCGv_i64 t1, t2; | ||
476 | + | ||
477 | + t1 = tcg_temp_new_i64(); | ||
478 | + t2 = tcg_temp_new_i64(); | ||
479 | + tcg_gen_shri_i64(t1, a, 32); | ||
480 | + tcg_gen_shri_i64(t2, b, 32); | ||
481 | + tcg_gen_mul_i64(t, t1, t2); | ||
482 | +} | ||
483 | + | ||
484 | +static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
485 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
486 | +{ | ||
487 | + static const TCGOpcode vecop_list[] = { | ||
488 | + INDEX_op_shri_vec, INDEX_op_mul_vec, 0 | ||
489 | + }; | ||
490 | + static const GVecGen3 op[3] = { | ||
491 | + { | ||
492 | + .fniv = gen_vmulwod_u, | ||
493 | + .fno = gen_helper_vmulwod_h_bu, | ||
494 | + .opt_opc = vecop_list, | ||
495 | + .vece = MO_16 | ||
496 | + }, | ||
497 | + { | ||
498 | + .fni4 = gen_vmulwod_w_hu, | ||
499 | + .fniv = gen_vmulwod_u, | ||
500 | + .fno = gen_helper_vmulwod_w_hu, | ||
501 | + .opt_opc = vecop_list, | ||
502 | + .vece = MO_32 | ||
503 | + }, | ||
504 | + { | ||
505 | + .fni8 = gen_vmulwod_d_wu, | ||
506 | + .fniv = gen_vmulwod_u, | ||
507 | + .fno = gen_helper_vmulwod_d_wu, | ||
508 | + .opt_opc = vecop_list, | ||
509 | + .vece = MO_64 | ||
510 | + }, | ||
511 | + }; | ||
512 | + | ||
513 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
514 | +} | ||
515 | + | ||
516 | +TRANS(vmulwod_h_bu, gvec_vvv, MO_8, do_vmulwod_u) | ||
517 | +TRANS(vmulwod_w_hu, gvec_vvv, MO_16, do_vmulwod_u) | ||
518 | +TRANS(vmulwod_d_wu, gvec_vvv, MO_32, do_vmulwod_u) | ||
519 | + | ||
520 | +static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
521 | +{ | ||
522 | + TCGv_vec t1, t2, mask; | ||
523 | + int halfbits = 4 << vece; | ||
524 | + | ||
525 | + t1 = tcg_temp_new_vec_matching(a); | ||
526 | + t2 = tcg_temp_new_vec_matching(b); | ||
527 | + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
528 | + tcg_gen_and_vec(vece, t1, a, mask); | ||
529 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
530 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
531 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
532 | +} | ||
533 | + | ||
534 | +static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
535 | +{ | ||
536 | + TCGv_i32 t1, t2; | ||
537 | + | ||
538 | + t1 = tcg_temp_new_i32(); | ||
539 | + t2 = tcg_temp_new_i32(); | ||
540 | + tcg_gen_ext16u_i32(t1, a); | ||
541 | + tcg_gen_ext16s_i32(t2, b); | ||
542 | + tcg_gen_mul_i32(t, t1, t2); | ||
543 | +} | ||
544 | + | ||
545 | +static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
546 | +{ | ||
547 | + TCGv_i64 t1, t2; | ||
548 | + | ||
549 | + t1 = tcg_temp_new_i64(); | ||
550 | + t2 = tcg_temp_new_i64(); | ||
551 | + tcg_gen_ext32u_i64(t1, a); | ||
552 | + tcg_gen_ext32s_i64(t2, b); | ||
553 | + tcg_gen_mul_i64(t, t1, t2); | ||
554 | +} | ||
555 | + | ||
556 | +static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
557 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
558 | +{ | ||
559 | + static const TCGOpcode vecop_list[] = { | ||
560 | + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 | ||
561 | + }; | ||
562 | + static const GVecGen3 op[3] = { | ||
563 | + { | ||
564 | + .fniv = gen_vmulwev_u_s, | ||
565 | + .fno = gen_helper_vmulwev_h_bu_b, | ||
566 | + .opt_opc = vecop_list, | ||
567 | + .vece = MO_16 | ||
568 | + }, | ||
569 | + { | ||
570 | + .fni4 = gen_vmulwev_w_hu_h, | ||
571 | + .fniv = gen_vmulwev_u_s, | ||
572 | + .fno = gen_helper_vmulwev_w_hu_h, | ||
573 | + .opt_opc = vecop_list, | ||
574 | + .vece = MO_32 | ||
575 | + }, | ||
576 | + { | ||
577 | + .fni8 = gen_vmulwev_d_wu_w, | ||
578 | + .fniv = gen_vmulwev_u_s, | ||
579 | + .fno = gen_helper_vmulwev_d_wu_w, | ||
580 | + .opt_opc = vecop_list, | ||
581 | + .vece = MO_64 | ||
582 | + }, | ||
583 | + }; | ||
584 | + | ||
585 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
586 | +} | ||
587 | + | ||
588 | +TRANS(vmulwev_h_bu_b, gvec_vvv, MO_8, do_vmulwev_u_s) | ||
589 | +TRANS(vmulwev_w_hu_h, gvec_vvv, MO_16, do_vmulwev_u_s) | ||
590 | +TRANS(vmulwev_d_wu_w, gvec_vvv, MO_32, do_vmulwev_u_s) | ||
591 | + | ||
592 | +static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
593 | +{ | ||
594 | + TCGv_vec t1, t2; | ||
595 | + int halfbits = 4 << vece; | ||
596 | + | ||
597 | + t1 = tcg_temp_new_vec_matching(a); | ||
598 | + t2 = tcg_temp_new_vec_matching(b); | ||
599 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
600 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
601 | + tcg_gen_mul_vec(vece, t, t1, t2); | ||
602 | +} | ||
603 | + | ||
604 | +static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
605 | +{ | ||
606 | + TCGv_i32 t1, t2; | ||
607 | + | ||
608 | + t1 = tcg_temp_new_i32(); | ||
609 | + t2 = tcg_temp_new_i32(); | ||
610 | + tcg_gen_shri_i32(t1, a, 16); | ||
611 | + tcg_gen_sari_i32(t2, b, 16); | ||
612 | + tcg_gen_mul_i32(t, t1, t2); | ||
613 | +} | ||
 | + | ||
614 | +static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
615 | +{ | ||
616 | + TCGv_i64 t1, t2; | ||
617 | + | ||
618 | + t1 = tcg_temp_new_i64(); | ||
619 | + t2 = tcg_temp_new_i64(); | ||
620 | + tcg_gen_shri_i64(t1, a, 32); | ||
621 | + tcg_gen_sari_i64(t2, b, 32); | ||
622 | + tcg_gen_mul_i64(t, t1, t2); | ||
623 | +} | ||
624 | + | ||
625 | +static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
626 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
627 | +{ | ||
628 | + static const TCGOpcode vecop_list[] = { | ||
629 | + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 | ||
630 | + }; | ||
631 | + static const GVecGen3 op[3] = { | ||
632 | + { | ||
633 | + .fniv = gen_vmulwod_u_s, | ||
634 | + .fno = gen_helper_vmulwod_h_bu_b, | ||
635 | + .opt_opc = vecop_list, | ||
636 | + .vece = MO_16 | ||
637 | + }, | ||
638 | + { | ||
639 | + .fni4 = gen_vmulwod_w_hu_h, | ||
640 | + .fniv = gen_vmulwod_u_s, | ||
641 | + .fno = gen_helper_vmulwod_w_hu_h, | ||
642 | + .opt_opc = vecop_list, | ||
643 | + .vece = MO_32 | ||
644 | + }, | ||
645 | + { | ||
646 | + .fni8 = gen_vmulwod_d_wu_w, | ||
647 | + .fniv = gen_vmulwod_u_s, | ||
648 | + .fno = gen_helper_vmulwod_d_wu_w, | ||
649 | + .opt_opc = vecop_list, | ||
650 | + .vece = MO_64 | ||
651 | + }, | ||
652 | + }; | ||
653 | + | ||
654 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
655 | +} | ||
656 | + | ||
657 | +TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s) | ||
658 | +TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s) | ||
659 | +TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s) | ||
660 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
661 | index XXXXXXX..XXXXXXX 100644 | ||
662 | --- a/target/loongarch/insns.decode | ||
663 | +++ b/target/loongarch/insns.decode | ||
664 | @@ -XXX,XX +XXX,XX @@ vmini_bu 0111 00101001 01100 ..... ..... ..... @vv_ui5 | ||
665 | vmini_hu 0111 00101001 01101 ..... ..... ..... @vv_ui5 | ||
666 | vmini_wu 0111 00101001 01110 ..... ..... ..... @vv_ui5 | ||
667 | vmini_du 0111 00101001 01111 ..... ..... ..... @vv_ui5 | ||
668 | + | ||
669 | +vmul_b 0111 00001000 01000 ..... ..... ..... @vvv | ||
670 | +vmul_h 0111 00001000 01001 ..... ..... ..... @vvv | ||
671 | +vmul_w 0111 00001000 01010 ..... ..... ..... @vvv | ||
672 | +vmul_d 0111 00001000 01011 ..... ..... ..... @vvv | ||
673 | +vmuh_b 0111 00001000 01100 ..... ..... ..... @vvv | ||
674 | +vmuh_h 0111 00001000 01101 ..... ..... ..... @vvv | ||
675 | +vmuh_w 0111 00001000 01110 ..... ..... ..... @vvv | ||
676 | +vmuh_d 0111 00001000 01111 ..... ..... ..... @vvv | ||
677 | +vmuh_bu 0111 00001000 10000 ..... ..... ..... @vvv | ||
678 | +vmuh_hu 0111 00001000 10001 ..... ..... ..... @vvv | ||
679 | +vmuh_wu 0111 00001000 10010 ..... ..... ..... @vvv | ||
680 | +vmuh_du 0111 00001000 10011 ..... ..... ..... @vvv | ||
681 | + | ||
682 | +vmulwev_h_b 0111 00001001 00000 ..... ..... ..... @vvv | ||
683 | +vmulwev_w_h 0111 00001001 00001 ..... ..... ..... @vvv | ||
684 | +vmulwev_d_w 0111 00001001 00010 ..... ..... ..... @vvv | ||
685 | +vmulwev_q_d 0111 00001001 00011 ..... ..... ..... @vvv | ||
686 | +vmulwod_h_b 0111 00001001 00100 ..... ..... ..... @vvv | ||
687 | +vmulwod_w_h 0111 00001001 00101 ..... ..... ..... @vvv | ||
688 | +vmulwod_d_w 0111 00001001 00110 ..... ..... ..... @vvv | ||
689 | +vmulwod_q_d 0111 00001001 00111 ..... ..... ..... @vvv | ||
690 | +vmulwev_h_bu 0111 00001001 10000 ..... ..... ..... @vvv | ||
691 | +vmulwev_w_hu 0111 00001001 10001 ..... ..... ..... @vvv | ||
692 | +vmulwev_d_wu 0111 00001001 10010 ..... ..... ..... @vvv | ||
693 | +vmulwev_q_du 0111 00001001 10011 ..... ..... ..... @vvv | ||
694 | +vmulwod_h_bu 0111 00001001 10100 ..... ..... ..... @vvv | ||
695 | +vmulwod_w_hu 0111 00001001 10101 ..... ..... ..... @vvv | ||
696 | +vmulwod_d_wu 0111 00001001 10110 ..... ..... ..... @vvv | ||
697 | +vmulwod_q_du 0111 00001001 10111 ..... ..... ..... @vvv | ||
698 | +vmulwev_h_bu_b 0111 00001010 00000 ..... ..... ..... @vvv | ||
699 | +vmulwev_w_hu_h 0111 00001010 00001 ..... ..... ..... @vvv | ||
700 | +vmulwev_d_wu_w 0111 00001010 00010 ..... ..... ..... @vvv | ||
701 | +vmulwev_q_du_d 0111 00001010 00011 ..... ..... ..... @vvv | ||
702 | +vmulwod_h_bu_b 0111 00001010 00100 ..... ..... ..... @vvv | ||
703 | +vmulwod_w_hu_h 0111 00001010 00101 ..... ..... ..... @vvv | ||
704 | +vmulwod_d_wu_w 0111 00001010 00110 ..... ..... ..... @vvv | ||
705 | +vmulwod_q_du_d 0111 00001010 00111 ..... ..... ..... @vvv | ||
706 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
707 | index XXXXXXX..XXXXXXX 100644 | ||
708 | --- a/target/loongarch/lsx_helper.c | ||
709 | +++ b/target/loongarch/lsx_helper.c | ||
710 | @@ -XXX,XX +XXX,XX @@ VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) | ||
711 | VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) | ||
712 | VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) | ||
713 | VMINMAXI(vmaxi_du, 64, UD, DO_MAX) | ||
714 | + | ||
715 | +#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ | ||
716 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
717 | +{ \ | ||
718 | + int i; \ | ||
719 | + VReg *Vd = (VReg *)vd; \ | ||
720 | + VReg *Vj = (VReg *)vj; \ | ||
721 | + VReg *Vk = (VReg *)vk; \ | ||
722 | + typedef __typeof(Vd->E1(0)) T; \ | ||
723 | + \ | ||
724 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
725 | + Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ | ||
726 | + } \ | ||
727 | +} | ||
728 | + | ||
729 | +void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v) | ||
730 | +{ | ||
731 | + uint64_t l, h1, h2; | ||
732 | + VReg *Vd = (VReg *)vd; | ||
733 | + VReg *Vj = (VReg *)vj; | ||
734 | + VReg *Vk = (VReg *)vk; | ||
735 | + | ||
736 | + muls64(&l, &h1, Vj->D(0), Vk->D(0)); | ||
737 | + muls64(&l, &h2, Vj->D(1), Vk->D(1)); | ||
738 | + | ||
739 | + Vd->D(0) = h1; | ||
740 | + Vd->D(1) = h2; | ||
741 | +} | ||
742 | + | ||
743 | +DO_VMUH(vmuh_b, 8, H, B, DO_MUH) | ||
744 | +DO_VMUH(vmuh_h, 16, W, H, DO_MUH) | ||
745 | +DO_VMUH(vmuh_w, 32, D, W, DO_MUH) | ||
746 | + | ||
747 | +void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v) | ||
748 | +{ | ||
749 | + uint64_t l, h1, h2; | ||
750 | + VReg *Vd = (VReg *)vd; | ||
751 | + VReg *Vj = (VReg *)vj; | ||
752 | + VReg *Vk = (VReg *)vk; | ||
753 | + | ||
754 | + mulu64(&l, &h1, Vj->D(0), Vk->D(0)); | ||
755 | + mulu64(&l, &h2, Vj->D(1), Vk->D(1)); | ||
756 | + | ||
757 | + Vd->D(0) = h1; | ||
758 | + Vd->D(1) = h2; | ||
759 | +} | ||
760 | + | ||
761 | +DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) | ||
762 | +DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) | ||
763 | +DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) | ||
764 | + | ||
765 | +#define DO_MUL(a, b) (a * b) | ||
766 | + | ||
767 | +DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) | ||
768 | +DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) | ||
769 | +DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) | ||
770 | + | ||
771 | +DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) | ||
772 | +DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) | ||
773 | +DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) | ||
774 | + | ||
775 | +DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) | ||
776 | +DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) | ||
777 | +DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) | ||
778 | + | ||
779 | +DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) | ||
780 | +DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) | ||
781 | +DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) | ||
782 | + | ||
783 | +DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
784 | +DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
785 | +DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
786 | + | ||
787 | +DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
788 | +DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
789 | +DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
790 | -- | ||
791 | 2.31.1
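
A quick orientation for the mixed-sign widening multiplies implemented above: the *_bu_b/_hu_h/_wu_w forms take an unsigned element from vj and a signed element from vk, widen both, and multiply. Below is a rough scalar sketch of one vmulwod.w.hu.h lane; the ref_* name and the plain-array view of the 128-bit register are illustrative assumptions, not code from the patch.

    #include <stdint.h>

    /* Illustrative scalar model of one vmulwod.w.hu.h lane (i in [0, 3]):
     * the odd 16-bit elements (lanes 2*i + 1) are read unsigned from vj
     * and signed from vk, widened to 32 bits, and multiplied. */
    static int32_t ref_vmulwod_w_hu_h(const uint16_t vj[8],
                                      const int16_t vk[8], int i)
    {
        return (int32_t)(uint32_t)vj[2 * i + 1] * (int32_t)vk[2 * i + 1];
    }

The even forms (vmulwev) read lanes 2 * i instead, which is exactly the difference between the mask/shli+sari sequences and the shri+sari sequences in the vector expansions above.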
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VMADD.{B/H/W/D}; | ||
3 | - VMSUB.{B/H/W/D}; | ||
4 | - VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U]; | ||
5 | - VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}. | ||
6 | 1 | ||
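As a rough guide to the widening multiply-accumulate forms added here, a minimal scalar sketch of one vmaddwev.h.b step follows; the ref_* name and the plain-array view of the 128-bit register are illustrative assumptions only, not code from this patch.

    #include <stdint.h>

    /* One vmaddwev.h.b step, with a 128-bit LSX register viewed as
     * 16 bytes / 8 halfwords: the even byte lanes of vj and vk are
     * widened to 16 bits, multiplied, and accumulated into vd
     * (wrapping mod 2^16, as the helper's += on a 16-bit element does).
     * The odd variant (vmaddwod) would read lanes 2*i + 1 instead. */
    static void ref_vmaddwev_h_b(int16_t vd[8], const int8_t vj[16],
                                 const int8_t vk[16])
    {
        for (int i = 0; i < 8; i++) {
            vd[i] += (int16_t)vj[2 * i] * (int16_t)vk[2 * i];
        }
    }

The vmsub.* forms are the same shape with -= and no widening, and the q_d/q_du/q_du_d forms are instead expanded with 128-bit arithmetic (muls2/mulu2/mulus2 plus add2) in the translator.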
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230504122810.4094787-16-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/disas.c | 34 ++ | ||
12 | target/loongarch/helper.h | 30 + | ||
13 | target/loongarch/insn_trans/trans_lsx.c.inc | 612 ++++++++++++++++++++ | ||
14 | target/loongarch/insns.decode | 34 ++ | ||
15 | target/loongarch/lsx_helper.c | 107 ++++ | ||
16 | 5 files changed, 817 insertions(+) | ||
17 | |||
18 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/loongarch/disas.c | ||
21 | +++ b/target/loongarch/disas.c | ||
22 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmulwod_h_bu_b, vvv) | ||
23 | INSN_LSX(vmulwod_w_hu_h, vvv) | ||
24 | INSN_LSX(vmulwod_d_wu_w, vvv) | ||
25 | INSN_LSX(vmulwod_q_du_d, vvv) | ||
26 | + | ||
27 | +INSN_LSX(vmadd_b, vvv) | ||
28 | +INSN_LSX(vmadd_h, vvv) | ||
29 | +INSN_LSX(vmadd_w, vvv) | ||
30 | +INSN_LSX(vmadd_d, vvv) | ||
31 | +INSN_LSX(vmsub_b, vvv) | ||
32 | +INSN_LSX(vmsub_h, vvv) | ||
33 | +INSN_LSX(vmsub_w, vvv) | ||
34 | +INSN_LSX(vmsub_d, vvv) | ||
35 | + | ||
36 | +INSN_LSX(vmaddwev_h_b, vvv) | ||
37 | +INSN_LSX(vmaddwev_w_h, vvv) | ||
38 | +INSN_LSX(vmaddwev_d_w, vvv) | ||
39 | +INSN_LSX(vmaddwev_q_d, vvv) | ||
40 | +INSN_LSX(vmaddwod_h_b, vvv) | ||
41 | +INSN_LSX(vmaddwod_w_h, vvv) | ||
42 | +INSN_LSX(vmaddwod_d_w, vvv) | ||
43 | +INSN_LSX(vmaddwod_q_d, vvv) | ||
44 | +INSN_LSX(vmaddwev_h_bu, vvv) | ||
45 | +INSN_LSX(vmaddwev_w_hu, vvv) | ||
46 | +INSN_LSX(vmaddwev_d_wu, vvv) | ||
47 | +INSN_LSX(vmaddwev_q_du, vvv) | ||
48 | +INSN_LSX(vmaddwod_h_bu, vvv) | ||
49 | +INSN_LSX(vmaddwod_w_hu, vvv) | ||
50 | +INSN_LSX(vmaddwod_d_wu, vvv) | ||
51 | +INSN_LSX(vmaddwod_q_du, vvv) | ||
52 | +INSN_LSX(vmaddwev_h_bu_b, vvv) | ||
53 | +INSN_LSX(vmaddwev_w_hu_h, vvv) | ||
54 | +INSN_LSX(vmaddwev_d_wu_w, vvv) | ||
55 | +INSN_LSX(vmaddwev_q_du_d, vvv) | ||
56 | +INSN_LSX(vmaddwod_h_bu_b, vvv) | ||
57 | +INSN_LSX(vmaddwod_w_hu_h, vvv) | ||
58 | +INSN_LSX(vmaddwod_d_wu_w, vvv) | ||
59 | +INSN_LSX(vmaddwod_q_du_d, vvv) | ||
60 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/helper.h | ||
63 | +++ b/target/loongarch/helper.h | ||
64 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
65 | DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
66 | DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
67 | DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
68 | + | ||
69 | +DEF_HELPER_FLAGS_4(vmadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
70 | +DEF_HELPER_FLAGS_4(vmadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
71 | +DEF_HELPER_FLAGS_4(vmadd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
72 | +DEF_HELPER_FLAGS_4(vmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
73 | +DEF_HELPER_FLAGS_4(vmsub_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
74 | +DEF_HELPER_FLAGS_4(vmsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
75 | +DEF_HELPER_FLAGS_4(vmsub_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
76 | +DEF_HELPER_FLAGS_4(vmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
77 | + | ||
78 | +DEF_HELPER_FLAGS_4(vmaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vmaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vmaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
81 | +DEF_HELPER_FLAGS_4(vmaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
82 | +DEF_HELPER_FLAGS_4(vmaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vmaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
84 | + | ||
85 | +DEF_HELPER_FLAGS_4(vmaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
86 | +DEF_HELPER_FLAGS_4(vmaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
87 | +DEF_HELPER_FLAGS_4(vmaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
88 | +DEF_HELPER_FLAGS_4(vmaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
89 | +DEF_HELPER_FLAGS_4(vmaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
90 | +DEF_HELPER_FLAGS_4(vmaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
91 | + | ||
92 | +DEF_HELPER_FLAGS_4(vmaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
93 | +DEF_HELPER_FLAGS_4(vmaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
94 | +DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
95 | +DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
96 | +DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
98 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
101 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
102 | @@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
103 | TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s) | ||
104 | TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s) | ||
105 | TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s) | ||
106 | + | ||
107 | +static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
108 | +{ | ||
109 | + TCGv_vec t1; | ||
110 | + | ||
111 | + t1 = tcg_temp_new_vec_matching(t); | ||
112 | + tcg_gen_mul_vec(vece, t1, a, b); | ||
113 | + tcg_gen_add_vec(vece, t, t, t1); | ||
114 | +} | ||
115 | + | ||
116 | +static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
117 | +{ | ||
118 | + TCGv_i32 t1; | ||
119 | + | ||
120 | + t1 = tcg_temp_new_i32(); | ||
121 | + tcg_gen_mul_i32(t1, a, b); | ||
122 | + tcg_gen_add_i32(t, t, t1); | ||
123 | +} | ||
124 | + | ||
125 | +static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
126 | +{ | ||
127 | + TCGv_i64 t1; | ||
128 | + | ||
129 | + t1 = tcg_temp_new_i64(); | ||
130 | + tcg_gen_mul_i64(t1, a, b); | ||
131 | + tcg_gen_add_i64(t, t, t1); | ||
132 | +} | ||
133 | + | ||
134 | +static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
135 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
136 | +{ | ||
137 | + static const TCGOpcode vecop_list[] = { | ||
138 | + INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
139 | + }; | ||
140 | + static const GVecGen3 op[4] = { | ||
141 | + { | ||
142 | + .fniv = gen_vmadd, | ||
143 | + .fno = gen_helper_vmadd_b, | ||
144 | + .load_dest = true, | ||
145 | + .opt_opc = vecop_list, | ||
146 | + .vece = MO_8 | ||
147 | + }, | ||
148 | + { | ||
149 | + .fniv = gen_vmadd, | ||
150 | + .fno = gen_helper_vmadd_h, | ||
151 | + .load_dest = true, | ||
152 | + .opt_opc = vecop_list, | ||
153 | + .vece = MO_16 | ||
154 | + }, | ||
155 | + { | ||
156 | + .fni4 = gen_vmadd_w, | ||
157 | + .fniv = gen_vmadd, | ||
158 | + .fno = gen_helper_vmadd_w, | ||
159 | + .load_dest = true, | ||
160 | + .opt_opc = vecop_list, | ||
161 | + .vece = MO_32 | ||
162 | + }, | ||
163 | + { | ||
164 | + .fni8 = gen_vmadd_d, | ||
165 | + .fniv = gen_vmadd, | ||
166 | + .fno = gen_helper_vmadd_d, | ||
167 | + .load_dest = true, | ||
168 | + .opt_opc = vecop_list, | ||
169 | + .vece = MO_64 | ||
170 | + }, | ||
171 | + }; | ||
172 | + | ||
173 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
174 | +} | ||
175 | + | ||
176 | +TRANS(vmadd_b, gvec_vvv, MO_8, do_vmadd) | ||
177 | +TRANS(vmadd_h, gvec_vvv, MO_16, do_vmadd) | ||
178 | +TRANS(vmadd_w, gvec_vvv, MO_32, do_vmadd) | ||
179 | +TRANS(vmadd_d, gvec_vvv, MO_64, do_vmadd) | ||
180 | + | ||
181 | +static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
182 | +{ | ||
183 | + TCGv_vec t1; | ||
184 | + | ||
185 | + t1 = tcg_temp_new_vec_matching(t); | ||
186 | + tcg_gen_mul_vec(vece, t1, a, b); | ||
187 | + tcg_gen_sub_vec(vece, t, t, t1); | ||
188 | +} | ||
189 | + | ||
190 | +static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
191 | +{ | ||
192 | + TCGv_i32 t1; | ||
193 | + | ||
194 | + t1 = tcg_temp_new_i32(); | ||
195 | + tcg_gen_mul_i32(t1, a, b); | ||
196 | + tcg_gen_sub_i32(t, t, t1); | ||
197 | +} | ||
198 | + | ||
199 | +static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
200 | +{ | ||
201 | + TCGv_i64 t1; | ||
202 | + | ||
203 | + t1 = tcg_temp_new_i64(); | ||
204 | + tcg_gen_mul_i64(t1, a, b); | ||
205 | + tcg_gen_sub_i64(t, t, t1); | ||
206 | +} | ||
207 | + | ||
208 | +static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
209 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
210 | +{ | ||
211 | + static const TCGOpcode vecop_list[] = { | ||
212 | + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 | ||
213 | + }; | ||
214 | + static const GVecGen3 op[4] = { | ||
215 | + { | ||
216 | + .fniv = gen_vmsub, | ||
217 | + .fno = gen_helper_vmsub_b, | ||
218 | + .load_dest = true, | ||
219 | + .opt_opc = vecop_list, | ||
220 | + .vece = MO_8 | ||
221 | + }, | ||
222 | + { | ||
223 | + .fniv = gen_vmsub, | ||
224 | + .fno = gen_helper_vmsub_h, | ||
225 | + .load_dest = true, | ||
226 | + .opt_opc = vecop_list, | ||
227 | + .vece = MO_16 | ||
228 | + }, | ||
229 | + { | ||
230 | + .fni4 = gen_vmsub_w, | ||
231 | + .fniv = gen_vmsub, | ||
232 | + .fno = gen_helper_vmsub_w, | ||
233 | + .load_dest = true, | ||
234 | + .opt_opc = vecop_list, | ||
235 | + .vece = MO_32 | ||
236 | + }, | ||
237 | + { | ||
238 | + .fni8 = gen_vmsub_d, | ||
239 | + .fniv = gen_vmsub, | ||
240 | + .fno = gen_helper_vmsub_d, | ||
241 | + .load_dest = true, | ||
242 | + .opt_opc = vecop_list, | ||
243 | + .vece = MO_64 | ||
244 | + }, | ||
245 | + }; | ||
246 | + | ||
247 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
248 | +} | ||
249 | + | ||
250 | +TRANS(vmsub_b, gvec_vvv, MO_8, do_vmsub) | ||
251 | +TRANS(vmsub_h, gvec_vvv, MO_16, do_vmsub) | ||
252 | +TRANS(vmsub_w, gvec_vvv, MO_32, do_vmsub) | ||
253 | +TRANS(vmsub_d, gvec_vvv, MO_64, do_vmsub) | ||
254 | + | ||
255 | +static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
256 | +{ | ||
257 | + TCGv_vec t1, t2, t3; | ||
258 | + int halfbits = 4 << vece; | ||
259 | + | ||
260 | + t1 = tcg_temp_new_vec_matching(a); | ||
261 | + t2 = tcg_temp_new_vec_matching(b); | ||
262 | + t3 = tcg_temp_new_vec_matching(t); | ||
263 | + tcg_gen_shli_vec(vece, t1, a, halfbits); | ||
264 | + tcg_gen_sari_vec(vece, t1, t1, halfbits); | ||
265 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
266 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
267 | + tcg_gen_mul_vec(vece, t3, t1, t2); | ||
268 | + tcg_gen_add_vec(vece, t, t, t3); | ||
269 | +} | ||
270 | + | ||
271 | +static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
272 | +{ | ||
273 | + TCGv_i32 t1; | ||
274 | + | ||
275 | + t1 = tcg_temp_new_i32(); | ||
276 | + gen_vmulwev_w_h(t1, a, b); | ||
277 | + tcg_gen_add_i32(t, t, t1); | ||
278 | +} | ||
279 | + | ||
280 | +static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
281 | +{ | ||
282 | + TCGv_i64 t1; | ||
283 | + | ||
284 | + t1 = tcg_temp_new_i64(); | ||
285 | + gen_vmulwev_d_w(t1, a, b); | ||
286 | + tcg_gen_add_i64(t, t, t1); | ||
287 | +} | ||
288 | + | ||
289 | +static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
290 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
291 | +{ | ||
292 | + static const TCGOpcode vecop_list[] = { | ||
293 | + INDEX_op_shli_vec, INDEX_op_sari_vec, | ||
294 | + INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
295 | + }; | ||
296 | + static const GVecGen3 op[3] = { | ||
297 | + { | ||
298 | + .fniv = gen_vmaddwev_s, | ||
299 | + .fno = gen_helper_vmaddwev_h_b, | ||
300 | + .load_dest = true, | ||
301 | + .opt_opc = vecop_list, | ||
302 | + .vece = MO_16 | ||
303 | + }, | ||
304 | + { | ||
305 | + .fni4 = gen_vmaddwev_w_h, | ||
306 | + .fniv = gen_vmaddwev_s, | ||
307 | + .fno = gen_helper_vmaddwev_w_h, | ||
308 | + .load_dest = true, | ||
309 | + .opt_opc = vecop_list, | ||
310 | + .vece = MO_32 | ||
311 | + }, | ||
312 | + { | ||
313 | + .fni8 = gen_vmaddwev_d_w, | ||
314 | + .fniv = gen_vmaddwev_s, | ||
315 | + .fno = gen_helper_vmaddwev_d_w, | ||
316 | + .load_dest = true, | ||
317 | + .opt_opc = vecop_list, | ||
318 | + .vece = MO_64 | ||
319 | + }, | ||
320 | + }; | ||
321 | + | ||
322 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
323 | +} | ||
324 | + | ||
325 | +TRANS(vmaddwev_h_b, gvec_vvv, MO_8, do_vmaddwev_s) | ||
326 | +TRANS(vmaddwev_w_h, gvec_vvv, MO_16, do_vmaddwev_s) | ||
327 | +TRANS(vmaddwev_d_w, gvec_vvv, MO_32, do_vmaddwev_s) | ||
328 | + | ||
329 | +#define VMADD_Q(NAME, FN, idx1, idx2) \ | ||
330 | +static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \ | ||
331 | +{ \ | ||
332 | + TCGv_i64 rh, rl, arg1, arg2, th, tl; \ | ||
333 | + \ | ||
334 | + rh = tcg_temp_new_i64(); \ | ||
335 | + rl = tcg_temp_new_i64(); \ | ||
336 | + arg1 = tcg_temp_new_i64(); \ | ||
337 | + arg2 = tcg_temp_new_i64(); \ | ||
338 | + th = tcg_temp_new_i64(); \ | ||
339 | + tl = tcg_temp_new_i64(); \ | ||
340 | + \ | ||
341 | + get_vreg64(arg1, a->vj, idx1); \ | ||
342 | + get_vreg64(arg2, a->vk, idx2); \ | ||
343 | + get_vreg64(rh, a->vd, 1); \ | ||
344 | + get_vreg64(rl, a->vd, 0); \ | ||
345 | + \ | ||
346 | + tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \ | ||
347 | + tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \ | ||
348 | + \ | ||
349 | + set_vreg64(rh, a->vd, 1); \ | ||
350 | + set_vreg64(rl, a->vd, 0); \ | ||
351 | + \ | ||
352 | + return true; \ | ||
353 | +} | ||
354 | + | ||
355 | +VMADD_Q(vmaddwev_q_d, muls2, 0, 0) | ||
356 | +VMADD_Q(vmaddwod_q_d, muls2, 1, 1) | ||
357 | +VMADD_Q(vmaddwev_q_du, mulu2, 0, 0) | ||
358 | +VMADD_Q(vmaddwod_q_du, mulu2, 1, 1) | ||
359 | +VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0) | ||
360 | +VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1) | ||
361 | + | ||
362 | +static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
363 | +{ | ||
364 | + TCGv_vec t1, t2, t3; | ||
365 | + int halfbits = 4 << vece; | ||
366 | + | ||
367 | + t1 = tcg_temp_new_vec_matching(a); | ||
368 | + t2 = tcg_temp_new_vec_matching(b); | ||
369 | + t3 = tcg_temp_new_vec_matching(t); | ||
370 | + tcg_gen_sari_vec(vece, t1, a, halfbits); | ||
371 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
372 | + tcg_gen_mul_vec(vece, t3, t1, t2); | ||
373 | + tcg_gen_add_vec(vece, t, t, t3); | ||
374 | +} | ||
375 | + | ||
376 | +static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
377 | +{ | ||
378 | + TCGv_i32 t1; | ||
379 | + | ||
380 | + t1 = tcg_temp_new_i32(); | ||
381 | + gen_vmulwod_w_h(t1, a, b); | ||
382 | + tcg_gen_add_i32(t, t, t1); | ||
383 | +} | ||
384 | + | ||
385 | +static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
386 | +{ | ||
387 | + TCGv_i64 t1; | ||
388 | + | ||
389 | + t1 = tcg_temp_new_i64(); | ||
390 | + gen_vmulwod_d_w(t1, a, b); | ||
391 | + tcg_gen_add_i64(t, t, t1); | ||
392 | +} | ||
393 | + | ||
394 | +static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
395 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
396 | +{ | ||
397 | + static const TCGOpcode vecop_list[] = { | ||
398 | + INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
399 | + }; | ||
400 | + static const GVecGen3 op[3] = { | ||
401 | + { | ||
402 | + .fniv = gen_vmaddwod_s, | ||
403 | + .fno = gen_helper_vmaddwod_h_b, | ||
404 | + .load_dest = true, | ||
405 | + .opt_opc = vecop_list, | ||
406 | + .vece = MO_16 | ||
407 | + }, | ||
408 | + { | ||
409 | + .fni4 = gen_vmaddwod_w_h, | ||
410 | + .fniv = gen_vmaddwod_s, | ||
411 | + .fno = gen_helper_vmaddwod_w_h, | ||
412 | + .load_dest = true, | ||
413 | + .opt_opc = vecop_list, | ||
414 | + .vece = MO_32 | ||
415 | + }, | ||
416 | + { | ||
417 | + .fni8 = gen_vmaddwod_d_w, | ||
418 | + .fniv = gen_vmaddwod_s, | ||
419 | + .fno = gen_helper_vmaddwod_d_w, | ||
420 | + .load_dest = true, | ||
421 | + .opt_opc = vecop_list, | ||
422 | + .vece = MO_64 | ||
423 | + }, | ||
424 | + }; | ||
425 | + | ||
426 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
427 | +} | ||
428 | + | ||
429 | +TRANS(vmaddwod_h_b, gvec_vvv, MO_8, do_vmaddwod_s) | ||
430 | +TRANS(vmaddwod_w_h, gvec_vvv, MO_16, do_vmaddwod_s) | ||
431 | +TRANS(vmaddwod_d_w, gvec_vvv, MO_32, do_vmaddwod_s) | ||
432 | + | ||
433 | +static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
434 | +{ | ||
435 | + TCGv_vec t1, t2, mask; | ||
436 | + | ||
437 | + t1 = tcg_temp_new_vec_matching(t); | ||
438 | + t2 = tcg_temp_new_vec_matching(b); | ||
439 | + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
440 | + tcg_gen_and_vec(vece, t1, a, mask); | ||
441 | + tcg_gen_and_vec(vece, t2, b, mask); | ||
442 | + tcg_gen_mul_vec(vece, t1, t1, t2); | ||
443 | + tcg_gen_add_vec(vece, t, t, t1); | ||
444 | +} | ||
445 | + | ||
446 | +static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
447 | +{ | ||
448 | + TCGv_i32 t1; | ||
449 | + | ||
450 | + t1 = tcg_temp_new_i32(); | ||
451 | + gen_vmulwev_w_hu(t1, a, b); | ||
452 | + tcg_gen_add_i32(t, t, t1); | ||
453 | +} | ||
454 | + | ||
455 | +static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
456 | +{ | ||
457 | + TCGv_i64 t1; | ||
458 | + | ||
459 | + t1 = tcg_temp_new_i64(); | ||
460 | + gen_vmulwev_d_wu(t1, a, b); | ||
461 | + tcg_gen_add_i64(t, t, t1); | ||
462 | +} | ||
463 | + | ||
464 | +static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
465 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
466 | +{ | ||
467 | + static const TCGOpcode vecop_list[] = { | ||
468 | + INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
469 | + }; | ||
470 | + static const GVecGen3 op[3] = { | ||
471 | + { | ||
472 | + .fniv = gen_vmaddwev_u, | ||
473 | + .fno = gen_helper_vmaddwev_h_bu, | ||
474 | + .load_dest = true, | ||
475 | + .opt_opc = vecop_list, | ||
476 | + .vece = MO_16 | ||
477 | + }, | ||
478 | + { | ||
479 | + .fni4 = gen_vmaddwev_w_hu, | ||
480 | + .fniv = gen_vmaddwev_u, | ||
481 | + .fno = gen_helper_vmaddwev_w_hu, | ||
482 | + .load_dest = true, | ||
483 | + .opt_opc = vecop_list, | ||
484 | + .vece = MO_32 | ||
485 | + }, | ||
486 | + { | ||
487 | + .fni8 = gen_vmaddwev_d_wu, | ||
488 | + .fniv = gen_vmaddwev_u, | ||
489 | + .fno = gen_helper_vmaddwev_d_wu, | ||
490 | + .load_dest = true, | ||
491 | + .opt_opc = vecop_list, | ||
492 | + .vece = MO_64 | ||
493 | + }, | ||
494 | + }; | ||
495 | + | ||
496 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
497 | +} | ||
498 | + | ||
499 | +TRANS(vmaddwev_h_bu, gvec_vvv, MO_8, do_vmaddwev_u) | ||
500 | +TRANS(vmaddwev_w_hu, gvec_vvv, MO_16, do_vmaddwev_u) | ||
501 | +TRANS(vmaddwev_d_wu, gvec_vvv, MO_32, do_vmaddwev_u) | ||
502 | + | ||
503 | +static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
504 | +{ | ||
505 | + TCGv_vec t1, t2, t3; | ||
506 | + int halfbits = 4 << vece; | ||
507 | + | ||
508 | + t1 = tcg_temp_new_vec_matching(a); | ||
509 | + t2 = tcg_temp_new_vec_matching(b); | ||
510 | + t3 = tcg_temp_new_vec_matching(t); | ||
511 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
512 | + tcg_gen_shri_vec(vece, t2, b, halfbits); | ||
513 | + tcg_gen_mul_vec(vece, t3, t1, t2); | ||
514 | + tcg_gen_add_vec(vece, t, t, t3); | ||
515 | +} | ||
516 | + | ||
517 | +static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
518 | +{ | ||
519 | + TCGv_i32 t1; | ||
520 | + | ||
521 | + t1 = tcg_temp_new_i32(); | ||
522 | + gen_vmulwod_w_hu(t1, a, b); | ||
523 | + tcg_gen_add_i32(t, t, t1); | ||
524 | +} | ||
525 | + | ||
526 | +static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
527 | +{ | ||
528 | + TCGv_i64 t1; | ||
529 | + | ||
530 | + t1 = tcg_temp_new_i64(); | ||
531 | + gen_vmulwod_d_wu(t1, a, b); | ||
532 | + tcg_gen_add_i64(t, t, t1); | ||
533 | +} | ||
534 | + | ||
535 | +static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
536 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
537 | +{ | ||
538 | + static const TCGOpcode vecop_list[] = { | ||
539 | + INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
540 | + }; | ||
541 | + static const GVecGen3 op[3] = { | ||
542 | + { | ||
543 | + .fniv = gen_vmaddwod_u, | ||
544 | + .fno = gen_helper_vmaddwod_h_bu, | ||
545 | + .load_dest = true, | ||
546 | + .opt_opc = vecop_list, | ||
547 | + .vece = MO_16 | ||
548 | + }, | ||
549 | + { | ||
550 | + .fni4 = gen_vmaddwod_w_hu, | ||
551 | + .fniv = gen_vmaddwod_u, | ||
552 | + .fno = gen_helper_vmaddwod_w_hu, | ||
553 | + .load_dest = true, | ||
554 | + .opt_opc = vecop_list, | ||
555 | + .vece = MO_32 | ||
556 | + }, | ||
557 | + { | ||
558 | + .fni8 = gen_vmaddwod_d_wu, | ||
559 | + .fniv = gen_vmaddwod_u, | ||
560 | + .fno = gen_helper_vmaddwod_d_wu, | ||
561 | + .load_dest = true, | ||
562 | + .opt_opc = vecop_list, | ||
563 | + .vece = MO_64 | ||
564 | + }, | ||
565 | + }; | ||
566 | + | ||
567 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
568 | +} | ||
569 | + | ||
570 | +TRANS(vmaddwod_h_bu, gvec_vvv, MO_8, do_vmaddwod_u) | ||
571 | +TRANS(vmaddwod_w_hu, gvec_vvv, MO_16, do_vmaddwod_u) | ||
572 | +TRANS(vmaddwod_d_wu, gvec_vvv, MO_32, do_vmaddwod_u) | ||
573 | + | ||
574 | +static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
575 | +{ | ||
576 | + TCGv_vec t1, t2, mask; | ||
577 | + int halfbits = 4 << vece; | ||
578 | + | ||
579 | + t1 = tcg_temp_new_vec_matching(a); | ||
580 | + t2 = tcg_temp_new_vec_matching(b); | ||
581 | + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); | ||
582 | + tcg_gen_and_vec(vece, t1, a, mask); | ||
583 | + tcg_gen_shli_vec(vece, t2, b, halfbits); | ||
584 | + tcg_gen_sari_vec(vece, t2, t2, halfbits); | ||
585 | + tcg_gen_mul_vec(vece, t1, t1, t2); | ||
586 | + tcg_gen_add_vec(vece, t, t, t1); | ||
587 | +} | ||
588 | + | ||
589 | +static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
590 | +{ | ||
591 | + TCGv_i32 t1; | ||
592 | + | ||
593 | + t1 = tcg_temp_new_i32(); | ||
594 | + gen_vmulwev_w_hu_h(t1, a, b); | ||
595 | + tcg_gen_add_i32(t, t, t1); | ||
596 | +} | ||
597 | + | ||
598 | +static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
599 | +{ | ||
600 | + TCGv_i64 t1; | ||
601 | + | ||
602 | + t1 = tcg_temp_new_i64(); | ||
603 | + gen_vmulwev_d_wu_w(t1, a, b); | ||
604 | + tcg_gen_add_i64(t, t, t1); | ||
605 | +} | ||
606 | + | ||
607 | +static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
608 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
609 | +{ | ||
610 | + static const TCGOpcode vecop_list[] = { | ||
611 | + INDEX_op_shli_vec, INDEX_op_sari_vec, | ||
612 | + INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
613 | + }; | ||
614 | + static const GVecGen3 op[3] = { | ||
615 | + { | ||
616 | + .fniv = gen_vmaddwev_u_s, | ||
617 | + .fno = gen_helper_vmaddwev_h_bu_b, | ||
618 | + .load_dest = true, | ||
619 | + .opt_opc = vecop_list, | ||
620 | + .vece = MO_16 | ||
621 | + }, | ||
622 | + { | ||
623 | + .fni4 = gen_vmaddwev_w_hu_h, | ||
624 | + .fniv = gen_vmaddwev_u_s, | ||
625 | + .fno = gen_helper_vmaddwev_w_hu_h, | ||
626 | + .load_dest = true, | ||
627 | + .opt_opc = vecop_list, | ||
628 | + .vece = MO_32 | ||
629 | + }, | ||
630 | + { | ||
631 | + .fni8 = gen_vmaddwev_d_wu_w, | ||
632 | + .fniv = gen_vmaddwev_u_s, | ||
633 | + .fno = gen_helper_vmaddwev_d_wu_w, | ||
634 | + .load_dest = true, | ||
635 | + .opt_opc = vecop_list, | ||
636 | + .vece = MO_64 | ||
637 | + }, | ||
638 | + }; | ||
639 | + | ||
640 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
641 | +} | ||
642 | + | ||
643 | +TRANS(vmaddwev_h_bu_b, gvec_vvv, MO_8, do_vmaddwev_u_s) | ||
644 | +TRANS(vmaddwev_w_hu_h, gvec_vvv, MO_16, do_vmaddwev_u_s) | ||
645 | +TRANS(vmaddwev_d_wu_w, gvec_vvv, MO_32, do_vmaddwev_u_s) | ||
646 | + | ||
647 | +static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
648 | +{ | ||
649 | + TCGv_vec t1, t2, t3; | ||
650 | + int halfbits = 4 << vece; | ||
651 | + | ||
652 | + t1 = tcg_temp_new_vec_matching(a); | ||
653 | + t2 = tcg_temp_new_vec_matching(b); | ||
654 | + t3 = tcg_temp_new_vec_matching(t); | ||
655 | + tcg_gen_shri_vec(vece, t1, a, halfbits); | ||
656 | + tcg_gen_sari_vec(vece, t2, b, halfbits); | ||
657 | + tcg_gen_mul_vec(vece, t3, t1, t2); | ||
658 | + tcg_gen_add_vec(vece, t, t, t3); | ||
659 | +} | ||
660 | + | ||
661 | +static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) | ||
662 | +{ | ||
663 | + TCGv_i32 t1; | ||
664 | + | ||
665 | + t1 = tcg_temp_new_i32(); | ||
666 | + gen_vmulwod_w_hu_h(t1, a, b); | ||
667 | + tcg_gen_add_i32(t, t, t1); | ||
668 | +} | ||
669 | + | ||
670 | +static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) | ||
671 | +{ | ||
672 | + TCGv_i64 t1; | ||
673 | + | ||
674 | + t1 = tcg_temp_new_i64(); | ||
675 | + gen_vmulwod_d_wu_w(t1, a, b); | ||
676 | + tcg_gen_add_i64(t, t, t1); | ||
677 | +} | ||
678 | + | ||
679 | +static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
680 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
681 | +{ | ||
682 | + static const TCGOpcode vecop_list[] = { | ||
683 | + INDEX_op_shri_vec, INDEX_op_sari_vec, | ||
684 | + INDEX_op_mul_vec, INDEX_op_add_vec, 0 | ||
685 | + }; | ||
686 | + static const GVecGen3 op[3] = { | ||
687 | + { | ||
688 | + .fniv = gen_vmaddwod_u_s, | ||
689 | + .fno = gen_helper_vmaddwod_h_bu_b, | ||
690 | + .load_dest = true, | ||
691 | + .opt_opc = vecop_list, | ||
692 | + .vece = MO_16 | ||
693 | + }, | ||
694 | + { | ||
695 | + .fni4 = gen_vmaddwod_w_hu_h, | ||
696 | + .fniv = gen_vmaddwod_u_s, | ||
697 | + .fno = gen_helper_vmaddwod_w_hu_h, | ||
698 | + .load_dest = true, | ||
699 | + .opt_opc = vecop_list, | ||
700 | + .vece = MO_32 | ||
701 | + }, | ||
702 | + { | ||
703 | + .fni8 = gen_vmaddwod_d_wu_w, | ||
704 | + .fniv = gen_vmaddwod_u_s, | ||
705 | + .fno = gen_helper_vmaddwod_d_wu_w, | ||
706 | + .load_dest = true, | ||
707 | + .opt_opc = vecop_list, | ||
708 | + .vece = MO_64 | ||
709 | + }, | ||
710 | + }; | ||
711 | + | ||
712 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
713 | +} | ||
714 | + | ||
715 | +TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s) | ||
716 | +TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s) | ||
717 | +TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s) | ||
718 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
719 | index XXXXXXX..XXXXXXX 100644 | ||
720 | --- a/target/loongarch/insns.decode | ||
721 | +++ b/target/loongarch/insns.decode | ||
722 | @@ -XXX,XX +XXX,XX @@ vmulwod_h_bu_b 0111 00001010 00100 ..... ..... ..... @vvv | ||
723 | vmulwod_w_hu_h 0111 00001010 00101 ..... ..... ..... @vvv | ||
724 | vmulwod_d_wu_w 0111 00001010 00110 ..... ..... ..... @vvv | ||
725 | vmulwod_q_du_d 0111 00001010 00111 ..... ..... ..... @vvv | ||
726 | + | ||
727 | +vmadd_b 0111 00001010 10000 ..... ..... ..... @vvv | ||
728 | +vmadd_h 0111 00001010 10001 ..... ..... ..... @vvv | ||
729 | +vmadd_w 0111 00001010 10010 ..... ..... ..... @vvv | ||
730 | +vmadd_d 0111 00001010 10011 ..... ..... ..... @vvv | ||
731 | +vmsub_b 0111 00001010 10100 ..... ..... ..... @vvv | ||
732 | +vmsub_h 0111 00001010 10101 ..... ..... ..... @vvv | ||
733 | +vmsub_w 0111 00001010 10110 ..... ..... ..... @vvv | ||
734 | +vmsub_d 0111 00001010 10111 ..... ..... ..... @vvv | ||
735 | + | ||
736 | +vmaddwev_h_b 0111 00001010 11000 ..... ..... ..... @vvv | ||
737 | +vmaddwev_w_h 0111 00001010 11001 ..... ..... ..... @vvv | ||
738 | +vmaddwev_d_w 0111 00001010 11010 ..... ..... ..... @vvv | ||
739 | +vmaddwev_q_d 0111 00001010 11011 ..... ..... ..... @vvv | ||
740 | +vmaddwod_h_b 0111 00001010 11100 ..... ..... ..... @vvv | ||
741 | +vmaddwod_w_h 0111 00001010 11101 ..... ..... ..... @vvv | ||
742 | +vmaddwod_d_w 0111 00001010 11110 ..... ..... ..... @vvv | ||
743 | +vmaddwod_q_d 0111 00001010 11111 ..... ..... ..... @vvv | ||
744 | +vmaddwev_h_bu 0111 00001011 01000 ..... ..... ..... @vvv | ||
745 | +vmaddwev_w_hu 0111 00001011 01001 ..... ..... ..... @vvv | ||
746 | +vmaddwev_d_wu 0111 00001011 01010 ..... ..... ..... @vvv | ||
747 | +vmaddwev_q_du 0111 00001011 01011 ..... ..... ..... @vvv | ||
748 | +vmaddwod_h_bu 0111 00001011 01100 ..... ..... ..... @vvv | ||
749 | +vmaddwod_w_hu 0111 00001011 01101 ..... ..... ..... @vvv | ||
750 | +vmaddwod_d_wu 0111 00001011 01110 ..... ..... ..... @vvv | ||
751 | +vmaddwod_q_du 0111 00001011 01111 ..... ..... ..... @vvv | ||
752 | +vmaddwev_h_bu_b 0111 00001011 11000 ..... ..... ..... @vvv | ||
753 | +vmaddwev_w_hu_h 0111 00001011 11001 ..... ..... ..... @vvv | ||
754 | +vmaddwev_d_wu_w 0111 00001011 11010 ..... ..... ..... @vvv | ||
755 | +vmaddwev_q_du_d 0111 00001011 11011 ..... ..... ..... @vvv | ||
756 | +vmaddwod_h_bu_b 0111 00001011 11100 ..... ..... ..... @vvv | ||
757 | +vmaddwod_w_hu_h 0111 00001011 11101 ..... ..... ..... @vvv | ||
758 | +vmaddwod_d_wu_w 0111 00001011 11110 ..... ..... ..... @vvv | ||
759 | +vmaddwod_q_du_d 0111 00001011 11111 ..... ..... ..... @vvv | ||
760 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
761 | index XXXXXXX..XXXXXXX 100644 | ||
762 | --- a/target/loongarch/lsx_helper.c | ||
763 | +++ b/target/loongarch/lsx_helper.c | ||
764 | @@ -XXX,XX +XXX,XX @@ DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
765 | DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
766 | DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
767 | DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
768 | + | ||
769 | +#define DO_MADD(a, b, c) (a + b * c) | ||
770 | +#define DO_MSUB(a, b, c) (a - b * c) | ||
771 | + | ||
772 | +#define VMADDSUB(NAME, BIT, E, DO_OP) \ | ||
773 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
774 | +{ \ | ||
775 | + int i; \ | ||
776 | + VReg *Vd = (VReg *)vd; \ | ||
777 | + VReg *Vj = (VReg *)vj; \ | ||
778 | + VReg *Vk = (VReg *)vk; \ | ||
779 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
780 | + Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \ | ||
781 | + } \ | ||
782 | +} | ||
783 | + | ||
784 | +VMADDSUB(vmadd_b, 8, B, DO_MADD) | ||
785 | +VMADDSUB(vmadd_h, 16, H, DO_MADD) | ||
786 | +VMADDSUB(vmadd_w, 32, W, DO_MADD) | ||
787 | +VMADDSUB(vmadd_d, 64, D, DO_MADD) | ||
788 | +VMADDSUB(vmsub_b, 8, B, DO_MSUB) | ||
789 | +VMADDSUB(vmsub_h, 16, H, DO_MSUB) | ||
790 | +VMADDSUB(vmsub_w, 32, W, DO_MSUB) | ||
791 | +VMADDSUB(vmsub_d, 64, D, DO_MSUB) | ||
792 | + | ||
793 | +#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ | ||
794 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
795 | +{ \ | ||
796 | + int i; \ | ||
797 | + VReg *Vd = (VReg *)vd; \ | ||
798 | + VReg *Vj = (VReg *)vj; \ | ||
799 | + VReg *Vk = (VReg *)vk; \ | ||
800 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
801 | + \ | ||
802 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
803 | + Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ | ||
804 | + } \ | ||
805 | +} | ||
806 | + | ||
807 | +VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) | ||
808 | +VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) | ||
809 | +VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) | ||
810 | +VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) | ||
811 | +VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) | ||
812 | +VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) | ||
813 | + | ||
814 | +#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ | ||
815 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
816 | +{ \ | ||
817 | + int i; \ | ||
818 | + VReg *Vd = (VReg *)vd; \ | ||
819 | + VReg *Vj = (VReg *)vj; \ | ||
820 | + VReg *Vk = (VReg *)vk; \ | ||
821 | + typedef __typeof(Vd->E1(0)) TD; \ | ||
822 | + \ | ||
823 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
824 | + Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ | ||
825 | + (TD)Vk->E2(2 * i + 1)); \ | ||
826 | + } \ | ||
827 | +} | ||
828 | + | ||
829 | +VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) | ||
830 | +VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) | ||
831 | +VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) | ||
832 | +VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) | ||
833 | +VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) | ||
834 | +VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) | ||
835 | + | ||
836 | +#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
837 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
838 | +{ \ | ||
839 | + int i; \ | ||
840 | + VReg *Vd = (VReg *)vd; \ | ||
841 | + VReg *Vj = (VReg *)vj; \ | ||
842 | + VReg *Vk = (VReg *)vk; \ | ||
843 | + typedef __typeof(Vd->ES1(0)) TS1; \ | ||
844 | + typedef __typeof(Vd->EU1(0)) TU1; \ | ||
845 | + \ | ||
846 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
847 | + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ | ||
848 | + (TS1)Vk->ES2(2 * i)); \ | ||
849 | + } \ | ||
850 | +} | ||
851 | + | ||
852 | +VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
853 | +VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
854 | +VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
855 | + | ||
856 | +#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ | ||
857 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
858 | +{ \ | ||
859 | + int i; \ | ||
860 | + VReg *Vd = (VReg *)vd; \ | ||
861 | + VReg *Vj = (VReg *)vj; \ | ||
862 | + VReg *Vk = (VReg *)vk; \ | ||
863 | + typedef __typeof(Vd->ES1(0)) TS1; \ | ||
864 | + typedef __typeof(Vd->EU1(0)) TU1; \ | ||
865 | + \ | ||
866 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
867 | + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ | ||
868 | + (TS1)Vk->ES2(2 * i + 1)); \ | ||
869 | + } \ | ||
870 | +} | ||
871 | + | ||
872 | +VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
873 | +VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
874 | +VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
875 | -- | ||
876 | 2.31.1
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VDIV.{B/H/W/D}[U]; | ||
3 | - VMOD.{B/H/W/D}[U]. | ||
4 | 1 | ||
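As a reference for the conventions encoded in the DO_DIV/DO_REM macros below, here is a scalar sketch of one signed 32-bit lane; the ref_* names are illustrative only. A zero divisor produces 0, and the INT32_MIN / -1 overflow case produces the dividend for the quotient and 0 for the remainder, which keeps the C helpers clear of undefined behaviour.

    #include <stdint.h>

    /* Quotient of one vdiv.w lane under the helper's conventions. */
    static int32_t ref_vdiv_w(int32_t n, int32_t m)
    {
        if (m == 0) {
            return 0;                     /* divide by zero -> 0 */
        }
        if (n == INT32_MIN && m == -1) {
            return n;                     /* overflow case -> dividend */
        }
        return n / m;
    }

    /* Remainder of one vmod.w lane under the same conventions. */
    static int32_t ref_vmod_w(int32_t n, int32_t m)
    {
        if (m == 0 || (n == INT32_MIN && m == -1)) {
            return 0;
        }
        return n % m;
    }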
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-17-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 17 ++++++++++ | ||
10 | target/loongarch/helper.h | 17 ++++++++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 17 ++++++++++ | ||
12 | target/loongarch/insns.decode | 17 ++++++++++ | ||
13 | target/loongarch/lsx_helper.c | 37 +++++++++++++++++++++ | ||
14 | 5 files changed, 105 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmaddwod_h_bu_b, vvv) | ||
21 | INSN_LSX(vmaddwod_w_hu_h, vvv) | ||
22 | INSN_LSX(vmaddwod_d_wu_w, vvv) | ||
23 | INSN_LSX(vmaddwod_q_du_d, vvv) | ||
24 | + | ||
25 | +INSN_LSX(vdiv_b, vvv) | ||
26 | +INSN_LSX(vdiv_h, vvv) | ||
27 | +INSN_LSX(vdiv_w, vvv) | ||
28 | +INSN_LSX(vdiv_d, vvv) | ||
29 | +INSN_LSX(vdiv_bu, vvv) | ||
30 | +INSN_LSX(vdiv_hu, vvv) | ||
31 | +INSN_LSX(vdiv_wu, vvv) | ||
32 | +INSN_LSX(vdiv_du, vvv) | ||
33 | +INSN_LSX(vmod_b, vvv) | ||
34 | +INSN_LSX(vmod_h, vvv) | ||
35 | +INSN_LSX(vmod_w, vvv) | ||
36 | +INSN_LSX(vmod_d, vvv) | ||
37 | +INSN_LSX(vmod_bu, vvv) | ||
38 | +INSN_LSX(vmod_hu, vvv) | ||
39 | +INSN_LSX(vmod_wu, vvv) | ||
40 | +INSN_LSX(vmod_du, vvv) | ||
41 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/helper.h | ||
44 | +++ b/target/loongarch/helper.h | ||
45 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
46 | DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
47 | DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
48 | DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
49 | + | ||
50 | +DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32) | ||
51 | +DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32) | ||
52 | +DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32) | ||
53 | +DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32) | ||
55 | +DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32) | ||
56 | +DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32) | ||
57 | +DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32) | ||
58 | +DEF_HELPER_4(vmod_b, void, env, i32, i32, i32) | ||
59 | +DEF_HELPER_4(vmod_h, void, env, i32, i32, i32) | ||
60 | +DEF_HELPER_4(vmod_w, void, env, i32, i32, i32) | ||
61 | +DEF_HELPER_4(vmod_d, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vmod_du, void, env, i32, i32, i32) | ||
66 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
69 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
70 | @@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
71 | TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s) | ||
72 | TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s) | ||
73 | TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s) | ||
74 | + | ||
75 | +TRANS(vdiv_b, gen_vvv, gen_helper_vdiv_b) | ||
76 | +TRANS(vdiv_h, gen_vvv, gen_helper_vdiv_h) | ||
77 | +TRANS(vdiv_w, gen_vvv, gen_helper_vdiv_w) | ||
78 | +TRANS(vdiv_d, gen_vvv, gen_helper_vdiv_d) | ||
79 | +TRANS(vdiv_bu, gen_vvv, gen_helper_vdiv_bu) | ||
80 | +TRANS(vdiv_hu, gen_vvv, gen_helper_vdiv_hu) | ||
81 | +TRANS(vdiv_wu, gen_vvv, gen_helper_vdiv_wu) | ||
82 | +TRANS(vdiv_du, gen_vvv, gen_helper_vdiv_du) | ||
83 | +TRANS(vmod_b, gen_vvv, gen_helper_vmod_b) | ||
84 | +TRANS(vmod_h, gen_vvv, gen_helper_vmod_h) | ||
85 | +TRANS(vmod_w, gen_vvv, gen_helper_vmod_w) | ||
86 | +TRANS(vmod_d, gen_vvv, gen_helper_vmod_d) | ||
87 | +TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu) | ||
88 | +TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu) | ||
89 | +TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu) | ||
90 | +TRANS(vmod_du, gen_vvv, gen_helper_vmod_du) | ||
91 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
92 | index XXXXXXX..XXXXXXX 100644 | ||
93 | --- a/target/loongarch/insns.decode | ||
94 | +++ b/target/loongarch/insns.decode | ||
95 | @@ -XXX,XX +XXX,XX @@ vmaddwod_h_bu_b 0111 00001011 11100 ..... ..... ..... @vvv | ||
96 | vmaddwod_w_hu_h 0111 00001011 11101 ..... ..... ..... @vvv | ||
97 | vmaddwod_d_wu_w 0111 00001011 11110 ..... ..... ..... @vvv | ||
98 | vmaddwod_q_du_d 0111 00001011 11111 ..... ..... ..... @vvv | ||
99 | + | ||
100 | +vdiv_b 0111 00001110 00000 ..... ..... ..... @vvv | ||
101 | +vdiv_h 0111 00001110 00001 ..... ..... ..... @vvv | ||
102 | +vdiv_w 0111 00001110 00010 ..... ..... ..... @vvv | ||
103 | +vdiv_d 0111 00001110 00011 ..... ..... ..... @vvv | ||
104 | +vdiv_bu 0111 00001110 01000 ..... ..... ..... @vvv | ||
105 | +vdiv_hu 0111 00001110 01001 ..... ..... ..... @vvv | ||
106 | +vdiv_wu 0111 00001110 01010 ..... ..... ..... @vvv | ||
107 | +vdiv_du 0111 00001110 01011 ..... ..... ..... @vvv | ||
108 | +vmod_b 0111 00001110 00100 ..... ..... ..... @vvv | ||
109 | +vmod_h 0111 00001110 00101 ..... ..... ..... @vvv | ||
110 | +vmod_w 0111 00001110 00110 ..... ..... ..... @vvv | ||
111 | +vmod_d 0111 00001110 00111 ..... ..... ..... @vvv | ||
112 | +vmod_bu 0111 00001110 01100 ..... ..... ..... @vvv | ||
113 | +vmod_hu 0111 00001110 01101 ..... ..... ..... @vvv | ||
114 | +vmod_wu 0111 00001110 01110 ..... ..... ..... @vvv | ||
115 | +vmod_du 0111 00001110 01111 ..... ..... ..... @vvv | ||
116 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/target/loongarch/lsx_helper.c | ||
119 | +++ b/target/loongarch/lsx_helper.c | ||
120 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
121 | VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) | ||
122 | VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) | ||
123 | VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) | ||
124 | + | ||
125 | +#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M) | ||
126 | +#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M) | ||
127 | +#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\ | ||
128 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) | ||
129 | +#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ | ||
130 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) | ||
131 | + | ||
132 | +#define VDIV(NAME, BIT, E, DO_OP) \ | ||
133 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
134 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
135 | +{ \ | ||
136 | + int i; \ | ||
137 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
138 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
139 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
140 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
141 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ | ||
142 | + } \ | ||
143 | +} | ||
144 | + | ||
145 | +VDIV(vdiv_b, 8, B, DO_DIV) | ||
146 | +VDIV(vdiv_h, 16, H, DO_DIV) | ||
147 | +VDIV(vdiv_w, 32, W, DO_DIV) | ||
148 | +VDIV(vdiv_d, 64, D, DO_DIV) | ||
149 | +VDIV(vdiv_bu, 8, UB, DO_DIVU) | ||
150 | +VDIV(vdiv_hu, 16, UH, DO_DIVU) | ||
151 | +VDIV(vdiv_wu, 32, UW, DO_DIVU) | ||
152 | +VDIV(vdiv_du, 64, UD, DO_DIVU) | ||
153 | +VDIV(vmod_b, 8, B, DO_REM) | ||
154 | +VDIV(vmod_h, 16, H, DO_REM) | ||
155 | +VDIV(vmod_w, 32, W, DO_REM) | ||
156 | +VDIV(vmod_d, 64, D, DO_REM) | ||
157 | +VDIV(vmod_bu, 8, UB, DO_REMU) | ||
158 | +VDIV(vmod_hu, 16, UH, DO_REMU) | ||
159 | +VDIV(vmod_wu, 32, UW, DO_REMU) | ||
160 | +VDIV(vmod_du, 64, UD, DO_REMU) | ||
161 | -- | ||
162 | 2.31.1
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSAT.{B/H/W/D}[U]. | ||
3 | 1 | ||
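For orientation, the clamping ranges that do_vsat_s/do_vsat_u derive from the immediate can be sketched per 32-bit lane as below; the ref_* names are illustrative only. Signed lanes clamp to [-(1 << imm), (1 << imm) - 1] and unsigned lanes to [0, (1 << (imm + 1)) - 1]; the 64-bit unsigned case additionally special-cases imm == 0x3f to avoid shifting past the type width.

    #include <stdint.h>

    /* One vsat.w lane: signed saturation to the range picked by imm. */
    static int32_t ref_vsat_w(int32_t v, unsigned imm)   /* 0 <= imm <= 31 */
    {
        int64_t max = ((int64_t)1 << imm) - 1;
        int64_t min = -max - 1;                          /* -(1 << imm) */

        if (v > max) {
            return (int32_t)max;
        } else if (v < min) {
            return (int32_t)min;
        }
        return v;
    }

    /* One vsat.wu lane: unsigned saturation to (1 << (imm + 1)) - 1. */
    static uint32_t ref_vsat_wu(uint32_t v, unsigned imm) /* 0 <= imm <= 31 */
    {
        uint64_t max = ((uint64_t)1 << (imm + 1)) - 1;

        return v > max ? (uint32_t)max : v;
    }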
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-18-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 9 ++ | ||
9 | target/loongarch/helper.h | 9 ++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 101 ++++++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 12 +++ | ||
12 | target/loongarch/lsx_helper.c | 37 +++++++ | ||
13 | 5 files changed, 168 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmod_bu, vvv) | ||
20 | INSN_LSX(vmod_hu, vvv) | ||
21 | INSN_LSX(vmod_wu, vvv) | ||
22 | INSN_LSX(vmod_du, vvv) | ||
23 | + | ||
24 | +INSN_LSX(vsat_b, vv_i) | ||
25 | +INSN_LSX(vsat_h, vv_i) | ||
26 | +INSN_LSX(vsat_w, vv_i) | ||
27 | +INSN_LSX(vsat_d, vv_i) | ||
28 | +INSN_LSX(vsat_bu, vv_i) | ||
29 | +INSN_LSX(vsat_hu, vv_i) | ||
30 | +INSN_LSX(vsat_wu, vv_i) | ||
31 | +INSN_LSX(vsat_du, vv_i) | ||
32 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/loongarch/helper.h | ||
35 | +++ b/target/loongarch/helper.h | ||
36 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32) | ||
37 | DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32) | ||
38 | DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32) | ||
39 | DEF_HELPER_4(vmod_du, void, env, i32, i32, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
42 | +DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
43 | +DEF_HELPER_FLAGS_4(vsat_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
44 | +DEF_HELPER_FLAGS_4(vsat_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
45 | +DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
46 | +DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
47 | +DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
48 | +DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
49 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
52 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
53 | @@ -XXX,XX +XXX,XX @@ TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu) | ||
54 | TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu) | ||
55 | TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu) | ||
56 | TRANS(vmod_du, gen_vvv, gen_helper_vmod_du) | ||
57 | + | ||
58 | +static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) | ||
59 | +{ | ||
60 | + TCGv_vec min; | ||
61 | + | ||
62 | + min = tcg_temp_new_vec_matching(t); | ||
63 | + tcg_gen_not_vec(vece, min, max); | ||
64 | + tcg_gen_smax_vec(vece, t, a, min); | ||
65 | + tcg_gen_smin_vec(vece, t, t, max); | ||
66 | +} | ||
67 | + | ||
68 | +static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
69 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
70 | +{ | ||
71 | + static const TCGOpcode vecop_list[] = { | ||
72 | + INDEX_op_smax_vec, INDEX_op_smin_vec, 0 | ||
73 | + }; | ||
74 | + static const GVecGen2s op[4] = { | ||
75 | + { | ||
76 | + .fniv = gen_vsat_s, | ||
77 | + .fno = gen_helper_vsat_b, | ||
78 | + .opt_opc = vecop_list, | ||
79 | + .vece = MO_8 | ||
80 | + }, | ||
81 | + { | ||
82 | + .fniv = gen_vsat_s, | ||
83 | + .fno = gen_helper_vsat_h, | ||
84 | + .opt_opc = vecop_list, | ||
85 | + .vece = MO_16 | ||
86 | + }, | ||
87 | + { | ||
88 | + .fniv = gen_vsat_s, | ||
89 | + .fno = gen_helper_vsat_w, | ||
90 | + .opt_opc = vecop_list, | ||
91 | + .vece = MO_32 | ||
92 | + }, | ||
93 | + { | ||
94 | + .fniv = gen_vsat_s, | ||
95 | + .fno = gen_helper_vsat_d, | ||
96 | + .opt_opc = vecop_list, | ||
97 | + .vece = MO_64 | ||
98 | + }, | ||
99 | + }; | ||
100 | + | ||
101 | + tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, | ||
102 | + tcg_constant_i64((1ll << imm) - 1), &op[vece]); | ||
103 | +} | ||
104 | + | ||
105 | +TRANS(vsat_b, gvec_vv_i, MO_8, do_vsat_s) | ||
106 | +TRANS(vsat_h, gvec_vv_i, MO_16, do_vsat_s) | ||
107 | +TRANS(vsat_w, gvec_vv_i, MO_32, do_vsat_s) | ||
108 | +TRANS(vsat_d, gvec_vv_i, MO_64, do_vsat_s) | ||
109 | + | ||
110 | +static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) | ||
111 | +{ | ||
112 | + tcg_gen_umin_vec(vece, t, a, max); | ||
113 | +} | ||
114 | + | ||
115 | +static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
116 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
117 | +{ | ||
118 | + uint64_t max; | ||
119 | + static const TCGOpcode vecop_list[] = { | ||
120 | + INDEX_op_umin_vec, 0 | ||
121 | + }; | ||
122 | + static const GVecGen2s op[4] = { | ||
123 | + { | ||
124 | + .fniv = gen_vsat_u, | ||
125 | + .fno = gen_helper_vsat_bu, | ||
126 | + .opt_opc = vecop_list, | ||
127 | + .vece = MO_8 | ||
128 | + }, | ||
129 | + { | ||
130 | + .fniv = gen_vsat_u, | ||
131 | + .fno = gen_helper_vsat_hu, | ||
132 | + .opt_opc = vecop_list, | ||
133 | + .vece = MO_16 | ||
134 | + }, | ||
135 | + { | ||
136 | + .fniv = gen_vsat_u, | ||
137 | + .fno = gen_helper_vsat_wu, | ||
138 | + .opt_opc = vecop_list, | ||
139 | + .vece = MO_32 | ||
140 | + }, | ||
141 | + { | ||
142 | + .fniv = gen_vsat_u, | ||
143 | + .fno = gen_helper_vsat_du, | ||
144 | + .opt_opc = vecop_list, | ||
145 | + .vece = MO_64 | ||
146 | + }, | ||
147 | + }; | ||
148 | + | ||
149 | + max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1; | ||
150 | + tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, | ||
151 | + tcg_constant_i64(max), &op[vece]); | ||
152 | +} | ||
153 | + | ||
154 | +TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u) | ||
155 | +TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u) | ||
156 | +TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u) | ||
157 | +TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u) | ||
158 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
159 | index XXXXXXX..XXXXXXX 100644 | ||
160 | --- a/target/loongarch/insns.decode | ||
161 | +++ b/target/loongarch/insns.decode | ||
162 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
163 | # | ||
164 | @vv .... ........ ..... ..... vj:5 vd:5 &vv | ||
165 | @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv | ||
166 | +@vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i | ||
167 | +@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i | ||
168 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
169 | +@vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i | ||
170 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
171 | |||
172 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
173 | @@ -XXX,XX +XXX,XX @@ vmod_bu 0111 00001110 01100 ..... ..... ..... @vvv | ||
174 | vmod_hu 0111 00001110 01101 ..... ..... ..... @vvv | ||
175 | vmod_wu 0111 00001110 01110 ..... ..... ..... @vvv | ||
176 | vmod_du 0111 00001110 01111 ..... ..... ..... @vvv | ||
177 | + | ||
178 | +vsat_b 0111 00110010 01000 01 ... ..... ..... @vv_ui3 | ||
179 | +vsat_h 0111 00110010 01000 1 .... ..... ..... @vv_ui4 | ||
180 | +vsat_w 0111 00110010 01001 ..... ..... ..... @vv_ui5 | ||
181 | +vsat_d 0111 00110010 0101 ...... ..... ..... @vv_ui6 | ||
182 | +vsat_bu 0111 00110010 10000 01 ... ..... ..... @vv_ui3 | ||
183 | +vsat_hu 0111 00110010 10000 1 .... ..... ..... @vv_ui4 | ||
184 | +vsat_wu 0111 00110010 10001 ..... ..... ..... @vv_ui5 | ||
185 | +vsat_du 0111 00110010 1001 ...... ..... ..... @vv_ui6 | ||
186 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
187 | index XXXXXXX..XXXXXXX 100644 | ||
188 | --- a/target/loongarch/lsx_helper.c | ||
189 | +++ b/target/loongarch/lsx_helper.c | ||
190 | @@ -XXX,XX +XXX,XX @@ VDIV(vmod_bu, 8, UB, DO_REMU) | ||
191 | VDIV(vmod_hu, 16, UH, DO_REMU) | ||
192 | VDIV(vmod_wu, 32, UW, DO_REMU) | ||
193 | VDIV(vmod_du, 64, UD, DO_REMU) | ||
194 | + | ||
195 | +#define VSAT_S(NAME, BIT, E) \ | ||
196 | +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ | ||
197 | +{ \ | ||
198 | + int i; \ | ||
199 | + VReg *Vd = (VReg *)vd; \ | ||
200 | + VReg *Vj = (VReg *)vj; \ | ||
201 | + typedef __typeof(Vd->E(0)) TD; \ | ||
202 | + \ | ||
203 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
204 | + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ | ||
205 | + Vj->E(i) < (TD)~max ? (TD)~max : Vj->E(i); \ | ||
206 | + } \ | ||
207 | +} | ||
208 | + | ||
209 | +VSAT_S(vsat_b, 8, B) | ||
210 | +VSAT_S(vsat_h, 16, H) | ||
211 | +VSAT_S(vsat_w, 32, W) | ||
212 | +VSAT_S(vsat_d, 64, D) | ||
213 | + | ||
214 | +#define VSAT_U(NAME, BIT, E) \ | ||
215 | +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ | ||
216 | +{ \ | ||
217 | + int i; \ | ||
218 | + VReg *Vd = (VReg *)vd; \ | ||
219 | + VReg *Vj = (VReg *)vj; \ | ||
220 | + typedef __typeof(Vd->E(0)) TD; \ | ||
221 | + \ | ||
222 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
223 | + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ | ||
224 | + } \ | ||
225 | +} | ||
226 | + | ||
227 | +VSAT_U(vsat_bu, 8, UB) | ||
228 | +VSAT_U(vsat_hu, 16, UH) | ||
229 | +VSAT_U(vsat_wu, 32, UW) | ||
230 | +VSAT_U(vsat_du, 64, UD) | ||
231 | -- | ||
232 | 2.31.1
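For readers skimming the saturate hunks above: do_vsat_s() clamps each signed element to [~max, max] with max = (1 << imm) - 1, and do_vsat_u() clamps unsigned elements to (1 << (imm + 1)) - 1, or UINT64_MAX when imm is 63. A minimal stand-alone scalar sketch of the signed word case follows; it is illustrative only and not part of the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of VSAT.W: clamp a signed 32-bit value to [-(2^imm), 2^imm - 1]. */
    static int32_t sat_w(int32_t v, unsigned imm)
    {
        int64_t max = (1ll << imm) - 1;   /* same constant handed to tcg_gen_gvec_2s() */
        int64_t min = ~max;               /* -(2^imm), mirrors gen_vsat_s() */
        return v > max ? max : (v < min ? min : v);
    }

    int main(void)
    {
        printf("%d %d\n", sat_w(300, 7), sat_w(-300, 7));   /* prints: 127 -128 */
        return 0;
    }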
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VEXTH.{H.B/W.H/D.W/Q.D}; | ||
3 | - VEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-19-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 9 ++++++ | ||
10 | target/loongarch/helper.h | 9 ++++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 20 ++++++++++++ | ||
12 | target/loongarch/insns.decode | 9 ++++++ | ||
13 | target/loongarch/lsx_helper.c | 35 +++++++++++++++++++++ | ||
14 | 5 files changed, 82 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsat_bu, vv_i) | ||
21 | INSN_LSX(vsat_hu, vv_i) | ||
22 | INSN_LSX(vsat_wu, vv_i) | ||
23 | INSN_LSX(vsat_du, vv_i) | ||
24 | + | ||
25 | +INSN_LSX(vexth_h_b, vv) | ||
26 | +INSN_LSX(vexth_w_h, vv) | ||
27 | +INSN_LSX(vexth_d_w, vv) | ||
28 | +INSN_LSX(vexth_q_d, vv) | ||
29 | +INSN_LSX(vexth_hu_bu, vv) | ||
30 | +INSN_LSX(vexth_wu_hu, vv) | ||
31 | +INSN_LSX(vexth_du_wu, vv) | ||
32 | +INSN_LSX(vexth_qu_du, vv) | ||
33 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/loongarch/helper.h | ||
36 | +++ b/target/loongarch/helper.h | ||
37 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
38 | DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
39 | DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
40 | DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
41 | + | ||
42 | +DEF_HELPER_3(vexth_h_b, void, env, i32, i32) | ||
43 | +DEF_HELPER_3(vexth_w_h, void, env, i32, i32) | ||
44 | +DEF_HELPER_3(vexth_d_w, void, env, i32, i32) | ||
45 | +DEF_HELPER_3(vexth_q_d, void, env, i32, i32) | ||
46 | +DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32) | ||
47 | +DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32) | ||
48 | +DEF_HELPER_3(vexth_du_wu, void, env, i32, i32) | ||
49 | +DEF_HELPER_3(vexth_qu_du, void, env, i32, i32) | ||
50 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
53 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a, | ||
55 | return true; | ||
56 | } | ||
57 | |||
58 | +static bool gen_vv(DisasContext *ctx, arg_vv *a, | ||
59 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) | ||
60 | +{ | ||
61 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
62 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
63 | + | ||
64 | + CHECK_SXE; | ||
65 | + func(cpu_env, vd, vj); | ||
66 | + return true; | ||
67 | +} | ||
68 | + | ||
69 | static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
70 | void (*func)(unsigned, uint32_t, uint32_t, | ||
71 | uint32_t, uint32_t, uint32_t)) | ||
72 | @@ -XXX,XX +XXX,XX @@ TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u) | ||
73 | TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u) | ||
74 | TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u) | ||
75 | TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u) | ||
76 | + | ||
77 | +TRANS(vexth_h_b, gen_vv, gen_helper_vexth_h_b) | ||
78 | +TRANS(vexth_w_h, gen_vv, gen_helper_vexth_w_h) | ||
79 | +TRANS(vexth_d_w, gen_vv, gen_helper_vexth_d_w) | ||
80 | +TRANS(vexth_q_d, gen_vv, gen_helper_vexth_q_d) | ||
81 | +TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu) | ||
82 | +TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu) | ||
83 | +TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu) | ||
84 | +TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du) | ||
85 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/loongarch/insns.decode | ||
88 | +++ b/target/loongarch/insns.decode | ||
89 | @@ -XXX,XX +XXX,XX @@ vsat_bu 0111 00110010 10000 01 ... ..... ..... @vv_ui3 | ||
90 | vsat_hu 0111 00110010 10000 1 .... ..... ..... @vv_ui4 | ||
91 | vsat_wu 0111 00110010 10001 ..... ..... ..... @vv_ui5 | ||
92 | vsat_du 0111 00110010 1001 ...... ..... ..... @vv_ui6 | ||
93 | + | ||
94 | +vexth_h_b 0111 00101001 11101 11000 ..... ..... @vv | ||
95 | +vexth_w_h 0111 00101001 11101 11001 ..... ..... @vv | ||
96 | +vexth_d_w 0111 00101001 11101 11010 ..... ..... @vv | ||
97 | +vexth_q_d 0111 00101001 11101 11011 ..... ..... @vv | ||
98 | +vexth_hu_bu 0111 00101001 11101 11100 ..... ..... @vv | ||
99 | +vexth_wu_hu 0111 00101001 11101 11101 ..... ..... @vv | ||
100 | +vexth_du_wu 0111 00101001 11101 11110 ..... ..... @vv | ||
101 | +vexth_qu_du 0111 00101001 11101 11111 ..... ..... @vv | ||
102 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/loongarch/lsx_helper.c | ||
105 | +++ b/target/loongarch/lsx_helper.c | ||
106 | @@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_bu, 8, UB) | ||
107 | VSAT_U(vsat_hu, 16, UH) | ||
108 | VSAT_U(vsat_wu, 32, UW) | ||
109 | VSAT_U(vsat_du, 64, UD) | ||
110 | + | ||
111 | +#define VEXTH(NAME, BIT, E1, E2) \ | ||
112 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
113 | +{ \ | ||
114 | + int i; \ | ||
115 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
116 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
117 | + \ | ||
118 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
119 | + Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ | ||
120 | + } \ | ||
121 | +} | ||
122 | + | ||
123 | +void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
124 | +{ | ||
125 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
126 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
127 | + | ||
128 | + Vd->Q(0) = int128_makes64(Vj->D(1)); | ||
129 | +} | ||
130 | + | ||
131 | +void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
132 | +{ | ||
133 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
134 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
135 | + | ||
136 | + Vd->Q(0) = int128_make64((uint64_t)Vj->D(1)); | ||
137 | +} | ||
138 | + | ||
139 | +VEXTH(vexth_h_b, 16, H, B) | ||
140 | +VEXTH(vexth_w_h, 32, W, H) | ||
141 | +VEXTH(vexth_d_w, 64, D, W) | ||
142 | +VEXTH(vexth_hu_bu, 16, UH, UB) | ||
143 | +VEXTH(vexth_wu_hu, 32, UW, UH) | ||
144 | +VEXTH(vexth_du_wu, 64, UD, UW) | ||
145 | -- | ||
146 | 2.31.1
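The VEXTH macro above reads the source at index i + LSX_LEN/BIT, i.e. it widens the high half of vj into the full-width lanes of vd. A small scalar model of VEXTH.H.B, assuming the 128-bit LSX vector; illustrative only, not part of the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of VEXTH.H.B on a 16-byte vector:
     * result halfword i = sign-extended source byte (i + 8). */
    static void vexth_h_b(int16_t dst[8], const int8_t src[16])
    {
        for (int i = 0; i < 8; i++) {
            dst[i] = src[i + 8];          /* implicit sign extension to 16 bits */
        }
    }

    int main(void)
    {
        int8_t src[16] = { [8] = -1, [9] = 2, [15] = -128 };
        int16_t dst[8];

        vexth_h_b(dst, src);
        printf("%d %d %d\n", dst[0], dst[1], dst[7]);   /* prints: -1 2 -128 */
        return 0;
    }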
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSIGNCOV.{B/H/W/D}. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-20-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 5 ++ | ||
9 | target/loongarch/helper.h | 5 ++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 53 +++++++++++++++++++++ | ||
11 | target/loongarch/insns.decode | 5 ++ | ||
12 | target/loongarch/lsx_helper.c | 7 +++ | ||
13 | 5 files changed, 75 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vexth_hu_bu, vv) | ||
20 | INSN_LSX(vexth_wu_hu, vv) | ||
21 | INSN_LSX(vexth_du_wu, vv) | ||
22 | INSN_LSX(vexth_qu_du, vv) | ||
23 | + | ||
24 | +INSN_LSX(vsigncov_b, vvv) | ||
25 | +INSN_LSX(vsigncov_h, vvv) | ||
26 | +INSN_LSX(vsigncov_w, vvv) | ||
27 | +INSN_LSX(vsigncov_d, vvv) | ||
28 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/loongarch/helper.h | ||
31 | +++ b/target/loongarch/helper.h | ||
32 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32) | ||
33 | DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32) | ||
34 | DEF_HELPER_3(vexth_du_wu, void, env, i32, i32) | ||
35 | DEF_HELPER_3(vexth_qu_du, void, env, i32, i32) | ||
36 | + | ||
37 | +DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
38 | +DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | +DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | +DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
41 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
44 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
45 | @@ -XXX,XX +XXX,XX @@ TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu) | ||
46 | TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu) | ||
47 | TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu) | ||
48 | TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du) | ||
49 | + | ||
50 | +static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
51 | +{ | ||
52 | + TCGv_vec t1, zero; | ||
53 | + | ||
54 | + t1 = tcg_temp_new_vec_matching(t); | ||
55 | + zero = tcg_constant_vec_matching(t, vece, 0); | ||
56 | + | ||
57 | + tcg_gen_neg_vec(vece, t1, b); | ||
58 | + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b); | ||
59 | + tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t); | ||
60 | +} | ||
61 | + | ||
62 | +static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
63 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
64 | +{ | ||
65 | + static const TCGOpcode vecop_list[] = { | ||
66 | + INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0 | ||
67 | + }; | ||
68 | + static const GVecGen3 op[4] = { | ||
69 | + { | ||
70 | + .fniv = gen_vsigncov, | ||
71 | + .fno = gen_helper_vsigncov_b, | ||
72 | + .opt_opc = vecop_list, | ||
73 | + .vece = MO_8 | ||
74 | + }, | ||
75 | + { | ||
76 | + .fniv = gen_vsigncov, | ||
77 | + .fno = gen_helper_vsigncov_h, | ||
78 | + .opt_opc = vecop_list, | ||
79 | + .vece = MO_16 | ||
80 | + }, | ||
81 | + { | ||
82 | + .fniv = gen_vsigncov, | ||
83 | + .fno = gen_helper_vsigncov_w, | ||
84 | + .opt_opc = vecop_list, | ||
85 | + .vece = MO_32 | ||
86 | + }, | ||
87 | + { | ||
88 | + .fniv = gen_vsigncov, | ||
89 | + .fno = gen_helper_vsigncov_d, | ||
90 | + .opt_opc = vecop_list, | ||
91 | + .vece = MO_64 | ||
92 | + }, | ||
93 | + }; | ||
94 | + | ||
95 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
96 | +} | ||
97 | + | ||
98 | +TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov) | ||
99 | +TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov) | ||
100 | +TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov) | ||
101 | +TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov) | ||
102 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/target/loongarch/insns.decode | ||
105 | +++ b/target/loongarch/insns.decode | ||
106 | @@ -XXX,XX +XXX,XX @@ vexth_hu_bu 0111 00101001 11101 11100 ..... ..... @vv | ||
107 | vexth_wu_hu 0111 00101001 11101 11101 ..... ..... @vv | ||
108 | vexth_du_wu 0111 00101001 11101 11110 ..... ..... @vv | ||
109 | vexth_qu_du 0111 00101001 11101 11111 ..... ..... @vv | ||
110 | + | ||
111 | +vsigncov_b 0111 00010010 11100 ..... ..... ..... @vvv | ||
112 | +vsigncov_h 0111 00010010 11101 ..... ..... ..... @vvv | ||
113 | +vsigncov_w 0111 00010010 11110 ..... ..... ..... @vvv | ||
114 | +vsigncov_d 0111 00010010 11111 ..... ..... ..... @vvv | ||
115 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/target/loongarch/lsx_helper.c | ||
118 | +++ b/target/loongarch/lsx_helper.c | ||
119 | @@ -XXX,XX +XXX,XX @@ VEXTH(vexth_d_w, 64, D, W) | ||
120 | VEXTH(vexth_hu_bu, 16, UH, UB) | ||
121 | VEXTH(vexth_wu_hu, 32, UW, UH) | ||
122 | VEXTH(vexth_du_wu, 64, UD, UW) | ||
123 | + | ||
124 | +#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) | ||
125 | + | ||
126 | +DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) | ||
127 | +DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) | ||
128 | +DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) | ||
129 | +DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) | ||
130 | -- | ||
131 | 2.31.1
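DO_SIGNCOV copies the second operand with the sign of the first, and forces zero when the first operand is zero; the two cmpsel steps in gen_vsigncov() perform the same selection with vector compares. A scalar sketch, illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of DO_SIGNCOV for 32-bit lanes. */
    static int32_t signcov_w(int32_t a, int32_t b)
    {
        return a == 0 ? 0 : (a < 0 ? -b : b);
    }

    int main(void)
    {
        /* prints: 0 -7 7 */
        printf("%d %d %d\n", signcov_w(0, 7), signcov_w(-3, 7), signcov_w(5, 7));
        return 0;
    }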
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VMSKLTZ.{B/H/W/D}; | ||
3 | - VMSKGEZ.B; | ||
4 | - VMSKNZ.B. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-21-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 7 ++ | ||
11 | target/loongarch/helper.h | 7 ++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 7 ++ | ||
13 | target/loongarch/insns.decode | 7 ++ | ||
14 | target/loongarch/lsx_helper.c | 113 ++++++++++++++++++++ | ||
15 | 5 files changed, 141 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/disas.c | ||
20 | +++ b/target/loongarch/disas.c | ||
21 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsigncov_b, vvv) | ||
22 | INSN_LSX(vsigncov_h, vvv) | ||
23 | INSN_LSX(vsigncov_w, vvv) | ||
24 | INSN_LSX(vsigncov_d, vvv) | ||
25 | + | ||
26 | +INSN_LSX(vmskltz_b, vv) | ||
27 | +INSN_LSX(vmskltz_h, vv) | ||
28 | +INSN_LSX(vmskltz_w, vv) | ||
29 | +INSN_LSX(vmskltz_d, vv) | ||
30 | +INSN_LSX(vmskgez_b, vv) | ||
31 | +INSN_LSX(vmsknz_b, vv) | ||
32 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/loongarch/helper.h | ||
35 | +++ b/target/loongarch/helper.h | ||
36 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
37 | DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
38 | DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | + | ||
41 | +DEF_HELPER_3(vmskltz_b, void, env, i32, i32) | ||
42 | +DEF_HELPER_3(vmskltz_h, void, env, i32, i32) | ||
43 | +DEF_HELPER_3(vmskltz_w, void, env, i32, i32) | ||
44 | +DEF_HELPER_3(vmskltz_d, void, env, i32, i32) | ||
45 | +DEF_HELPER_3(vmskgez_b, void, env, i32, i32) | ||
46 | +DEF_HELPER_3(vmsknz_b, void, env, i32, i32) | ||
47 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
50 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
51 | @@ -XXX,XX +XXX,XX @@ TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov) | ||
52 | TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov) | ||
53 | TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov) | ||
54 | TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov) | ||
55 | + | ||
56 | +TRANS(vmskltz_b, gen_vv, gen_helper_vmskltz_b) | ||
57 | +TRANS(vmskltz_h, gen_vv, gen_helper_vmskltz_h) | ||
58 | +TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w) | ||
59 | +TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d) | ||
60 | +TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b) | ||
61 | +TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b) | ||
62 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/loongarch/insns.decode | ||
65 | +++ b/target/loongarch/insns.decode | ||
66 | @@ -XXX,XX +XXX,XX @@ vsigncov_b 0111 00010010 11100 ..... ..... ..... @vvv | ||
67 | vsigncov_h 0111 00010010 11101 ..... ..... ..... @vvv | ||
68 | vsigncov_w 0111 00010010 11110 ..... ..... ..... @vvv | ||
69 | vsigncov_d 0111 00010010 11111 ..... ..... ..... @vvv | ||
70 | + | ||
71 | +vmskltz_b 0111 00101001 11000 10000 ..... ..... @vv | ||
72 | +vmskltz_h 0111 00101001 11000 10001 ..... ..... @vv | ||
73 | +vmskltz_w 0111 00101001 11000 10010 ..... ..... @vv | ||
74 | +vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv | ||
75 | +vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv | ||
76 | +vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv | ||
77 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/loongarch/lsx_helper.c | ||
80 | +++ b/target/loongarch/lsx_helper.c | ||
81 | @@ -XXX,XX +XXX,XX @@ DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) | ||
82 | DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) | ||
83 | DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) | ||
84 | DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) | ||
85 | + | ||
86 | +static uint64_t do_vmskltz_b(int64_t val) | ||
87 | +{ | ||
88 | + uint64_t m = 0x8080808080808080ULL; | ||
89 | + uint64_t c = val & m; | ||
90 | + c |= c << 7; | ||
91 | + c |= c << 14; | ||
92 | + c |= c << 28; | ||
93 | + return c >> 56; | ||
94 | +} | ||
95 | + | ||
96 | +void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
97 | +{ | ||
98 | + uint16_t temp = 0; | ||
99 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
100 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
101 | + | ||
102 | + temp = do_vmskltz_b(Vj->D(0)); | ||
103 | + temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
104 | + Vd->D(0) = temp; | ||
105 | + Vd->D(1) = 0; | ||
106 | +} | ||
107 | + | ||
108 | +static uint64_t do_vmskltz_h(int64_t val) | ||
109 | +{ | ||
110 | + uint64_t m = 0x8000800080008000ULL; | ||
111 | + uint64_t c = val & m; | ||
112 | + c |= c << 15; | ||
113 | + c |= c << 30; | ||
114 | + return c >> 60; | ||
115 | +} | ||
116 | + | ||
117 | +void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
118 | +{ | ||
119 | + uint16_t temp = 0; | ||
120 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
121 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
122 | + | ||
123 | + temp = do_vmskltz_h(Vj->D(0)); | ||
124 | + temp |= (do_vmskltz_h(Vj->D(1)) << 4); | ||
125 | + Vd->D(0) = temp; | ||
126 | + Vd->D(1) = 0; | ||
127 | +} | ||
128 | + | ||
129 | +static uint64_t do_vmskltz_w(int64_t val) | ||
130 | +{ | ||
131 | + uint64_t m = 0x8000000080000000ULL; | ||
132 | + uint64_t c = val & m; | ||
133 | + c |= c << 31; | ||
134 | + return c >> 62; | ||
135 | +} | ||
136 | + | ||
137 | +void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
138 | +{ | ||
139 | + uint16_t temp = 0; | ||
140 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
141 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
142 | + | ||
143 | + temp = do_vmskltz_w(Vj->D(0)); | ||
144 | + temp |= (do_vmskltz_w(Vj->D(1)) << 2); | ||
145 | + Vd->D(0) = temp; | ||
146 | + Vd->D(1) = 0; | ||
147 | +} | ||
148 | + | ||
149 | +static uint64_t do_vmskltz_d(int64_t val) | ||
150 | +{ | ||
151 | + return (uint64_t)val >> 63; | ||
152 | +} | ||
153 | +void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
154 | +{ | ||
155 | + uint16_t temp = 0; | ||
156 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
157 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
158 | + | ||
159 | + temp = do_vmskltz_d(Vj->D(0)); | ||
160 | + temp |= (do_vmskltz_d(Vj->D(1)) << 1); | ||
161 | + Vd->D(0) = temp; | ||
162 | + Vd->D(1) = 0; | ||
163 | +} | ||
164 | + | ||
165 | +void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
166 | +{ | ||
167 | + uint16_t temp = 0; | ||
168 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
169 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
170 | + | ||
171 | + temp = do_vmskltz_b(Vj->D(0)); | ||
172 | + temp |= (do_vmskltz_b(Vj->D(1)) << 8); | ||
173 | + Vd->D(0) = (uint16_t)(~temp); | ||
174 | + Vd->D(1) = 0; | ||
175 | +} | ||
176 | + | ||
177 | +static uint64_t do_vmskez_b(uint64_t a) | ||
178 | +{ | ||
179 | + uint64_t m = 0x7f7f7f7f7f7f7f7fULL; | ||
180 | + uint64_t c = ~(((a & m) + m) | a | m); | ||
181 | + c |= c << 7; | ||
182 | + c |= c << 14; | ||
183 | + c |= c << 28; | ||
184 | + return c >> 56; | ||
185 | +} | ||
186 | + | ||
187 | +void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
188 | +{ | ||
189 | + uint16_t temp = 0; | ||
190 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
191 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
192 | + | ||
193 | + temp = do_vmskez_b(Vj->D(0)); | ||
194 | + temp |= (do_vmskez_b(Vj->D(1)) << 8); | ||
195 | + Vd->D(0) = (uint16_t)(~temp); | ||
196 | + Vd->D(1) = 0; | ||
197 | +} | ||
198 | -- | ||
199 | 2.31.1
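The shift-and-or sequence in do_vmskltz_b() folds the eight per-byte sign bits of a 64-bit half of the vector into its top byte, so the final right shift by 56 leaves one mask bit per source byte. A stand-alone check of that trick (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Gather the most significant bit of each byte of a 64-bit value
     * into the low 8 bits of the result, as do_vmskltz_b() does. */
    static uint64_t msb_of_each_byte(uint64_t val)
    {
        uint64_t c = val & 0x8080808080808080ULL;
        c |= c << 7;
        c |= c << 14;
        c |= c << 28;
        return c >> 56;
    }

    int main(void)
    {
        /* bytes 0 and 7 have their sign bit set -> bits 0 and 7 set -> 0x81 */
        printf("0x%02x\n", (unsigned)msb_of_each_byte(0x80000000000000F0ULL));
        return 0;
    }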
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - V{AND/OR/XOR/NOR/ANDN/ORN}.V; | ||
3 | - V{AND/OR/XOR/NOR}I.B. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-22-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 12 +++++ | ||
10 | target/loongarch/helper.h | 2 + | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 56 +++++++++++++++++++++ | ||
12 | target/loongarch/insns.decode | 13 +++++ | ||
13 | target/loongarch/lsx_helper.c | 11 ++++ | ||
14 | 5 files changed, 94 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmskltz_w, vv) | ||
21 | INSN_LSX(vmskltz_d, vv) | ||
22 | INSN_LSX(vmskgez_b, vv) | ||
23 | INSN_LSX(vmsknz_b, vv) | ||
24 | + | ||
25 | +INSN_LSX(vand_v, vvv) | ||
26 | +INSN_LSX(vor_v, vvv) | ||
27 | +INSN_LSX(vxor_v, vvv) | ||
28 | +INSN_LSX(vnor_v, vvv) | ||
29 | +INSN_LSX(vandn_v, vvv) | ||
30 | +INSN_LSX(vorn_v, vvv) | ||
31 | + | ||
32 | +INSN_LSX(vandi_b, vv_i) | ||
33 | +INSN_LSX(vori_b, vv_i) | ||
34 | +INSN_LSX(vxori_b, vv_i) | ||
35 | +INSN_LSX(vnori_b, vv_i) | ||
36 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/loongarch/helper.h | ||
39 | +++ b/target/loongarch/helper.h | ||
40 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vmskltz_w, void, env, i32, i32) | ||
41 | DEF_HELPER_3(vmskltz_d, void, env, i32, i32) | ||
42 | DEF_HELPER_3(vmskgez_b, void, env, i32, i32) | ||
43 | DEF_HELPER_3(vmsknz_b, void, env, i32, i32) | ||
44 | + | ||
45 | +DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
46 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
49 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
50 | @@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w) | ||
51 | TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d) | ||
52 | TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b) | ||
53 | TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b) | ||
54 | + | ||
55 | +TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and) | ||
56 | +TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or) | ||
57 | +TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor) | ||
58 | +TRANS(vnor_v, gvec_vvv, MO_64, tcg_gen_gvec_nor) | ||
59 | + | ||
60 | +static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a) | ||
61 | +{ | ||
62 | + uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
63 | + | ||
64 | + CHECK_SXE; | ||
65 | + | ||
66 | + vd_ofs = vec_full_offset(a->vd); | ||
67 | + vj_ofs = vec_full_offset(a->vj); | ||
68 | + vk_ofs = vec_full_offset(a->vk); | ||
69 | + | ||
70 | + tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8); | ||
71 | + return true; | ||
72 | +} | ||
73 | +TRANS(vorn_v, gvec_vvv, MO_64, tcg_gen_gvec_orc) | ||
74 | +TRANS(vandi_b, gvec_vv_i, MO_8, tcg_gen_gvec_andi) | ||
75 | +TRANS(vori_b, gvec_vv_i, MO_8, tcg_gen_gvec_ori) | ||
76 | +TRANS(vxori_b, gvec_vv_i, MO_8, tcg_gen_gvec_xori) | ||
77 | + | ||
78 | +static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
79 | +{ | ||
80 | + TCGv_vec t1; | ||
81 | + | ||
82 | + t1 = tcg_constant_vec_matching(t, vece, imm); | ||
83 | + tcg_gen_nor_vec(vece, t, a, t1); | ||
84 | +} | ||
85 | + | ||
86 | +static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm) | ||
87 | +{ | ||
88 | + tcg_gen_movi_i64(t, dup_const(MO_8, imm)); | ||
89 | + tcg_gen_nor_i64(t, a, t); | ||
90 | +} | ||
91 | + | ||
92 | +static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
93 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
94 | +{ | ||
95 | + static const TCGOpcode vecop_list[] = { | ||
96 | + INDEX_op_nor_vec, 0 | ||
97 | + }; | ||
98 | + static const GVecGen2i op = { | ||
99 | + .fni8 = gen_vnori_b, | ||
100 | + .fniv = gen_vnori, | ||
101 | + .fnoi = gen_helper_vnori_b, | ||
102 | + .opt_opc = vecop_list, | ||
103 | + .vece = MO_8 | ||
104 | + }; | ||
105 | + | ||
106 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op); | ||
107 | +} | ||
108 | + | ||
109 | +TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b) | ||
110 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/target/loongarch/insns.decode | ||
113 | +++ b/target/loongarch/insns.decode | ||
114 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
115 | @vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i | ||
116 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
117 | @vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i | ||
118 | +@vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i | ||
119 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
120 | |||
121 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
122 | @@ -XXX,XX +XXX,XX @@ vmskltz_w 0111 00101001 11000 10010 ..... ..... @vv | ||
123 | vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv | ||
124 | vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv | ||
125 | vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv | ||
126 | + | ||
127 | +vand_v 0111 00010010 01100 ..... ..... ..... @vvv | ||
128 | +vor_v 0111 00010010 01101 ..... ..... ..... @vvv | ||
129 | +vxor_v 0111 00010010 01110 ..... ..... ..... @vvv | ||
130 | +vnor_v 0111 00010010 01111 ..... ..... ..... @vvv | ||
131 | +vandn_v 0111 00010010 10000 ..... ..... ..... @vvv | ||
132 | +vorn_v 0111 00010010 10001 ..... ..... ..... @vvv | ||
133 | + | ||
134 | +vandi_b 0111 00111101 00 ........ ..... ..... @vv_ui8 | ||
135 | +vori_b 0111 00111101 01 ........ ..... ..... @vv_ui8 | ||
136 | +vxori_b 0111 00111101 10 ........ ..... ..... @vv_ui8 | ||
137 | +vnori_b 0111 00111101 11 ........ ..... ..... @vv_ui8 | ||
138 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
139 | index XXXXXXX..XXXXXXX 100644 | ||
140 | --- a/target/loongarch/lsx_helper.c | ||
141 | +++ b/target/loongarch/lsx_helper.c | ||
142 | @@ -XXX,XX +XXX,XX @@ void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
143 | Vd->D(0) = (uint16_t)(~temp); | ||
144 | Vd->D(1) = 0; | ||
145 | } | ||
146 | + | ||
147 | +void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
148 | +{ | ||
149 | + int i; | ||
150 | + VReg *Vd = (VReg *)vd; | ||
151 | + VReg *Vj = (VReg *)vj; | ||
152 | + | ||
153 | + for (i = 0; i < LSX_LEN/8; i++) { | ||
154 | + Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); | ||
155 | + } | ||
156 | +} | ||
157 | -- | ||
158 | 2.31.1
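gen_vnori_b() first replicates the 8-bit immediate across a 64-bit word (dup_const(MO_8, imm)) and then applies a NOR, matching the per-byte helper; note also that trans_vandn_v() passes vk before vj to tcg_gen_gvec_andc(), because vandn.v computes ~vj & vk. A scalar sketch of the vnori.b doubleword path follows; it is illustrative only, and the multiply-by-0x0101... replication is an assumption about what dup_const(MO_8, ...) produces.

    #include <stdint.h>
    #include <stdio.h>

    /* Replicate an 8-bit immediate into every byte of a 64-bit word,
     * then apply the per-byte NOR of vnori.b to a whole doubleword. */
    static uint64_t vnori_b_dw(uint64_t src, uint8_t imm)
    {
        uint64_t dup = (uint64_t)imm * 0x0101010101010101ULL;
        return ~(src | dup);
    }

    int main(void)
    {
        /* prints: 0xf000f000f000f000 */
        printf("0x%016llx\n",
               (unsigned long long)vnori_b_dw(0x00FF00FF00FF00FFULL, 0x0F));
        return 0;
    }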
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSLL[I].{B/H/W/D}; | ||
3 | - VSRL[I].{B/H/W/D}; | ||
4 | - VSRA[I].{B/H/W/D}; | ||
5 | - VROTR[I].{B/H/W/D}. | ||
6 | 1 | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230504122810.4094787-23-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/disas.c | 36 +++++++++++++++++++++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 36 +++++++++++++++++++++ | ||
13 | target/loongarch/insns.decode | 36 +++++++++++++++++++++ | ||
14 | 3 files changed, 108 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vandi_b, vv_i) | ||
21 | INSN_LSX(vori_b, vv_i) | ||
22 | INSN_LSX(vxori_b, vv_i) | ||
23 | INSN_LSX(vnori_b, vv_i) | ||
24 | + | ||
25 | +INSN_LSX(vsll_b, vvv) | ||
26 | +INSN_LSX(vsll_h, vvv) | ||
27 | +INSN_LSX(vsll_w, vvv) | ||
28 | +INSN_LSX(vsll_d, vvv) | ||
29 | +INSN_LSX(vslli_b, vv_i) | ||
30 | +INSN_LSX(vslli_h, vv_i) | ||
31 | +INSN_LSX(vslli_w, vv_i) | ||
32 | +INSN_LSX(vslli_d, vv_i) | ||
33 | + | ||
34 | +INSN_LSX(vsrl_b, vvv) | ||
35 | +INSN_LSX(vsrl_h, vvv) | ||
36 | +INSN_LSX(vsrl_w, vvv) | ||
37 | +INSN_LSX(vsrl_d, vvv) | ||
38 | +INSN_LSX(vsrli_b, vv_i) | ||
39 | +INSN_LSX(vsrli_h, vv_i) | ||
40 | +INSN_LSX(vsrli_w, vv_i) | ||
41 | +INSN_LSX(vsrli_d, vv_i) | ||
42 | + | ||
43 | +INSN_LSX(vsra_b, vvv) | ||
44 | +INSN_LSX(vsra_h, vvv) | ||
45 | +INSN_LSX(vsra_w, vvv) | ||
46 | +INSN_LSX(vsra_d, vvv) | ||
47 | +INSN_LSX(vsrai_b, vv_i) | ||
48 | +INSN_LSX(vsrai_h, vv_i) | ||
49 | +INSN_LSX(vsrai_w, vv_i) | ||
50 | +INSN_LSX(vsrai_d, vv_i) | ||
51 | + | ||
52 | +INSN_LSX(vrotr_b, vvv) | ||
53 | +INSN_LSX(vrotr_h, vvv) | ||
54 | +INSN_LSX(vrotr_w, vvv) | ||
55 | +INSN_LSX(vrotr_d, vvv) | ||
56 | +INSN_LSX(vrotri_b, vv_i) | ||
57 | +INSN_LSX(vrotri_h, vv_i) | ||
58 | +INSN_LSX(vrotri_w, vv_i) | ||
59 | +INSN_LSX(vrotri_d, vv_i) | ||
60 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
63 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
64 | @@ -XXX,XX +XXX,XX @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
65 | } | ||
66 | |||
67 | TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b) | ||
68 | + | ||
69 | +TRANS(vsll_b, gvec_vvv, MO_8, tcg_gen_gvec_shlv) | ||
70 | +TRANS(vsll_h, gvec_vvv, MO_16, tcg_gen_gvec_shlv) | ||
71 | +TRANS(vsll_w, gvec_vvv, MO_32, tcg_gen_gvec_shlv) | ||
72 | +TRANS(vsll_d, gvec_vvv, MO_64, tcg_gen_gvec_shlv) | ||
73 | +TRANS(vslli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shli) | ||
74 | +TRANS(vslli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shli) | ||
75 | +TRANS(vslli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shli) | ||
76 | +TRANS(vslli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shli) | ||
77 | + | ||
78 | +TRANS(vsrl_b, gvec_vvv, MO_8, tcg_gen_gvec_shrv) | ||
79 | +TRANS(vsrl_h, gvec_vvv, MO_16, tcg_gen_gvec_shrv) | ||
80 | +TRANS(vsrl_w, gvec_vvv, MO_32, tcg_gen_gvec_shrv) | ||
81 | +TRANS(vsrl_d, gvec_vvv, MO_64, tcg_gen_gvec_shrv) | ||
82 | +TRANS(vsrli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shri) | ||
83 | +TRANS(vsrli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shri) | ||
84 | +TRANS(vsrli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shri) | ||
85 | +TRANS(vsrli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shri) | ||
86 | + | ||
87 | +TRANS(vsra_b, gvec_vvv, MO_8, tcg_gen_gvec_sarv) | ||
88 | +TRANS(vsra_h, gvec_vvv, MO_16, tcg_gen_gvec_sarv) | ||
89 | +TRANS(vsra_w, gvec_vvv, MO_32, tcg_gen_gvec_sarv) | ||
90 | +TRANS(vsra_d, gvec_vvv, MO_64, tcg_gen_gvec_sarv) | ||
91 | +TRANS(vsrai_b, gvec_vv_i, MO_8, tcg_gen_gvec_sari) | ||
92 | +TRANS(vsrai_h, gvec_vv_i, MO_16, tcg_gen_gvec_sari) | ||
93 | +TRANS(vsrai_w, gvec_vv_i, MO_32, tcg_gen_gvec_sari) | ||
94 | +TRANS(vsrai_d, gvec_vv_i, MO_64, tcg_gen_gvec_sari) | ||
95 | + | ||
96 | +TRANS(vrotr_b, gvec_vvv, MO_8, tcg_gen_gvec_rotrv) | ||
97 | +TRANS(vrotr_h, gvec_vvv, MO_16, tcg_gen_gvec_rotrv) | ||
98 | +TRANS(vrotr_w, gvec_vvv, MO_32, tcg_gen_gvec_rotrv) | ||
99 | +TRANS(vrotr_d, gvec_vvv, MO_64, tcg_gen_gvec_rotrv) | ||
100 | +TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri) | ||
101 | +TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri) | ||
102 | +TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri) | ||
103 | +TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri) | ||
104 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/loongarch/insns.decode | ||
107 | +++ b/target/loongarch/insns.decode | ||
108 | @@ -XXX,XX +XXX,XX @@ vandi_b 0111 00111101 00 ........ ..... ..... @vv_ui8 | ||
109 | vori_b 0111 00111101 01 ........ ..... ..... @vv_ui8 | ||
110 | vxori_b 0111 00111101 10 ........ ..... ..... @vv_ui8 | ||
111 | vnori_b 0111 00111101 11 ........ ..... ..... @vv_ui8 | ||
112 | + | ||
113 | +vsll_b 0111 00001110 10000 ..... ..... ..... @vvv | ||
114 | +vsll_h 0111 00001110 10001 ..... ..... ..... @vvv | ||
115 | +vsll_w 0111 00001110 10010 ..... ..... ..... @vvv | ||
116 | +vsll_d 0111 00001110 10011 ..... ..... ..... @vvv | ||
117 | +vslli_b 0111 00110010 11000 01 ... ..... ..... @vv_ui3 | ||
118 | +vslli_h 0111 00110010 11000 1 .... ..... ..... @vv_ui4 | ||
119 | +vslli_w 0111 00110010 11001 ..... ..... ..... @vv_ui5 | ||
120 | +vslli_d 0111 00110010 1101 ...... ..... ..... @vv_ui6 | ||
121 | + | ||
122 | +vsrl_b 0111 00001110 10100 ..... ..... ..... @vvv | ||
123 | +vsrl_h 0111 00001110 10101 ..... ..... ..... @vvv | ||
124 | +vsrl_w 0111 00001110 10110 ..... ..... ..... @vvv | ||
125 | +vsrl_d 0111 00001110 10111 ..... ..... ..... @vvv | ||
126 | +vsrli_b 0111 00110011 00000 01 ... ..... ..... @vv_ui3 | ||
127 | +vsrli_h 0111 00110011 00000 1 .... ..... ..... @vv_ui4 | ||
128 | +vsrli_w 0111 00110011 00001 ..... ..... ..... @vv_ui5 | ||
129 | +vsrli_d 0111 00110011 0001 ...... ..... ..... @vv_ui6 | ||
130 | + | ||
131 | +vsra_b 0111 00001110 11000 ..... ..... ..... @vvv | ||
132 | +vsra_h 0111 00001110 11001 ..... ..... ..... @vvv | ||
133 | +vsra_w 0111 00001110 11010 ..... ..... ..... @vvv | ||
134 | +vsra_d 0111 00001110 11011 ..... ..... ..... @vvv | ||
135 | +vsrai_b 0111 00110011 01000 01 ... ..... ..... @vv_ui3 | ||
136 | +vsrai_h 0111 00110011 01000 1 .... ..... ..... @vv_ui4 | ||
137 | +vsrai_w 0111 00110011 01001 ..... ..... ..... @vv_ui5 | ||
138 | +vsrai_d 0111 00110011 0101 ...... ..... ..... @vv_ui6 | ||
139 | + | ||
140 | +vrotr_b 0111 00001110 11100 ..... ..... ..... @vvv | ||
141 | +vrotr_h 0111 00001110 11101 ..... ..... ..... @vvv | ||
142 | +vrotr_w 0111 00001110 11110 ..... ..... ..... @vvv | ||
143 | +vrotr_d 0111 00001110 11111 ..... ..... ..... @vvv | ||
144 | +vrotri_b 0111 00101010 00000 01 ... ..... ..... @vv_ui3 | ||
145 | +vrotri_h 0111 00101010 00000 1 .... ..... ..... @vv_ui4 | ||
146 | +vrotri_w 0111 00101010 00001 ..... ..... ..... @vv_ui5 | ||
147 | +vrotri_d 0111 00101010 0001 ...... ..... ..... @vv_ui6 | ||
148 | -- | ||
149 | 2.31.1
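The @vv_ui3/ui4/ui5/ui6 formats give each element width an immediate field just wide enough for its shift range, from 3 bits for bytes up to 6 bits for doublewords. As a concrete illustration of how one of the decode lines above maps to an instruction word, here is a tiny encoder for vslli.w derived purely from the "0111 00110010 11001 imm:5 vj:5 vd:5" pattern; it is a sketch, not a verified assembler.

    #include <stdint.h>
    #include <stdio.h>

    /* Assemble vslli.w vd, vj, ui5: the 17 fixed opcode bits occupy bits 31:15
     * (0x732c8000), imm sits in bits 14:10, vj in 9:5, vd in 4:0. */
    static uint32_t encode_vslli_w(unsigned vd, unsigned vj, unsigned imm)
    {
        return 0x732c8000u | ((imm & 0x1f) << 10) | ((vj & 0x1f) << 5) | (vd & 0x1f);
    }

    int main(void)
    {
        printf("0x%08x\n", encode_vslli_w(1, 2, 3));   /* prints: 0x732c8c41 */
        return 0;
    }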
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSLLWIL.{H.B/W.H/D.W}; | ||
3 | - VSLLWIL.{HU.BU/WU.HU/DU.WU}; | ||
4 | - VEXTL.Q.D, VEXTL.QU.DU. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-24-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 9 +++++ | ||
11 | target/loongarch/helper.h | 9 +++++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 21 +++++++++++ | ||
13 | target/loongarch/insns.decode | 9 +++++ | ||
14 | target/loongarch/lsx_helper.c | 41 +++++++++++++++++++++ | ||
15 | 5 files changed, 89 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/disas.c | ||
20 | +++ b/target/loongarch/disas.c | ||
21 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vrotri_b, vv_i) | ||
22 | INSN_LSX(vrotri_h, vv_i) | ||
23 | INSN_LSX(vrotri_w, vv_i) | ||
24 | INSN_LSX(vrotri_d, vv_i) | ||
25 | + | ||
26 | +INSN_LSX(vsllwil_h_b, vv_i) | ||
27 | +INSN_LSX(vsllwil_w_h, vv_i) | ||
28 | +INSN_LSX(vsllwil_d_w, vv_i) | ||
29 | +INSN_LSX(vextl_q_d, vv) | ||
30 | +INSN_LSX(vsllwil_hu_bu, vv_i) | ||
31 | +INSN_LSX(vsllwil_wu_hu, vv_i) | ||
32 | +INSN_LSX(vsllwil_du_wu, vv_i) | ||
33 | +INSN_LSX(vextl_qu_du, vv) | ||
34 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/loongarch/helper.h | ||
37 | +++ b/target/loongarch/helper.h | ||
38 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vmskgez_b, void, env, i32, i32) | ||
39 | DEF_HELPER_3(vmsknz_b, void, env, i32, i32) | ||
40 | |||
41 | DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
42 | + | ||
43 | +DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32) | ||
44 | +DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32) | ||
45 | +DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32) | ||
46 | +DEF_HELPER_3(vextl_q_d, void, env, i32, i32) | ||
47 | +DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32) | ||
48 | +DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32) | ||
49 | +DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32) | ||
50 | +DEF_HELPER_3(vextl_qu_du, void, env, i32, i32) | ||
51 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
54 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
55 | @@ -XXX,XX +XXX,XX @@ static bool gen_vv(DisasContext *ctx, arg_vv *a, | ||
56 | return true; | ||
57 | } | ||
58 | |||
59 | +static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, | ||
60 | + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) | ||
61 | +{ | ||
62 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
63 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
64 | + TCGv_i32 imm = tcg_constant_i32(a->imm); | ||
65 | + | ||
66 | + CHECK_SXE; | ||
67 | + func(cpu_env, vd, vj, imm); | ||
68 | + return true; | ||
69 | +} | ||
70 | + | ||
71 | static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, | ||
72 | void (*func)(unsigned, uint32_t, uint32_t, | ||
73 | uint32_t, uint32_t, uint32_t)) | ||
74 | @@ -XXX,XX +XXX,XX @@ TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri) | ||
75 | TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri) | ||
76 | TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri) | ||
77 | TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri) | ||
78 | + | ||
79 | +TRANS(vsllwil_h_b, gen_vv_i, gen_helper_vsllwil_h_b) | ||
80 | +TRANS(vsllwil_w_h, gen_vv_i, gen_helper_vsllwil_w_h) | ||
81 | +TRANS(vsllwil_d_w, gen_vv_i, gen_helper_vsllwil_d_w) | ||
82 | +TRANS(vextl_q_d, gen_vv, gen_helper_vextl_q_d) | ||
83 | +TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu) | ||
84 | +TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu) | ||
85 | +TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu) | ||
86 | +TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du) | ||
87 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/loongarch/insns.decode | ||
90 | +++ b/target/loongarch/insns.decode | ||
91 | @@ -XXX,XX +XXX,XX @@ vrotri_b 0111 00101010 00000 01 ... ..... ..... @vv_ui3 | ||
92 | vrotri_h 0111 00101010 00000 1 .... ..... ..... @vv_ui4 | ||
93 | vrotri_w 0111 00101010 00001 ..... ..... ..... @vv_ui5 | ||
94 | vrotri_d 0111 00101010 0001 ...... ..... ..... @vv_ui6 | ||
95 | + | ||
96 | +vsllwil_h_b 0111 00110000 10000 01 ... ..... ..... @vv_ui3 | ||
97 | +vsllwil_w_h 0111 00110000 10000 1 .... ..... ..... @vv_ui4 | ||
98 | +vsllwil_d_w 0111 00110000 10001 ..... ..... ..... @vv_ui5 | ||
99 | +vextl_q_d 0111 00110000 10010 00000 ..... ..... @vv | ||
100 | +vsllwil_hu_bu 0111 00110000 11000 01 ... ..... ..... @vv_ui3 | ||
101 | +vsllwil_wu_hu 0111 00110000 11000 1 .... ..... ..... @vv_ui4 | ||
102 | +vsllwil_du_wu 0111 00110000 11001 ..... ..... ..... @vv_ui5 | ||
103 | +vextl_qu_du 0111 00110000 11010 00000 ..... ..... @vv | ||
104 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/target/loongarch/lsx_helper.c | ||
107 | +++ b/target/loongarch/lsx_helper.c | ||
108 | @@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) | ||
109 | Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); | ||
110 | } | ||
111 | } | ||
112 | + | ||
113 | +#define VSLLWIL(NAME, BIT, E1, E2) \ | ||
114 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
115 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
116 | +{ \ | ||
117 | + int i; \ | ||
118 | + VReg temp; \ | ||
119 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
120 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
121 | + typedef __typeof(temp.E1(0)) TD; \ | ||
122 | + \ | ||
123 | + temp.D(0) = 0; \ | ||
124 | + temp.D(1) = 0; \ | ||
125 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
126 | + temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ | ||
127 | + } \ | ||
128 | + *Vd = temp; \ | ||
129 | +} | ||
130 | + | ||
131 | +void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
132 | +{ | ||
133 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
134 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
135 | + | ||
136 | + Vd->Q(0) = int128_makes64(Vj->D(0)); | ||
137 | +} | ||
138 | + | ||
139 | +void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
140 | +{ | ||
141 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
142 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
143 | + | ||
144 | + Vd->Q(0) = int128_make64(Vj->D(0)); | ||
145 | +} | ||
146 | + | ||
147 | +VSLLWIL(vsllwil_h_b, 16, H, B) | ||
148 | +VSLLWIL(vsllwil_w_h, 32, W, H) | ||
149 | +VSLLWIL(vsllwil_d_w, 64, D, W) | ||
150 | +VSLLWIL(vsllwil_hu_bu, 16, UH, UB) | ||
151 | +VSLLWIL(vsllwil_wu_hu, 32, UW, UH) | ||
152 | +VSLLWIL(vsllwil_du_wu, 64, UD, UW) | ||
153 | -- | ||
154 | 2.31.1
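The VSLLWIL macro widens each low-half element of vj to the next element size and then shifts it left by the immediate, taken modulo the destination width. A scalar model of a single VSLLWIL.H.B lane; illustrative only, with the shift done on the unsigned bit pattern so it stays well defined for negative inputs.

    #include <stdint.h>
    #include <stdio.h>

    /* One VSLLWIL.H.B lane: sign-extend a byte to 16 bits, then shift
     * left by the immediate (modulo 16, as in the VSLLWIL macro). */
    static int16_t sllwil_h_b(int8_t v, unsigned imm)
    {
        uint32_t widened = (uint16_t)(int16_t)v;    /* sign-extended bit pattern */
        return (int16_t)(uint16_t)(widened << (imm % 16));
    }

    int main(void)
    {
        printf("%d %d\n", sllwil_h_b(-1, 3), sllwil_h_b(100, 2));   /* prints: -8 400 */
        return 0;
    }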
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSRLR[I].{B/H/W/D}; | ||
3 | - VSRAR[I].{B/H/W/D}. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-25-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 18 ++++ | ||
10 | target/loongarch/helper.h | 18 ++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 18 ++++ | ||
12 | target/loongarch/insns.decode | 18 ++++ | ||
13 | target/loongarch/lsx_helper.c | 104 ++++++++++++++++++++ | ||
14 | 5 files changed, 176 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsllwil_hu_bu, vv_i) | ||
21 | INSN_LSX(vsllwil_wu_hu, vv_i) | ||
22 | INSN_LSX(vsllwil_du_wu, vv_i) | ||
23 | INSN_LSX(vextl_qu_du, vv) | ||
24 | + | ||
25 | +INSN_LSX(vsrlr_b, vvv) | ||
26 | +INSN_LSX(vsrlr_h, vvv) | ||
27 | +INSN_LSX(vsrlr_w, vvv) | ||
28 | +INSN_LSX(vsrlr_d, vvv) | ||
29 | +INSN_LSX(vsrlri_b, vv_i) | ||
30 | +INSN_LSX(vsrlri_h, vv_i) | ||
31 | +INSN_LSX(vsrlri_w, vv_i) | ||
32 | +INSN_LSX(vsrlri_d, vv_i) | ||
33 | + | ||
34 | +INSN_LSX(vsrar_b, vvv) | ||
35 | +INSN_LSX(vsrar_h, vvv) | ||
36 | +INSN_LSX(vsrar_w, vvv) | ||
37 | +INSN_LSX(vsrar_d, vvv) | ||
38 | +INSN_LSX(vsrari_b, vv_i) | ||
39 | +INSN_LSX(vsrari_h, vv_i) | ||
40 | +INSN_LSX(vsrari_w, vv_i) | ||
41 | +INSN_LSX(vsrari_d, vv_i) | ||
42 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/loongarch/helper.h | ||
45 | +++ b/target/loongarch/helper.h | ||
46 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32) | ||
47 | DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32) | ||
48 | DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32) | ||
49 | DEF_HELPER_3(vextl_qu_du, void, env, i32, i32) | ||
50 | + | ||
51 | +DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32) | ||
52 | +DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32) | ||
53 | +DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32) | ||
55 | +DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32) | ||
56 | +DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32) | ||
57 | +DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32) | ||
58 | +DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32) | ||
59 | + | ||
60 | +DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32) | ||
61 | +DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32) | ||
66 | +DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32) | ||
67 | +DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32) | ||
68 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
71 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
72 | @@ -XXX,XX +XXX,XX @@ TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu) | ||
73 | TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu) | ||
74 | TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu) | ||
75 | TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du) | ||
76 | + | ||
77 | +TRANS(vsrlr_b, gen_vvv, gen_helper_vsrlr_b) | ||
78 | +TRANS(vsrlr_h, gen_vvv, gen_helper_vsrlr_h) | ||
79 | +TRANS(vsrlr_w, gen_vvv, gen_helper_vsrlr_w) | ||
80 | +TRANS(vsrlr_d, gen_vvv, gen_helper_vsrlr_d) | ||
81 | +TRANS(vsrlri_b, gen_vv_i, gen_helper_vsrlri_b) | ||
82 | +TRANS(vsrlri_h, gen_vv_i, gen_helper_vsrlri_h) | ||
83 | +TRANS(vsrlri_w, gen_vv_i, gen_helper_vsrlri_w) | ||
84 | +TRANS(vsrlri_d, gen_vv_i, gen_helper_vsrlri_d) | ||
85 | + | ||
86 | +TRANS(vsrar_b, gen_vvv, gen_helper_vsrar_b) | ||
87 | +TRANS(vsrar_h, gen_vvv, gen_helper_vsrar_h) | ||
88 | +TRANS(vsrar_w, gen_vvv, gen_helper_vsrar_w) | ||
89 | +TRANS(vsrar_d, gen_vvv, gen_helper_vsrar_d) | ||
90 | +TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b) | ||
91 | +TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h) | ||
92 | +TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w) | ||
93 | +TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d) | ||
94 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
95 | index XXXXXXX..XXXXXXX 100644 | ||
96 | --- a/target/loongarch/insns.decode | ||
97 | +++ b/target/loongarch/insns.decode | ||
98 | @@ -XXX,XX +XXX,XX @@ vsllwil_hu_bu 0111 00110000 11000 01 ... ..... ..... @vv_ui3 | ||
99 | vsllwil_wu_hu 0111 00110000 11000 1 .... ..... ..... @vv_ui4 | ||
100 | vsllwil_du_wu 0111 00110000 11001 ..... ..... ..... @vv_ui5 | ||
101 | vextl_qu_du 0111 00110000 11010 00000 ..... ..... @vv | ||
102 | + | ||
103 | +vsrlr_b 0111 00001111 00000 ..... ..... ..... @vvv | ||
104 | +vsrlr_h 0111 00001111 00001 ..... ..... ..... @vvv | ||
105 | +vsrlr_w 0111 00001111 00010 ..... ..... ..... @vvv | ||
106 | +vsrlr_d 0111 00001111 00011 ..... ..... ..... @vvv | ||
107 | +vsrlri_b 0111 00101010 01000 01 ... ..... ..... @vv_ui3 | ||
108 | +vsrlri_h 0111 00101010 01000 1 .... ..... ..... @vv_ui4 | ||
109 | +vsrlri_w 0111 00101010 01001 ..... ..... ..... @vv_ui5 | ||
110 | +vsrlri_d 0111 00101010 0101 ...... ..... ..... @vv_ui6 | ||
111 | + | ||
112 | +vsrar_b 0111 00001111 00100 ..... ..... ..... @vvv | ||
113 | +vsrar_h 0111 00001111 00101 ..... ..... ..... @vvv | ||
114 | +vsrar_w 0111 00001111 00110 ..... ..... ..... @vvv | ||
115 | +vsrar_d 0111 00001111 00111 ..... ..... ..... @vvv | ||
116 | +vsrari_b 0111 00101010 10000 01 ... ..... ..... @vv_ui3 | ||
117 | +vsrari_h 0111 00101010 10000 1 .... ..... ..... @vv_ui4 | ||
118 | +vsrari_w 0111 00101010 10001 ..... ..... ..... @vv_ui5 | ||
119 | +vsrari_d 0111 00101010 1001 ...... ..... ..... @vv_ui6 | ||
120 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/target/loongarch/lsx_helper.c | ||
123 | +++ b/target/loongarch/lsx_helper.c | ||
124 | @@ -XXX,XX +XXX,XX @@ VSLLWIL(vsllwil_d_w, 64, D, W) | ||
125 | VSLLWIL(vsllwil_hu_bu, 16, UH, UB) | ||
126 | VSLLWIL(vsllwil_wu_hu, 32, UW, UH) | ||
127 | VSLLWIL(vsllwil_du_wu, 64, UD, UW) | ||
128 | + | ||
129 | +#define do_vsrlr(E, T) \ | ||
130 | +static T do_vsrlr_ ##E(T s1, int sh) \ | ||
131 | +{ \ | ||
132 | + if (sh == 0) { \ | ||
133 | + return s1; \ | ||
134 | + } else { \ | ||
135 | + return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ | ||
136 | + } \ | ||
137 | +} | ||
138 | + | ||
139 | +do_vsrlr(B, uint8_t) | ||
140 | +do_vsrlr(H, uint16_t) | ||
141 | +do_vsrlr(W, uint32_t) | ||
142 | +do_vsrlr(D, uint64_t) | ||
143 | + | ||
144 | +#define VSRLR(NAME, BIT, T, E) \ | ||
145 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
146 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
147 | +{ \ | ||
148 | + int i; \ | ||
149 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
150 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
151 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
152 | + \ | ||
153 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
154 | + Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
155 | + } \ | ||
156 | +} | ||
157 | + | ||
158 | +VSRLR(vsrlr_b, 8, uint8_t, B) | ||
159 | +VSRLR(vsrlr_h, 16, uint16_t, H) | ||
160 | +VSRLR(vsrlr_w, 32, uint32_t, W) | ||
161 | +VSRLR(vsrlr_d, 64, uint64_t, D) | ||
162 | + | ||
163 | +#define VSRLRI(NAME, BIT, E) \ | ||
164 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
165 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
166 | +{ \ | ||
167 | + int i; \ | ||
168 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
169 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
170 | + \ | ||
171 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
172 | + Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ | ||
173 | + } \ | ||
174 | +} | ||
175 | + | ||
176 | +VSRLRI(vsrlri_b, 8, B) | ||
177 | +VSRLRI(vsrlri_h, 16, H) | ||
178 | +VSRLRI(vsrlri_w, 32, W) | ||
179 | +VSRLRI(vsrlri_d, 64, D) | ||
180 | + | ||
181 | +#define do_vsrar(E, T) \ | ||
182 | +static T do_vsrar_ ##E(T s1, int sh) \ | ||
183 | +{ \ | ||
184 | + if (sh == 0) { \ | ||
185 | + return s1; \ | ||
186 | + } else { \ | ||
187 | + return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ | ||
188 | + } \ | ||
189 | +} | ||
190 | + | ||
191 | +do_vsrar(B, int8_t) | ||
192 | +do_vsrar(H, int16_t) | ||
193 | +do_vsrar(W, int32_t) | ||
194 | +do_vsrar(D, int64_t) | ||
195 | + | ||
196 | +#define VSRAR(NAME, BIT, T, E) \ | ||
197 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
198 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
199 | +{ \ | ||
200 | + int i; \ | ||
201 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
202 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
203 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
204 | + \ | ||
205 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
206 | + Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ | ||
207 | + } \ | ||
208 | +} | ||
209 | + | ||
210 | +VSRAR(vsrar_b, 8, uint8_t, B) | ||
211 | +VSRAR(vsrar_h, 16, uint16_t, H) | ||
212 | +VSRAR(vsrar_w, 32, uint32_t, W) | ||
213 | +VSRAR(vsrar_d, 64, uint64_t, D) | ||
214 | + | ||
215 | +#define VSRARI(NAME, BIT, E) \ | ||
216 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
217 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
218 | +{ \ | ||
219 | + int i; \ | ||
220 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
221 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
222 | + \ | ||
223 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
224 | + Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ | ||
225 | + } \ | ||
226 | +} | ||
227 | + | ||
228 | +VSRARI(vsrari_b, 8, B) | ||
229 | +VSRARI(vsrari_h, 16, H) | ||
230 | +VSRARI(vsrari_w, 32, W) | ||
231 | +VSRARI(vsrari_d, 64, D) | ||
232 | -- | ||
233 | 2.31.1
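Both do_vsrlr_*() and do_vsrar_*() add back the most significant bit that was shifted out, i.e. the shifted value is rounded to nearest rather than truncated. A scalar model of the unsigned byte case; illustrative only, not part of the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Rounding logical right shift as used by VSRLR.B: shift right by sh and
     * add the highest bit shifted out, so a fractional part of 1/2 or more rounds up. */
    static uint8_t vsrlr_b(uint8_t s, int sh)
    {
        if (sh == 0) {
            return s;
        }
        return (s >> sh) + ((s >> (sh - 1)) & 0x1);
    }

    int main(void)
    {
        /* 0x15 >> 2 stays 5 (rounding bit clear); 0x16 >> 2 rounds up to 6 */
        printf("%d %d\n", vsrlr_b(0x15, 2), vsrlr_b(0x16, 2));
        return 0;
    }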
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSRLN.{B.H/H.W/W.D}; | ||
3 | - VSRAN.{B.H/H.W/W.D}; | ||
4 | - VSRLNI.{B.H/H.W/W.D/D.Q}; | ||
5 | - VSRANI.{B.H/H.W/W.D/D.Q}. | ||
6 | 1 | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230504122810.4094787-26-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/disas.c | 16 +++ | ||
12 | target/loongarch/helper.h | 16 +++ | ||
13 | target/loongarch/insn_trans/trans_lsx.c.inc | 16 +++ | ||
14 | target/loongarch/insns.decode | 17 +++ | ||
15 | target/loongarch/lsx_helper.c | 114 ++++++++++++++++++++ | ||
16 | 5 files changed, 179 insertions(+) | ||
17 | |||
18 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/loongarch/disas.c | ||
21 | +++ b/target/loongarch/disas.c | ||
22 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrari_b, vv_i) | ||
23 | INSN_LSX(vsrari_h, vv_i) | ||
24 | INSN_LSX(vsrari_w, vv_i) | ||
25 | INSN_LSX(vsrari_d, vv_i) | ||
26 | + | ||
27 | +INSN_LSX(vsrln_b_h, vvv) | ||
28 | +INSN_LSX(vsrln_h_w, vvv) | ||
29 | +INSN_LSX(vsrln_w_d, vvv) | ||
30 | +INSN_LSX(vsran_b_h, vvv) | ||
31 | +INSN_LSX(vsran_h_w, vvv) | ||
32 | +INSN_LSX(vsran_w_d, vvv) | ||
33 | + | ||
34 | +INSN_LSX(vsrlni_b_h, vv_i) | ||
35 | +INSN_LSX(vsrlni_h_w, vv_i) | ||
36 | +INSN_LSX(vsrlni_w_d, vv_i) | ||
37 | +INSN_LSX(vsrlni_d_q, vv_i) | ||
38 | +INSN_LSX(vsrani_b_h, vv_i) | ||
39 | +INSN_LSX(vsrani_h_w, vv_i) | ||
40 | +INSN_LSX(vsrani_w_d, vv_i) | ||
41 | +INSN_LSX(vsrani_d_q, vv_i) | ||
42 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/loongarch/helper.h | ||
45 | +++ b/target/loongarch/helper.h | ||
46 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32) | ||
47 | DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32) | ||
48 | DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32) | ||
49 | DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32) | ||
50 | + | ||
51 | +DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32) | ||
52 | +DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32) | ||
53 | +DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32) | ||
55 | +DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32) | ||
56 | +DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32) | ||
57 | + | ||
58 | +DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32) | ||
59 | +DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32) | ||
60 | +DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32) | ||
61 | +DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32) | ||
66 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
69 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
70 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b) | ||
71 | TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h) | ||
72 | TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w) | ||
73 | TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d) | ||
74 | + | ||
75 | +TRANS(vsrln_b_h, gen_vvv, gen_helper_vsrln_b_h) | ||
76 | +TRANS(vsrln_h_w, gen_vvv, gen_helper_vsrln_h_w) | ||
77 | +TRANS(vsrln_w_d, gen_vvv, gen_helper_vsrln_w_d) | ||
78 | +TRANS(vsran_b_h, gen_vvv, gen_helper_vsran_b_h) | ||
79 | +TRANS(vsran_h_w, gen_vvv, gen_helper_vsran_h_w) | ||
80 | +TRANS(vsran_w_d, gen_vvv, gen_helper_vsran_w_d) | ||
81 | + | ||
82 | +TRANS(vsrlni_b_h, gen_vv_i, gen_helper_vsrlni_b_h) | ||
83 | +TRANS(vsrlni_h_w, gen_vv_i, gen_helper_vsrlni_h_w) | ||
84 | +TRANS(vsrlni_w_d, gen_vv_i, gen_helper_vsrlni_w_d) | ||
85 | +TRANS(vsrlni_d_q, gen_vv_i, gen_helper_vsrlni_d_q) | ||
86 | +TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h) | ||
87 | +TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w) | ||
88 | +TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d) | ||
89 | +TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q) | ||
90 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/target/loongarch/insns.decode | ||
93 | +++ b/target/loongarch/insns.decode | ||
94 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
95 | @vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i | ||
96 | @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i | ||
97 | @vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i | ||
98 | +@vv_ui7 .... ........ ... imm:7 vj:5 vd:5 &vv_i | ||
99 | @vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i | ||
100 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
101 | |||
102 | @@ -XXX,XX +XXX,XX @@ vsrari_b 0111 00101010 10000 01 ... ..... ..... @vv_ui3 | ||
103 | vsrari_h 0111 00101010 10000 1 .... ..... ..... @vv_ui4 | ||
104 | vsrari_w 0111 00101010 10001 ..... ..... ..... @vv_ui5 | ||
105 | vsrari_d 0111 00101010 1001 ...... ..... ..... @vv_ui6 | ||
106 | + | ||
107 | +vsrln_b_h 0111 00001111 01001 ..... ..... ..... @vvv | ||
108 | +vsrln_h_w 0111 00001111 01010 ..... ..... ..... @vvv | ||
109 | +vsrln_w_d 0111 00001111 01011 ..... ..... ..... @vvv | ||
110 | +vsran_b_h 0111 00001111 01101 ..... ..... ..... @vvv | ||
111 | +vsran_h_w 0111 00001111 01110 ..... ..... ..... @vvv | ||
112 | +vsran_w_d 0111 00001111 01111 ..... ..... ..... @vvv | ||
113 | + | ||
114 | +vsrlni_b_h 0111 00110100 00000 1 .... ..... ..... @vv_ui4 | ||
115 | +vsrlni_h_w 0111 00110100 00001 ..... ..... ..... @vv_ui5 | ||
116 | +vsrlni_w_d 0111 00110100 0001 ...... ..... ..... @vv_ui6 | ||
117 | +vsrlni_d_q 0111 00110100 001 ....... ..... ..... @vv_ui7 | ||
118 | +vsrani_b_h 0111 00110101 10000 1 .... ..... ..... @vv_ui4 | ||
119 | +vsrani_h_w 0111 00110101 10001 ..... ..... ..... @vv_ui5 | ||
120 | +vsrani_w_d 0111 00110101 1001 ...... ..... ..... @vv_ui6 | ||
121 | +vsrani_d_q 0111 00110101 101 ....... ..... ..... @vv_ui7 | ||
122 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
123 | index XXXXXXX..XXXXXXX 100644 | ||
124 | --- a/target/loongarch/lsx_helper.c | ||
125 | +++ b/target/loongarch/lsx_helper.c | ||
126 | @@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_b, 8, B) | ||
127 | VSRARI(vsrari_h, 16, H) | ||
128 | VSRARI(vsrari_w, 32, W) | ||
129 | VSRARI(vsrari_d, 64, D) | ||
130 | + | ||
131 | +#define R_SHIFT(a, b) (a >> b) | ||
132 | + | ||
133 | +#define VSRLN(NAME, BIT, T, E1, E2) \ | ||
134 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
135 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
136 | +{ \ | ||
137 | + int i; \ | ||
138 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
139 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
140 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
141 | + \ | ||
142 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
143 | + Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \ | ||
144 | + } \ | ||
145 | + Vd->D(1) = 0; \ | ||
146 | +} | ||
147 | + | ||
148 | +VSRLN(vsrln_b_h, 16, uint16_t, B, H) | ||
149 | +VSRLN(vsrln_h_w, 32, uint32_t, H, W) | ||
150 | +VSRLN(vsrln_w_d, 64, uint64_t, W, D) | ||
151 | + | ||
152 | +#define VSRAN(NAME, BIT, T, E1, E2) \ | ||
153 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
154 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
155 | +{ \ | ||
156 | + int i; \ | ||
157 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
158 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
159 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
160 | + \ | ||
161 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
162 | + Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ | ||
163 | + } \ | ||
164 | + Vd->D(1) = 0; \ | ||
165 | +} | ||
166 | + | ||
167 | +VSRAN(vsran_b_h, 16, uint16_t, B, H) | ||
168 | +VSRAN(vsran_h_w, 32, uint32_t, H, W) | ||
169 | +VSRAN(vsran_w_d, 64, uint64_t, W, D) | ||
170 | + | ||
171 | +#define VSRLNI(NAME, BIT, T, E1, E2) \ | ||
172 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
173 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
174 | +{ \ | ||
175 | + int i, max; \ | ||
176 | + VReg temp; \ | ||
177 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
178 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
179 | + \ | ||
180 | + temp.D(0) = 0; \ | ||
181 | + temp.D(1) = 0; \ | ||
182 | + max = LSX_LEN/BIT; \ | ||
183 | + for (i = 0; i < max; i++) { \ | ||
184 | + temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ | ||
185 | + temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ | ||
186 | + } \ | ||
187 | + *Vd = temp; \ | ||
188 | +} | ||
189 | + | ||
190 | +void HELPER(vsrlni_d_q)(CPULoongArchState *env, | ||
191 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
192 | +{ | ||
193 | + VReg temp; | ||
194 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
195 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
196 | + | ||
197 | + temp.D(0) = 0; | ||
198 | + temp.D(1) = 0; | ||
199 | + temp.D(0) = int128_urshift(Vj->Q(0), imm % 128); | ||
200 | + temp.D(1) = int128_urshift(Vd->Q(0), imm % 128); | ||
201 | + *Vd = temp; | ||
202 | +} | ||
203 | + | ||
204 | +VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) | ||
205 | +VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) | ||
206 | +VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) | ||
207 | + | ||
208 | +#define VSRANI(NAME, BIT, E1, E2) \ | ||
209 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
210 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
211 | +{ \ | ||
212 | + int i, max; \ | ||
213 | + VReg temp; \ | ||
214 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
215 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
216 | + \ | ||
217 | + temp.D(0) = 0; \ | ||
218 | + temp.D(1) = 0; \ | ||
219 | + max = LSX_LEN/BIT; \ | ||
220 | + for (i = 0; i < max; i++) { \ | ||
221 | + temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ | ||
222 | + temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ | ||
223 | + } \ | ||
224 | + *Vd = temp; \ | ||
225 | +} | ||
226 | + | ||
227 | +void HELPER(vsrani_d_q)(CPULoongArchState *env, | ||
228 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
229 | +{ | ||
230 | + VReg temp; | ||
231 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
232 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
233 | + | ||
234 | + temp.D(0) = 0; | ||
235 | + temp.D(1) = 0; | ||
236 | + temp.D(0) = int128_rshift(Vj->Q(0), imm % 128); | ||
237 | + temp.D(1) = int128_rshift(Vd->Q(0), imm % 128); | ||
238 | + *Vd = temp; | ||
239 | +} | ||
240 | + | ||
241 | +VSRANI(vsrani_b_h, 16, B, H) | ||
242 | +VSRANI(vsrani_h_w, 32, H, W) | ||
243 | +VSRANI(vsrani_w_d, 64, W, D) | ||
244 | -- | ||
245 | 2.31.1 | ||
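The narrowing shifts above shift each wide lane right by a per-lane amount and keep only the low half of the result, with the upper 64 bits of the destination cleared (Vd->D(1) = 0). A simplified scalar model of the vsrln.b.h case, using hypothetical names and making no claim to match the helper line for line:

    #include <stdint.h>
    #include <stdio.h>

    /* Each 16-bit lane of j is shifted right by the matching lane of k (mod 16),
     * the low byte is kept, and the upper half of the 16-byte result is zeroed. */
    static void vsrln_b_h_model(uint8_t d[16], const uint16_t j[8], const uint16_t k[8])
    {
        for (int i = 0; i < 8; i++) {
            d[i] = (uint8_t)(j[i] >> (k[i] % 16));
        }
        for (int i = 8; i < 16; i++) {
            d[i] = 0;
        }
    }

    int main(void)
    {
        uint16_t j[8] = { 0x1234, 0xffff, 0x8000, 0, 0, 0, 0, 0 };
        uint16_t k[8] = { 4, 8, 15, 0, 0, 0, 0, 0 };
        uint8_t d[16];

        vsrln_b_h_model(d, j, k);
        printf("%02x %02x %02x\n", d[0], d[1], d[2]);   /* prints: 23 ff 01 */
        return 0;
    }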
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSRLRN.{B.H/H.W/W.D}; | ||
3 | - VSRARN.{B.H/H.W/W.D}; | ||
4 | - VSRLRNI.{B.H/H.W/W.D/D.Q}; | ||
5 | - VSRARNI.{B.H/H.W/W.D/D.Q}. | ||
6 | 1 | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
9 | Message-Id: <20230504122810.4094787-27-gaosong@loongson.cn> | ||
10 | --- | ||
11 | target/loongarch/disas.c | 16 +++ | ||
12 | target/loongarch/helper.h | 16 +++ | ||
13 | target/loongarch/insn_trans/trans_lsx.c.inc | 16 +++ | ||
14 | target/loongarch/insns.decode | 16 +++ | ||
15 | target/loongarch/lsx_helper.c | 126 ++++++++++++++++++++ | ||
16 | 5 files changed, 190 insertions(+) | ||
17 | |||
18 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/loongarch/disas.c | ||
21 | +++ b/target/loongarch/disas.c | ||
22 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrani_b_h, vv_i) | ||
23 | INSN_LSX(vsrani_h_w, vv_i) | ||
24 | INSN_LSX(vsrani_w_d, vv_i) | ||
25 | INSN_LSX(vsrani_d_q, vv_i) | ||
26 | + | ||
27 | +INSN_LSX(vsrlrn_b_h, vvv) | ||
28 | +INSN_LSX(vsrlrn_h_w, vvv) | ||
29 | +INSN_LSX(vsrlrn_w_d, vvv) | ||
30 | +INSN_LSX(vsrarn_b_h, vvv) | ||
31 | +INSN_LSX(vsrarn_h_w, vvv) | ||
32 | +INSN_LSX(vsrarn_w_d, vvv) | ||
33 | + | ||
34 | +INSN_LSX(vsrlrni_b_h, vv_i) | ||
35 | +INSN_LSX(vsrlrni_h_w, vv_i) | ||
36 | +INSN_LSX(vsrlrni_w_d, vv_i) | ||
37 | +INSN_LSX(vsrlrni_d_q, vv_i) | ||
38 | +INSN_LSX(vsrarni_b_h, vv_i) | ||
39 | +INSN_LSX(vsrarni_h_w, vv_i) | ||
40 | +INSN_LSX(vsrarni_w_d, vv_i) | ||
41 | +INSN_LSX(vsrarni_d_q, vv_i) | ||
42 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/loongarch/helper.h | ||
45 | +++ b/target/loongarch/helper.h | ||
46 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32) | ||
47 | DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32) | ||
48 | DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32) | ||
49 | DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32) | ||
50 | + | ||
51 | +DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32) | ||
52 | +DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32) | ||
53 | +DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32) | ||
54 | +DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32) | ||
55 | +DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32) | ||
56 | +DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32) | ||
57 | + | ||
58 | +DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32) | ||
59 | +DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32) | ||
60 | +DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32) | ||
61 | +DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32) | ||
66 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
69 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
70 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h) | ||
71 | TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w) | ||
72 | TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d) | ||
73 | TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q) | ||
74 | + | ||
75 | +TRANS(vsrlrn_b_h, gen_vvv, gen_helper_vsrlrn_b_h) | ||
76 | +TRANS(vsrlrn_h_w, gen_vvv, gen_helper_vsrlrn_h_w) | ||
77 | +TRANS(vsrlrn_w_d, gen_vvv, gen_helper_vsrlrn_w_d) | ||
78 | +TRANS(vsrarn_b_h, gen_vvv, gen_helper_vsrarn_b_h) | ||
79 | +TRANS(vsrarn_h_w, gen_vvv, gen_helper_vsrarn_h_w) | ||
80 | +TRANS(vsrarn_w_d, gen_vvv, gen_helper_vsrarn_w_d) | ||
81 | + | ||
82 | +TRANS(vsrlrni_b_h, gen_vv_i, gen_helper_vsrlrni_b_h) | ||
83 | +TRANS(vsrlrni_h_w, gen_vv_i, gen_helper_vsrlrni_h_w) | ||
84 | +TRANS(vsrlrni_w_d, gen_vv_i, gen_helper_vsrlrni_w_d) | ||
85 | +TRANS(vsrlrni_d_q, gen_vv_i, gen_helper_vsrlrni_d_q) | ||
86 | +TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h) | ||
87 | +TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w) | ||
88 | +TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d) | ||
89 | +TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q) | ||
90 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/target/loongarch/insns.decode | ||
93 | +++ b/target/loongarch/insns.decode | ||
94 | @@ -XXX,XX +XXX,XX @@ vsrani_b_h 0111 00110101 10000 1 .... ..... ..... @vv_ui4 | ||
95 | vsrani_h_w 0111 00110101 10001 ..... ..... ..... @vv_ui5 | ||
96 | vsrani_w_d 0111 00110101 1001 ...... ..... ..... @vv_ui6 | ||
97 | vsrani_d_q 0111 00110101 101 ....... ..... ..... @vv_ui7 | ||
98 | + | ||
99 | +vsrlrn_b_h 0111 00001111 10001 ..... ..... ..... @vvv | ||
100 | +vsrlrn_h_w 0111 00001111 10010 ..... ..... ..... @vvv | ||
101 | +vsrlrn_w_d 0111 00001111 10011 ..... ..... ..... @vvv | ||
102 | +vsrarn_b_h 0111 00001111 10101 ..... ..... ..... @vvv | ||
103 | +vsrarn_h_w 0111 00001111 10110 ..... ..... ..... @vvv | ||
104 | +vsrarn_w_d 0111 00001111 10111 ..... ..... ..... @vvv | ||
105 | + | ||
106 | +vsrlrni_b_h 0111 00110100 01000 1 .... ..... ..... @vv_ui4 | ||
107 | +vsrlrni_h_w 0111 00110100 01001 ..... ..... ..... @vv_ui5 | ||
108 | +vsrlrni_w_d 0111 00110100 0101 ...... ..... ..... @vv_ui6 | ||
109 | +vsrlrni_d_q 0111 00110100 011 ....... ..... ..... @vv_ui7 | ||
110 | +vsrarni_b_h 0111 00110101 11000 1 .... ..... ..... @vv_ui4 | ||
111 | +vsrarni_h_w 0111 00110101 11001 ..... ..... ..... @vv_ui5 | ||
112 | +vsrarni_w_d 0111 00110101 1101 ...... ..... ..... @vv_ui6 | ||
113 | +vsrarni_d_q 0111 00110101 111 ....... ..... ..... @vv_ui7 | ||
114 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
115 | index XXXXXXX..XXXXXXX 100644 | ||
116 | --- a/target/loongarch/lsx_helper.c | ||
117 | +++ b/target/loongarch/lsx_helper.c | ||
118 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsrani_d_q)(CPULoongArchState *env, | ||
119 | VSRANI(vsrani_b_h, 16, B, H) | ||
120 | VSRANI(vsrani_h_w, 32, H, W) | ||
121 | VSRANI(vsrani_w_d, 64, W, D) | ||
122 | + | ||
123 | +#define VSRLRN(NAME, BIT, T, E1, E2) \ | ||
124 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
125 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
126 | +{ \ | ||
127 | + int i; \ | ||
128 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
129 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
130 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
131 | + \ | ||
132 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
133 | + Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
134 | + } \ | ||
135 | + Vd->D(1) = 0; \ | ||
136 | +} | ||
137 | + | ||
138 | +VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H) | ||
139 | +VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W) | ||
140 | +VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D) | ||
141 | + | ||
142 | +#define VSRARN(NAME, BIT, T, E1, E2) \ | ||
143 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
144 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
145 | +{ \ | ||
146 | + int i; \ | ||
147 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
148 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
149 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
150 | + \ | ||
151 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
152 | + Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ | ||
153 | + } \ | ||
154 | + Vd->D(1) = 0; \ | ||
155 | +} | ||
156 | + | ||
157 | +VSRARN(vsrarn_b_h, 16, uint8_t, B, H) | ||
158 | +VSRARN(vsrarn_h_w, 32, uint16_t, H, W) | ||
159 | +VSRARN(vsrarn_w_d, 64, uint32_t, W, D) | ||
160 | + | ||
161 | +#define VSRLRNI(NAME, BIT, E1, E2) \ | ||
162 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
163 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
164 | +{ \ | ||
165 | + int i, max; \ | ||
166 | + VReg temp; \ | ||
167 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
168 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
169 | + \ | ||
170 | + temp.D(0) = 0; \ | ||
171 | + temp.D(1) = 0; \ | ||
172 | + max = LSX_LEN/BIT; \ | ||
173 | + for (i = 0; i < max; i++) { \ | ||
174 | + temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ | ||
175 | + temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ | ||
176 | + } \ | ||
177 | + *Vd = temp; \ | ||
178 | +} | ||
179 | + | ||
180 | +void HELPER(vsrlrni_d_q)(CPULoongArchState *env, | ||
181 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
182 | +{ | ||
183 | + VReg temp; | ||
184 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
185 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
186 | + Int128 r1, r2; | ||
187 | + | ||
188 | + if (imm == 0) { | ||
189 | + temp.D(0) = int128_getlo(Vj->Q(0)); | ||
190 | + temp.D(1) = int128_getlo(Vd->Q(0)); | ||
191 | + } else { | ||
192 | + r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); | ||
193 | + r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); | ||
194 | + | ||
195 | + temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1)); | ||
196 | + temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2)); | ||
197 | + } | ||
198 | + *Vd = temp; | ||
199 | +} | ||
200 | + | ||
201 | +VSRLRNI(vsrlrni_b_h, 16, B, H) | ||
202 | +VSRLRNI(vsrlrni_h_w, 32, H, W) | ||
203 | +VSRLRNI(vsrlrni_w_d, 64, W, D) | ||
204 | + | ||
205 | +#define VSRARNI(NAME, BIT, E1, E2) \ | ||
206 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
207 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
208 | +{ \ | ||
209 | + int i, max; \ | ||
210 | + VReg temp; \ | ||
211 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
212 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
213 | + \ | ||
214 | + temp.D(0) = 0; \ | ||
215 | + temp.D(1) = 0; \ | ||
216 | + max = LSX_LEN/BIT; \ | ||
217 | + for (i = 0; i < max; i++) { \ | ||
218 | + temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ | ||
219 | + temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ | ||
220 | + } \ | ||
221 | + *Vd = temp; \ | ||
222 | +} | ||
223 | + | ||
224 | +void HELPER(vsrarni_d_q)(CPULoongArchState *env, | ||
225 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
226 | +{ | ||
227 | + VReg temp; | ||
228 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
229 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
230 | + Int128 r1, r2; | ||
231 | + | ||
232 | + if (imm == 0) { | ||
233 | + temp.D(0) = int128_getlo(Vj->Q(0)); | ||
234 | + temp.D(1) = int128_getlo(Vd->Q(0)); | ||
235 | + } else { | ||
236 | + r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
237 | + r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
238 | + | ||
239 | + temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1)); | ||
240 | + temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2)); | ||
241 | + } | ||
242 | + *Vd = temp; | ||
243 | +} | ||
244 | + | ||
245 | +VSRARNI(vsrarni_b_h, 16, B, H) | ||
246 | +VSRARNI(vsrarni_h_w, 32, H, W) | ||
247 | +VSRARNI(vsrarni_w_d, 64, W, D) | ||
248 | -- | ||
249 | 2.31.1 | ||
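The vsrlrn/vsrarn group above combines the two earlier ideas: a round-to-nearest shift on the wide lane, followed by keeping only the low half. A tiny standalone sketch of the b.h flavour (hypothetical helper name, for illustration only):

    #include <stdint.h>
    #include <stdio.h>

    /* Rounding right shift on a 16-bit lane, then truncation to 8 bits. */
    static uint8_t srlrn_b_h(uint16_t v, int sh)
    {
        uint16_t r = (sh == 0) ? v : (uint16_t)((v >> sh) + ((v >> (sh - 1)) & 1));
        return (uint8_t)r;
    }

    int main(void)
    {
        /* 0x128 >> 4 truncates to 0x12; the rounding shift yields 0x13 (0x128 / 16 = 18.5). */
        printf("%02x\n", srlrn_b_h(0x128, 4));
        return 0;
    }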
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSSRLN.{B.H/H.W/W.D}; | ||
3 | - VSSRAN.{B.H/H.W/W.D}; | ||
4 | - VSSRLN.{BU.H/HU.W/WU.D}; | ||
5 | - VSSRAN.{BU.H/HU.W/WU.D}; | ||
6 | - VSSRLNI.{B.H/H.W/W.D/D.Q}; | ||
7 | - VSSRANI.{B.H/H.W/W.D/D.Q}; | ||
8 | - VSSRLNI.{BU.H/HU.W/WU.D/DU.Q}; | ||
9 | - VSSRANI.{BU.H/HU.W/WU.D/DU.Q}. | ||
10 | 1 | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
13 | Message-Id: <20230504122810.4094787-28-gaosong@loongson.cn> | ||
14 | --- | ||
15 | target/loongarch/disas.c | 30 ++ | ||
16 | target/loongarch/helper.h | 30 ++ | ||
17 | target/loongarch/insn_trans/trans_lsx.c.inc | 30 ++ | ||
18 | target/loongarch/insns.decode | 30 ++ | ||
19 | target/loongarch/lsx_helper.c | 379 ++++++++++++++++++++ | ||
20 | 5 files changed, 499 insertions(+) | ||
21 | |||
22 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/target/loongarch/disas.c | ||
25 | +++ b/target/loongarch/disas.c | ||
26 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrarni_b_h, vv_i) | ||
27 | INSN_LSX(vsrarni_h_w, vv_i) | ||
28 | INSN_LSX(vsrarni_w_d, vv_i) | ||
29 | INSN_LSX(vsrarni_d_q, vv_i) | ||
30 | + | ||
31 | +INSN_LSX(vssrln_b_h, vvv) | ||
32 | +INSN_LSX(vssrln_h_w, vvv) | ||
33 | +INSN_LSX(vssrln_w_d, vvv) | ||
34 | +INSN_LSX(vssran_b_h, vvv) | ||
35 | +INSN_LSX(vssran_h_w, vvv) | ||
36 | +INSN_LSX(vssran_w_d, vvv) | ||
37 | +INSN_LSX(vssrln_bu_h, vvv) | ||
38 | +INSN_LSX(vssrln_hu_w, vvv) | ||
39 | +INSN_LSX(vssrln_wu_d, vvv) | ||
40 | +INSN_LSX(vssran_bu_h, vvv) | ||
41 | +INSN_LSX(vssran_hu_w, vvv) | ||
42 | +INSN_LSX(vssran_wu_d, vvv) | ||
43 | + | ||
44 | +INSN_LSX(vssrlni_b_h, vv_i) | ||
45 | +INSN_LSX(vssrlni_h_w, vv_i) | ||
46 | +INSN_LSX(vssrlni_w_d, vv_i) | ||
47 | +INSN_LSX(vssrlni_d_q, vv_i) | ||
48 | +INSN_LSX(vssrani_b_h, vv_i) | ||
49 | +INSN_LSX(vssrani_h_w, vv_i) | ||
50 | +INSN_LSX(vssrani_w_d, vv_i) | ||
51 | +INSN_LSX(vssrani_d_q, vv_i) | ||
52 | +INSN_LSX(vssrlni_bu_h, vv_i) | ||
53 | +INSN_LSX(vssrlni_hu_w, vv_i) | ||
54 | +INSN_LSX(vssrlni_wu_d, vv_i) | ||
55 | +INSN_LSX(vssrlni_du_q, vv_i) | ||
56 | +INSN_LSX(vssrani_bu_h, vv_i) | ||
57 | +INSN_LSX(vssrani_hu_w, vv_i) | ||
58 | +INSN_LSX(vssrani_wu_d, vv_i) | ||
59 | +INSN_LSX(vssrani_du_q, vv_i) | ||
60 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/helper.h | ||
63 | +++ b/target/loongarch/helper.h | ||
64 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32) | ||
65 | DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32) | ||
66 | DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32) | ||
67 | DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32) | ||
68 | + | ||
69 | +DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32) | ||
70 | +DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32) | ||
71 | +DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32) | ||
72 | +DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32) | ||
73 | +DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32) | ||
74 | +DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32) | ||
75 | +DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32) | ||
76 | +DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32) | ||
77 | +DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32) | ||
78 | +DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32) | ||
79 | +DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32) | ||
80 | +DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32) | ||
81 | + | ||
82 | +DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32) | ||
83 | +DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32) | ||
84 | +DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32) | ||
85 | +DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32) | ||
86 | +DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32) | ||
87 | +DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32) | ||
88 | +DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32) | ||
89 | +DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32) | ||
90 | +DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32) | ||
91 | +DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32) | ||
92 | +DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32) | ||
93 | +DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32) | ||
94 | +DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32) | ||
95 | +DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32) | ||
96 | +DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32) | ||
97 | +DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32) | ||
98 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
101 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
102 | @@ -XXX,XX +XXX,XX @@ TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h) | ||
103 | TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w) | ||
104 | TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d) | ||
105 | TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q) | ||
106 | + | ||
107 | +TRANS(vssrln_b_h, gen_vvv, gen_helper_vssrln_b_h) | ||
108 | +TRANS(vssrln_h_w, gen_vvv, gen_helper_vssrln_h_w) | ||
109 | +TRANS(vssrln_w_d, gen_vvv, gen_helper_vssrln_w_d) | ||
110 | +TRANS(vssran_b_h, gen_vvv, gen_helper_vssran_b_h) | ||
111 | +TRANS(vssran_h_w, gen_vvv, gen_helper_vssran_h_w) | ||
112 | +TRANS(vssran_w_d, gen_vvv, gen_helper_vssran_w_d) | ||
113 | +TRANS(vssrln_bu_h, gen_vvv, gen_helper_vssrln_bu_h) | ||
114 | +TRANS(vssrln_hu_w, gen_vvv, gen_helper_vssrln_hu_w) | ||
115 | +TRANS(vssrln_wu_d, gen_vvv, gen_helper_vssrln_wu_d) | ||
116 | +TRANS(vssran_bu_h, gen_vvv, gen_helper_vssran_bu_h) | ||
117 | +TRANS(vssran_hu_w, gen_vvv, gen_helper_vssran_hu_w) | ||
118 | +TRANS(vssran_wu_d, gen_vvv, gen_helper_vssran_wu_d) | ||
119 | + | ||
120 | +TRANS(vssrlni_b_h, gen_vv_i, gen_helper_vssrlni_b_h) | ||
121 | +TRANS(vssrlni_h_w, gen_vv_i, gen_helper_vssrlni_h_w) | ||
122 | +TRANS(vssrlni_w_d, gen_vv_i, gen_helper_vssrlni_w_d) | ||
123 | +TRANS(vssrlni_d_q, gen_vv_i, gen_helper_vssrlni_d_q) | ||
124 | +TRANS(vssrani_b_h, gen_vv_i, gen_helper_vssrani_b_h) | ||
125 | +TRANS(vssrani_h_w, gen_vv_i, gen_helper_vssrani_h_w) | ||
126 | +TRANS(vssrani_w_d, gen_vv_i, gen_helper_vssrani_w_d) | ||
127 | +TRANS(vssrani_d_q, gen_vv_i, gen_helper_vssrani_d_q) | ||
128 | +TRANS(vssrlni_bu_h, gen_vv_i, gen_helper_vssrlni_bu_h) | ||
129 | +TRANS(vssrlni_hu_w, gen_vv_i, gen_helper_vssrlni_hu_w) | ||
130 | +TRANS(vssrlni_wu_d, gen_vv_i, gen_helper_vssrlni_wu_d) | ||
131 | +TRANS(vssrlni_du_q, gen_vv_i, gen_helper_vssrlni_du_q) | ||
132 | +TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h) | ||
133 | +TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w) | ||
134 | +TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d) | ||
135 | +TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q) | ||
136 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
137 | index XXXXXXX..XXXXXXX 100644 | ||
138 | --- a/target/loongarch/insns.decode | ||
139 | +++ b/target/loongarch/insns.decode | ||
140 | @@ -XXX,XX +XXX,XX @@ vsrarni_b_h 0111 00110101 11000 1 .... ..... ..... @vv_ui4 | ||
141 | vsrarni_h_w 0111 00110101 11001 ..... ..... ..... @vv_ui5 | ||
142 | vsrarni_w_d 0111 00110101 1101 ...... ..... ..... @vv_ui6 | ||
143 | vsrarni_d_q 0111 00110101 111 ....... ..... ..... @vv_ui7 | ||
144 | + | ||
145 | +vssrln_b_h 0111 00001111 11001 ..... ..... ..... @vvv | ||
146 | +vssrln_h_w 0111 00001111 11010 ..... ..... ..... @vvv | ||
147 | +vssrln_w_d 0111 00001111 11011 ..... ..... ..... @vvv | ||
148 | +vssran_b_h 0111 00001111 11101 ..... ..... ..... @vvv | ||
149 | +vssran_h_w 0111 00001111 11110 ..... ..... ..... @vvv | ||
150 | +vssran_w_d 0111 00001111 11111 ..... ..... ..... @vvv | ||
151 | +vssrln_bu_h 0111 00010000 01001 ..... ..... ..... @vvv | ||
152 | +vssrln_hu_w 0111 00010000 01010 ..... ..... ..... @vvv | ||
153 | +vssrln_wu_d 0111 00010000 01011 ..... ..... ..... @vvv | ||
154 | +vssran_bu_h 0111 00010000 01101 ..... ..... ..... @vvv | ||
155 | +vssran_hu_w 0111 00010000 01110 ..... ..... ..... @vvv | ||
156 | +vssran_wu_d 0111 00010000 01111 ..... ..... ..... @vvv | ||
157 | + | ||
158 | +vssrlni_b_h 0111 00110100 10000 1 .... ..... ..... @vv_ui4 | ||
159 | +vssrlni_h_w 0111 00110100 10001 ..... ..... ..... @vv_ui5 | ||
160 | +vssrlni_w_d 0111 00110100 1001 ...... ..... ..... @vv_ui6 | ||
161 | +vssrlni_d_q 0111 00110100 101 ....... ..... ..... @vv_ui7 | ||
162 | +vssrani_b_h 0111 00110110 00000 1 .... ..... ..... @vv_ui4 | ||
163 | +vssrani_h_w 0111 00110110 00001 ..... ..... ..... @vv_ui5 | ||
164 | +vssrani_w_d 0111 00110110 0001 ...... ..... ..... @vv_ui6 | ||
165 | +vssrani_d_q 0111 00110110 001 ....... ..... ..... @vv_ui7 | ||
166 | +vssrlni_bu_h 0111 00110100 11000 1 .... ..... ..... @vv_ui4 | ||
167 | +vssrlni_hu_w 0111 00110100 11001 ..... ..... ..... @vv_ui5 | ||
168 | +vssrlni_wu_d 0111 00110100 1101 ...... ..... ..... @vv_ui6 | ||
169 | +vssrlni_du_q 0111 00110100 111 ....... ..... ..... @vv_ui7 | ||
170 | +vssrani_bu_h 0111 00110110 01000 1 .... ..... ..... @vv_ui4 | ||
171 | +vssrani_hu_w 0111 00110110 01001 ..... ..... ..... @vv_ui5 | ||
172 | +vssrani_wu_d 0111 00110110 0101 ...... ..... ..... @vv_ui6 | ||
173 | +vssrani_du_q 0111 00110110 011 ....... ..... ..... @vv_ui7 | ||
174 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/target/loongarch/lsx_helper.c | ||
177 | +++ b/target/loongarch/lsx_helper.c | ||
178 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsrarni_d_q)(CPULoongArchState *env, | ||
179 | VSRARNI(vsrarni_b_h, 16, B, H) | ||
180 | VSRARNI(vsrarni_h_w, 32, H, W) | ||
181 | VSRARNI(vsrarni_w_d, 64, W, D) | ||
182 | + | ||
183 | +#define SSRLNS(NAME, T1, T2, T3) \ | ||
184 | +static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \ | ||
185 | +{ \ | ||
186 | + T1 shft_res; \ | ||
187 | + if (sa == 0) { \ | ||
188 | + shft_res = e2; \ | ||
189 | + } else { \ | ||
190 | + shft_res = (((T1)e2) >> sa); \ | ||
191 | + } \ | ||
192 | + T3 mask; \ | ||
193 | + mask = (1ull << sh) -1; \ | ||
194 | + if (shft_res > mask) { \ | ||
195 | + return mask; \ | ||
196 | + } else { \ | ||
197 | + return shft_res; \ | ||
198 | + } \ | ||
199 | +} | ||
200 | + | ||
201 | +SSRLNS(B, uint16_t, int16_t, uint8_t) | ||
202 | +SSRLNS(H, uint32_t, int32_t, uint16_t) | ||
203 | +SSRLNS(W, uint64_t, int64_t, uint32_t) | ||
204 | + | ||
205 | +#define VSSRLN(NAME, BIT, T, E1, E2) \ | ||
206 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
207 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
208 | +{ \ | ||
209 | + int i; \ | ||
210 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
211 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
212 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
213 | + \ | ||
214 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
215 | + Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \ | ||
216 | + } \ | ||
217 | + Vd->D(1) = 0; \ | ||
218 | +} | ||
219 | + | ||
220 | +VSSRLN(vssrln_b_h, 16, uint16_t, B, H) | ||
221 | +VSSRLN(vssrln_h_w, 32, uint32_t, H, W) | ||
222 | +VSSRLN(vssrln_w_d, 64, uint64_t, W, D) | ||
223 | + | ||
224 | +#define SSRANS(E, T1, T2) \ | ||
225 | +static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ | ||
226 | +{ \ | ||
227 | + T1 shft_res; \ | ||
228 | + if (sa == 0) { \ | ||
229 | + shft_res = e2; \ | ||
230 | + } else { \ | ||
231 | + shft_res = e2 >> sa; \ | ||
232 | + } \ | ||
233 | + T2 mask; \ | ||
234 | + mask = (1ll << sh) -1; \ | ||
235 | + if (shft_res > mask) { \ | ||
236 | + return mask; \ | ||
237 | + } else if (shft_res < -(mask +1)) { \ | ||
238 | + return ~mask; \ | ||
239 | + } else { \ | ||
240 | + return shft_res; \ | ||
241 | + } \ | ||
242 | +} | ||
243 | + | ||
244 | +SSRANS(B, int16_t, int8_t) | ||
245 | +SSRANS(H, int32_t, int16_t) | ||
246 | +SSRANS(W, int64_t, int32_t) | ||
247 | + | ||
248 | +#define VSSRAN(NAME, BIT, T, E1, E2) \ | ||
249 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
250 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
251 | +{ \ | ||
252 | + int i; \ | ||
253 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
254 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
255 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
256 | + \ | ||
257 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
258 | + Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
259 | + } \ | ||
260 | + Vd->D(1) = 0; \ | ||
261 | +} | ||
262 | + | ||
263 | +VSSRAN(vssran_b_h, 16, uint16_t, B, H) | ||
264 | +VSSRAN(vssran_h_w, 32, uint32_t, H, W) | ||
265 | +VSSRAN(vssran_w_d, 64, uint64_t, W, D) | ||
266 | + | ||
267 | +#define SSRLNU(E, T1, T2, T3) \ | ||
268 | +static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ | ||
269 | +{ \ | ||
270 | + T1 shft_res; \ | ||
271 | + if (sa == 0) { \ | ||
272 | + shft_res = e2; \ | ||
273 | + } else { \ | ||
274 | + shft_res = (((T1)e2) >> sa); \ | ||
275 | + } \ | ||
276 | + T2 mask; \ | ||
277 | + mask = (1ull << sh) -1; \ | ||
278 | + if (shft_res > mask) { \ | ||
279 | + return mask; \ | ||
280 | + } else { \ | ||
281 | + return shft_res; \ | ||
282 | + } \ | ||
283 | +} | ||
284 | + | ||
285 | +SSRLNU(B, uint16_t, uint8_t, int16_t) | ||
286 | +SSRLNU(H, uint32_t, uint16_t, int32_t) | ||
287 | +SSRLNU(W, uint64_t, uint32_t, int64_t) | ||
288 | + | ||
289 | +#define VSSRLNU(NAME, BIT, T, E1, E2) \ | ||
290 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
291 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
292 | +{ \ | ||
293 | + int i; \ | ||
294 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
295 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
296 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
297 | + \ | ||
298 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
299 | + Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
300 | + } \ | ||
301 | + Vd->D(1) = 0; \ | ||
302 | +} | ||
303 | + | ||
304 | +VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H) | ||
305 | +VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W) | ||
306 | +VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D) | ||
307 | + | ||
308 | +#define SSRANU(E, T1, T2, T3) \ | ||
309 | +static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ | ||
310 | +{ \ | ||
311 | + T1 shft_res; \ | ||
312 | + if (sa == 0) { \ | ||
313 | + shft_res = e2; \ | ||
314 | + } else { \ | ||
315 | + shft_res = e2 >> sa; \ | ||
316 | + } \ | ||
317 | + if (e2 < 0) { \ | ||
318 | + shft_res = 0; \ | ||
319 | + } \ | ||
320 | + T2 mask; \ | ||
321 | + mask = (1ull << sh) -1; \ | ||
322 | + if (shft_res > mask) { \ | ||
323 | + return mask; \ | ||
324 | + } else { \ | ||
325 | + return shft_res; \ | ||
326 | + } \ | ||
327 | +} | ||
328 | + | ||
329 | +SSRANU(B, uint16_t, uint8_t, int16_t) | ||
330 | +SSRANU(H, uint32_t, uint16_t, int32_t) | ||
331 | +SSRANU(W, uint64_t, uint32_t, int64_t) | ||
332 | + | ||
333 | +#define VSSRANU(NAME, BIT, T, E1, E2) \ | ||
334 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
335 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
336 | +{ \ | ||
337 | + int i; \ | ||
338 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
339 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
340 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
341 | + \ | ||
342 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
343 | + Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
344 | + } \ | ||
345 | + Vd->D(1) = 0; \ | ||
346 | +} | ||
347 | + | ||
348 | +VSSRANU(vssran_bu_h, 16, uint16_t, B, H) | ||
349 | +VSSRANU(vssran_hu_w, 32, uint32_t, H, W) | ||
350 | +VSSRANU(vssran_wu_d, 64, uint64_t, W, D) | ||
351 | + | ||
352 | +#define VSSRLNI(NAME, BIT, E1, E2) \ | ||
353 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
354 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
355 | +{ \ | ||
356 | + int i; \ | ||
357 | + VReg temp; \ | ||
358 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
359 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
360 | + \ | ||
361 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
362 | + temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
363 | + temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ | ||
364 | + } \ | ||
365 | + *Vd = temp; \ | ||
366 | +} | ||
367 | + | ||
368 | +void HELPER(vssrlni_d_q)(CPULoongArchState *env, | ||
369 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
370 | +{ | ||
371 | + Int128 shft_res1, shft_res2, mask; | ||
372 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
373 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
374 | + | ||
375 | + if (imm == 0) { | ||
376 | + shft_res1 = Vj->Q(0); | ||
377 | + shft_res2 = Vd->Q(0); | ||
378 | + } else { | ||
379 | + shft_res1 = int128_urshift(Vj->Q(0), imm); | ||
380 | + shft_res2 = int128_urshift(Vd->Q(0), imm); | ||
381 | + } | ||
382 | + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
383 | + | ||
384 | + if (int128_ult(mask, shft_res1)) { | ||
385 | + Vd->D(0) = int128_getlo(mask); | ||
386 | + }else { | ||
387 | + Vd->D(0) = int128_getlo(shft_res1); | ||
388 | + } | ||
389 | + | ||
390 | + if (int128_ult(mask, shft_res2)) { | ||
391 | + Vd->D(1) = int128_getlo(mask); | ||
392 | + }else { | ||
393 | + Vd->D(1) = int128_getlo(shft_res2); | ||
394 | + } | ||
395 | +} | ||
396 | + | ||
397 | +VSSRLNI(vssrlni_b_h, 16, B, H) | ||
398 | +VSSRLNI(vssrlni_h_w, 32, H, W) | ||
399 | +VSSRLNI(vssrlni_w_d, 64, W, D) | ||
400 | + | ||
401 | +#define VSSRANI(NAME, BIT, E1, E2) \ | ||
402 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
403 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
404 | +{ \ | ||
405 | + int i; \ | ||
406 | + VReg temp; \ | ||
407 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
408 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
409 | + \ | ||
410 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
411 | + temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
412 | + temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ | ||
413 | + } \ | ||
414 | + *Vd = temp; \ | ||
415 | +} | ||
416 | + | ||
417 | +void HELPER(vssrani_d_q)(CPULoongArchState *env, | ||
418 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
419 | +{ | ||
420 | + Int128 shft_res1, shft_res2, mask, min; | ||
421 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
422 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
423 | + | ||
424 | + if (imm == 0) { | ||
425 | + shft_res1 = Vj->Q(0); | ||
426 | + shft_res2 = Vd->Q(0); | ||
427 | + } else { | ||
428 | + shft_res1 = int128_rshift(Vj->Q(0), imm); | ||
429 | + shft_res2 = int128_rshift(Vd->Q(0), imm); | ||
430 | + } | ||
431 | + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
432 | + min = int128_lshift(int128_one(), 63); | ||
433 | + | ||
434 | + if (int128_gt(shft_res1, mask)) { | ||
435 | + Vd->D(0) = int128_getlo(mask); | ||
436 | + } else if (int128_lt(shft_res1, int128_neg(min))) { | ||
437 | + Vd->D(0) = int128_getlo(min); | ||
438 | + } else { | ||
439 | + Vd->D(0) = int128_getlo(shft_res1); | ||
440 | + } | ||
441 | + | ||
442 | + if (int128_gt(shft_res2, mask)) { | ||
443 | + Vd->D(1) = int128_getlo(mask); | ||
444 | + } else if (int128_lt(shft_res2, int128_neg(min))) { | ||
445 | + Vd->D(1) = int128_getlo(min); | ||
446 | + } else { | ||
447 | + Vd->D(1) = int128_getlo(shft_res2); | ||
448 | + } | ||
449 | +} | ||
450 | + | ||
451 | +VSSRANI(vssrani_b_h, 16, B, H) | ||
452 | +VSSRANI(vssrani_h_w, 32, H, W) | ||
453 | +VSSRANI(vssrani_w_d, 64, W, D) | ||
454 | + | ||
455 | +#define VSSRLNUI(NAME, BIT, E1, E2) \ | ||
456 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
457 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
458 | +{ \ | ||
459 | + int i; \ | ||
460 | + VReg temp; \ | ||
461 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
462 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
463 | + \ | ||
464 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
465 | + temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
466 | + temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
467 | + } \ | ||
468 | + *Vd = temp; \ | ||
469 | +} | ||
470 | + | ||
471 | +void HELPER(vssrlni_du_q)(CPULoongArchState *env, | ||
472 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
473 | +{ | ||
474 | + Int128 shft_res1, shft_res2, mask; | ||
475 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
476 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
477 | + | ||
478 | + if (imm == 0) { | ||
479 | + shft_res1 = Vj->Q(0); | ||
480 | + shft_res2 = Vd->Q(0); | ||
481 | + } else { | ||
482 | + shft_res1 = int128_urshift(Vj->Q(0), imm); | ||
483 | + shft_res2 = int128_urshift(Vd->Q(0), imm); | ||
484 | + } | ||
485 | + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
486 | + | ||
487 | + if (int128_ult(mask, shft_res1)) { | ||
488 | + Vd->D(0) = int128_getlo(mask); | ||
489 | + }else { | ||
490 | + Vd->D(0) = int128_getlo(shft_res1); | ||
491 | + } | ||
492 | + | ||
493 | + if (int128_ult(mask, shft_res2)) { | ||
494 | + Vd->D(1) = int128_getlo(mask); | ||
495 | + }else { | ||
496 | + Vd->D(1) = int128_getlo(shft_res2); | ||
497 | + } | ||
498 | +} | ||
499 | + | ||
500 | +VSSRLNUI(vssrlni_bu_h, 16, B, H) | ||
501 | +VSSRLNUI(vssrlni_hu_w, 32, H, W) | ||
502 | +VSSRLNUI(vssrlni_wu_d, 64, W, D) | ||
503 | + | ||
504 | +#define VSSRANUI(NAME, BIT, E1, E2) \ | ||
505 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
506 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
507 | +{ \ | ||
508 | + int i; \ | ||
509 | + VReg temp; \ | ||
510 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
511 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
512 | + \ | ||
513 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
514 | + temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
515 | + temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
516 | + } \ | ||
517 | + *Vd = temp; \ | ||
518 | +} | ||
519 | + | ||
520 | +void HELPER(vssrani_du_q)(CPULoongArchState *env, | ||
521 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
522 | +{ | ||
523 | + Int128 shft_res1, shft_res2, mask; | ||
524 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
525 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
526 | + | ||
527 | + if (imm == 0) { | ||
528 | + shft_res1 = Vj->Q(0); | ||
529 | + shft_res2 = Vd->Q(0); | ||
530 | + } else { | ||
531 | + shft_res1 = int128_rshift(Vj->Q(0), imm); | ||
532 | + shft_res2 = int128_rshift(Vd->Q(0), imm); | ||
533 | + } | ||
534 | + | ||
535 | + if (int128_lt(Vj->Q(0), int128_zero())) { | ||
536 | + shft_res1 = int128_zero(); | ||
537 | + } | ||
538 | + | ||
539 | + if (int128_lt(Vd->Q(0), int128_zero())) { | ||
540 | + shft_res2 = int128_zero(); | ||
541 | + } | ||
542 | + | ||
543 | + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
544 | + | ||
545 | + if (int128_ult(mask, shft_res1)) { | ||
546 | + Vd->D(0) = int128_getlo(mask); | ||
547 | + }else { | ||
548 | + Vd->D(0) = int128_getlo(shft_res1); | ||
549 | + } | ||
550 | + | ||
551 | + if (int128_ult(mask, shft_res2)) { | ||
552 | + Vd->D(1) = int128_getlo(mask); | ||
553 | + }else { | ||
554 | + Vd->D(1) = int128_getlo(shft_res2); | ||
555 | + } | ||
556 | +} | ||
557 | + | ||
558 | +VSSRANUI(vssrani_bu_h, 16, B, H) | ||
559 | +VSSRANUI(vssrani_hu_w, 32, H, W) | ||
560 | +VSSRANUI(vssrani_wu_d, 64, W, D) | ||
561 | -- | ||
562 | 2.31.1 | ||
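The vssrln/vssran group above adds saturation: after the shift, a value that does not fit the narrower destination lane is clamped to that lane's maximum (and, for the signed forms, to its minimum). A simplified scalar model of the do_ssrlns_B path, where the destination is a signed byte so the clamp limit is (1 << 7) - 1; hypothetical names, illustration only:

    #include <stdint.h>
    #include <stdio.h>

    /* Shift a 16-bit lane right, then clamp the result to 0..0x7f,
     * mirroring do_ssrlns_B called with sh = BIT/2 - 1 = 7. */
    static uint8_t ssrlns_b_h(uint16_t v, int sa)
    {
        uint16_t shft = sa ? (uint16_t)(v >> sa) : v;
        uint16_t mask = (1u << 7) - 1;

        return shft > mask ? (uint8_t)mask : (uint8_t)shft;
    }

    int main(void)
    {
        /* 0x7fff >> 4 = 0x7ff saturates to 0x7f; 0x00f0 >> 4 stays 0x0f. */
        printf("%02x %02x\n", ssrlns_b_h(0x7fff, 4), ssrlns_b_h(0x00f0, 4));
        return 0;
    }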
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSSRLRN.{B.H/H.W/W.D}; | ||
3 | - VSSRARN.{B.H/H.W/W.D}; | ||
4 | - VSSRLRN.{BU.H/HU.W/WU.D}; | ||
5 | - VSSRARN.{BU.H/HU.W/WU.D}; | ||
6 | - VSSRLRNI.{B.H/H.W/W.D/D.Q}; | ||
7 | - VSSRARNI.{B.H/H.W/W.D/D.Q}; | ||
8 | - VSSRLRNI.{BU.H/HU.W/WU.D/DU.Q}; | ||
9 | - VSSRARNI.{BU.H/HU.W/WU.D/DU.Q}. | ||
10 | 1 | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
13 | Message-Id: <20230504122810.4094787-29-gaosong@loongson.cn> | ||
14 | --- | ||
15 | target/loongarch/disas.c | 30 ++ | ||
16 | target/loongarch/helper.h | 30 ++ | ||
17 | target/loongarch/insn_trans/trans_lsx.c.inc | 30 ++ | ||
18 | target/loongarch/insns.decode | 30 ++ | ||
19 | target/loongarch/lsx_helper.c | 358 ++++++++++++++++++++ | ||
20 | 5 files changed, 478 insertions(+) | ||
21 | |||
22 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/target/loongarch/disas.c | ||
25 | +++ b/target/loongarch/disas.c | ||
26 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vssrani_bu_h, vv_i) | ||
27 | INSN_LSX(vssrani_hu_w, vv_i) | ||
28 | INSN_LSX(vssrani_wu_d, vv_i) | ||
29 | INSN_LSX(vssrani_du_q, vv_i) | ||
30 | + | ||
31 | +INSN_LSX(vssrlrn_b_h, vvv) | ||
32 | +INSN_LSX(vssrlrn_h_w, vvv) | ||
33 | +INSN_LSX(vssrlrn_w_d, vvv) | ||
34 | +INSN_LSX(vssrarn_b_h, vvv) | ||
35 | +INSN_LSX(vssrarn_h_w, vvv) | ||
36 | +INSN_LSX(vssrarn_w_d, vvv) | ||
37 | +INSN_LSX(vssrlrn_bu_h, vvv) | ||
38 | +INSN_LSX(vssrlrn_hu_w, vvv) | ||
39 | +INSN_LSX(vssrlrn_wu_d, vvv) | ||
40 | +INSN_LSX(vssrarn_bu_h, vvv) | ||
41 | +INSN_LSX(vssrarn_hu_w, vvv) | ||
42 | +INSN_LSX(vssrarn_wu_d, vvv) | ||
43 | + | ||
44 | +INSN_LSX(vssrlrni_b_h, vv_i) | ||
45 | +INSN_LSX(vssrlrni_h_w, vv_i) | ||
46 | +INSN_LSX(vssrlrni_w_d, vv_i) | ||
47 | +INSN_LSX(vssrlrni_d_q, vv_i) | ||
48 | +INSN_LSX(vssrlrni_bu_h, vv_i) | ||
49 | +INSN_LSX(vssrlrni_hu_w, vv_i) | ||
50 | +INSN_LSX(vssrlrni_wu_d, vv_i) | ||
51 | +INSN_LSX(vssrlrni_du_q, vv_i) | ||
52 | +INSN_LSX(vssrarni_b_h, vv_i) | ||
53 | +INSN_LSX(vssrarni_h_w, vv_i) | ||
54 | +INSN_LSX(vssrarni_w_d, vv_i) | ||
55 | +INSN_LSX(vssrarni_d_q, vv_i) | ||
56 | +INSN_LSX(vssrarni_bu_h, vv_i) | ||
57 | +INSN_LSX(vssrarni_hu_w, vv_i) | ||
58 | +INSN_LSX(vssrarni_wu_d, vv_i) | ||
59 | +INSN_LSX(vssrarni_du_q, vv_i) | ||
60 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/loongarch/helper.h | ||
63 | +++ b/target/loongarch/helper.h | ||
64 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32) | ||
65 | DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32) | ||
66 | DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32) | ||
67 | DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32) | ||
68 | + | ||
69 | +DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32) | ||
70 | +DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32) | ||
71 | +DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32) | ||
72 | +DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32) | ||
73 | +DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32) | ||
74 | +DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32) | ||
75 | +DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32) | ||
76 | +DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32) | ||
77 | +DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32) | ||
78 | +DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32) | ||
79 | +DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32) | ||
80 | +DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32) | ||
81 | + | ||
82 | +DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32) | ||
83 | +DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32) | ||
84 | +DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32) | ||
85 | +DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32) | ||
86 | +DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32) | ||
87 | +DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32) | ||
88 | +DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32) | ||
89 | +DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32) | ||
90 | +DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32) | ||
91 | +DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32) | ||
92 | +DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32) | ||
93 | +DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32) | ||
94 | +DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32) | ||
95 | +DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32) | ||
96 | +DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32) | ||
97 | +DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32) | ||
98 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
101 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
102 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h) | ||
103 | TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w) | ||
104 | TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d) | ||
105 | TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q) | ||
106 | + | ||
107 | +TRANS(vssrlrn_b_h, gen_vvv, gen_helper_vssrlrn_b_h) | ||
108 | +TRANS(vssrlrn_h_w, gen_vvv, gen_helper_vssrlrn_h_w) | ||
109 | +TRANS(vssrlrn_w_d, gen_vvv, gen_helper_vssrlrn_w_d) | ||
110 | +TRANS(vssrarn_b_h, gen_vvv, gen_helper_vssrarn_b_h) | ||
111 | +TRANS(vssrarn_h_w, gen_vvv, gen_helper_vssrarn_h_w) | ||
112 | +TRANS(vssrarn_w_d, gen_vvv, gen_helper_vssrarn_w_d) | ||
113 | +TRANS(vssrlrn_bu_h, gen_vvv, gen_helper_vssrlrn_bu_h) | ||
114 | +TRANS(vssrlrn_hu_w, gen_vvv, gen_helper_vssrlrn_hu_w) | ||
115 | +TRANS(vssrlrn_wu_d, gen_vvv, gen_helper_vssrlrn_wu_d) | ||
116 | +TRANS(vssrarn_bu_h, gen_vvv, gen_helper_vssrarn_bu_h) | ||
117 | +TRANS(vssrarn_hu_w, gen_vvv, gen_helper_vssrarn_hu_w) | ||
118 | +TRANS(vssrarn_wu_d, gen_vvv, gen_helper_vssrarn_wu_d) | ||
119 | + | ||
120 | +TRANS(vssrlrni_b_h, gen_vv_i, gen_helper_vssrlrni_b_h) | ||
121 | +TRANS(vssrlrni_h_w, gen_vv_i, gen_helper_vssrlrni_h_w) | ||
122 | +TRANS(vssrlrni_w_d, gen_vv_i, gen_helper_vssrlrni_w_d) | ||
123 | +TRANS(vssrlrni_d_q, gen_vv_i, gen_helper_vssrlrni_d_q) | ||
124 | +TRANS(vssrarni_b_h, gen_vv_i, gen_helper_vssrarni_b_h) | ||
125 | +TRANS(vssrarni_h_w, gen_vv_i, gen_helper_vssrarni_h_w) | ||
126 | +TRANS(vssrarni_w_d, gen_vv_i, gen_helper_vssrarni_w_d) | ||
127 | +TRANS(vssrarni_d_q, gen_vv_i, gen_helper_vssrarni_d_q) | ||
128 | +TRANS(vssrlrni_bu_h, gen_vv_i, gen_helper_vssrlrni_bu_h) | ||
129 | +TRANS(vssrlrni_hu_w, gen_vv_i, gen_helper_vssrlrni_hu_w) | ||
130 | +TRANS(vssrlrni_wu_d, gen_vv_i, gen_helper_vssrlrni_wu_d) | ||
131 | +TRANS(vssrlrni_du_q, gen_vv_i, gen_helper_vssrlrni_du_q) | ||
132 | +TRANS(vssrarni_bu_h, gen_vv_i, gen_helper_vssrarni_bu_h) | ||
133 | +TRANS(vssrarni_hu_w, gen_vv_i, gen_helper_vssrarni_hu_w) | ||
134 | +TRANS(vssrarni_wu_d, gen_vv_i, gen_helper_vssrarni_wu_d) | ||
135 | +TRANS(vssrarni_du_q, gen_vv_i, gen_helper_vssrarni_du_q) | ||
136 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
137 | index XXXXXXX..XXXXXXX 100644 | ||
138 | --- a/target/loongarch/insns.decode | ||
139 | +++ b/target/loongarch/insns.decode | ||
140 | @@ -XXX,XX +XXX,XX @@ vssrani_bu_h 0111 00110110 01000 1 .... ..... ..... @vv_ui4 | ||
141 | vssrani_hu_w 0111 00110110 01001 ..... ..... ..... @vv_ui5 | ||
142 | vssrani_wu_d 0111 00110110 0101 ...... ..... ..... @vv_ui6 | ||
143 | vssrani_du_q 0111 00110110 011 ....... ..... ..... @vv_ui7 | ||
144 | + | ||
145 | +vssrlrn_b_h 0111 00010000 00001 ..... ..... ..... @vvv | ||
146 | +vssrlrn_h_w 0111 00010000 00010 ..... ..... ..... @vvv | ||
147 | +vssrlrn_w_d 0111 00010000 00011 ..... ..... ..... @vvv | ||
148 | +vssrarn_b_h 0111 00010000 00101 ..... ..... ..... @vvv | ||
149 | +vssrarn_h_w 0111 00010000 00110 ..... ..... ..... @vvv | ||
150 | +vssrarn_w_d 0111 00010000 00111 ..... ..... ..... @vvv | ||
151 | +vssrlrn_bu_h 0111 00010000 10001 ..... ..... ..... @vvv | ||
152 | +vssrlrn_hu_w 0111 00010000 10010 ..... ..... ..... @vvv | ||
153 | +vssrlrn_wu_d 0111 00010000 10011 ..... ..... ..... @vvv | ||
154 | +vssrarn_bu_h 0111 00010000 10101 ..... ..... ..... @vvv | ||
155 | +vssrarn_hu_w 0111 00010000 10110 ..... ..... ..... @vvv | ||
156 | +vssrarn_wu_d 0111 00010000 10111 ..... ..... ..... @vvv | ||
157 | + | ||
158 | +vssrlrni_b_h 0111 00110101 00000 1 .... ..... ..... @vv_ui4 | ||
159 | +vssrlrni_h_w 0111 00110101 00001 ..... ..... ..... @vv_ui5 | ||
160 | +vssrlrni_w_d 0111 00110101 0001 ...... ..... ..... @vv_ui6 | ||
161 | +vssrlrni_d_q 0111 00110101 001 ....... ..... ..... @vv_ui7 | ||
162 | +vssrarni_b_h 0111 00110110 10000 1 .... ..... ..... @vv_ui4 | ||
163 | +vssrarni_h_w 0111 00110110 10001 ..... ..... ..... @vv_ui5 | ||
164 | +vssrarni_w_d 0111 00110110 1001 ...... ..... ..... @vv_ui6 | ||
165 | +vssrarni_d_q 0111 00110110 101 ....... ..... ..... @vv_ui7 | ||
166 | +vssrlrni_bu_h 0111 00110101 01000 1 .... ..... ..... @vv_ui4 | ||
167 | +vssrlrni_hu_w 0111 00110101 01001 ..... ..... ..... @vv_ui5 | ||
168 | +vssrlrni_wu_d 0111 00110101 0101 ...... ..... ..... @vv_ui6 | ||
169 | +vssrlrni_du_q 0111 00110101 011 ....... ..... ..... @vv_ui7 | ||
170 | +vssrarni_bu_h 0111 00110110 11000 1 .... ..... ..... @vv_ui4 | ||
171 | +vssrarni_hu_w 0111 00110110 11001 ..... ..... ..... @vv_ui5 | ||
172 | +vssrarni_wu_d 0111 00110110 1101 ...... ..... ..... @vv_ui6 | ||
173 | +vssrarni_du_q 0111 00110110 111 ....... ..... ..... @vv_ui7 | ||
174 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/target/loongarch/lsx_helper.c | ||
177 | +++ b/target/loongarch/lsx_helper.c | ||
178 | @@ -XXX,XX +XXX,XX @@ void HELPER(vssrani_du_q)(CPULoongArchState *env, | ||
179 | VSSRANUI(vssrani_bu_h, 16, B, H) | ||
180 | VSSRANUI(vssrani_hu_w, 32, H, W) | ||
181 | VSSRANUI(vssrani_wu_d, 64, W, D) | ||
182 | + | ||
183 | +#define SSRLRNS(E1, E2, T1, T2, T3) \ | ||
184 | +static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ | ||
185 | +{ \ | ||
186 | + T1 shft_res; \ | ||
187 | + \ | ||
188 | + shft_res = do_vsrlr_ ## E2(e2, sa); \ | ||
189 | + T1 mask; \ | ||
190 | + mask = (1ull << sh) -1; \ | ||
191 | + if (shft_res > mask) { \ | ||
192 | + return mask; \ | ||
193 | + } else { \ | ||
194 | + return shft_res; \ | ||
195 | + } \ | ||
196 | +} | ||
197 | + | ||
198 | +SSRLRNS(B, H, uint16_t, int16_t, uint8_t) | ||
199 | +SSRLRNS(H, W, uint32_t, int32_t, uint16_t) | ||
200 | +SSRLRNS(W, D, uint64_t, int64_t, uint32_t) | ||
201 | + | ||
202 | +#define VSSRLRN(NAME, BIT, T, E1, E2) \ | ||
203 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
204 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
205 | +{ \ | ||
206 | + int i; \ | ||
207 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
208 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
209 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
210 | + \ | ||
211 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
212 | + Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
213 | + } \ | ||
214 | + Vd->D(1) = 0; \ | ||
215 | +} | ||
216 | + | ||
217 | +VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H) | ||
218 | +VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W) | ||
219 | +VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D) | ||
220 | + | ||
221 | +#define SSRARNS(E1, E2, T1, T2) \ | ||
222 | +static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ | ||
223 | +{ \ | ||
224 | + T1 shft_res; \ | ||
225 | + \ | ||
226 | + shft_res = do_vsrar_ ## E2(e2, sa); \ | ||
227 | + T2 mask; \ | ||
228 | + mask = (1ll << sh) -1; \ | ||
229 | + if (shft_res > mask) { \ | ||
230 | + return mask; \ | ||
231 | + } else if (shft_res < -(mask +1)) { \ | ||
232 | + return ~mask; \ | ||
233 | + } else { \ | ||
234 | + return shft_res; \ | ||
235 | + } \ | ||
236 | +} | ||
237 | + | ||
238 | +SSRARNS(B, H, int16_t, int8_t) | ||
239 | +SSRARNS(H, W, int32_t, int16_t) | ||
240 | +SSRARNS(W, D, int64_t, int32_t) | ||
241 | + | ||
242 | +#define VSSRARN(NAME, BIT, T, E1, E2) \ | ||
243 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
244 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
245 | +{ \ | ||
246 | + int i; \ | ||
247 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
248 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
249 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
250 | + \ | ||
251 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
252 | + Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ | ||
253 | + } \ | ||
254 | + Vd->D(1) = 0; \ | ||
255 | +} | ||
256 | + | ||
257 | +VSSRARN(vssrarn_b_h, 16, uint16_t, B, H) | ||
258 | +VSSRARN(vssrarn_h_w, 32, uint32_t, H, W) | ||
259 | +VSSRARN(vssrarn_w_d, 64, uint64_t, W, D) | ||
260 | + | ||
261 | +#define SSRLRNU(E1, E2, T1, T2, T3) \ | ||
262 | +static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ | ||
263 | +{ \ | ||
264 | + T1 shft_res; \ | ||
265 | + \ | ||
266 | + shft_res = do_vsrlr_ ## E2(e2, sa); \ | ||
267 | + \ | ||
268 | + T2 mask; \ | ||
269 | + mask = (1ull << sh) -1; \ | ||
270 | + if (shft_res > mask) { \ | ||
271 | + return mask; \ | ||
272 | + } else { \ | ||
273 | + return shft_res; \ | ||
274 | + } \ | ||
275 | +} | ||
276 | + | ||
277 | +SSRLRNU(B, H, uint16_t, uint8_t, int16_t) | ||
278 | +SSRLRNU(H, W, uint32_t, uint16_t, int32_t) | ||
279 | +SSRLRNU(W, D, uint64_t, uint32_t, int64_t) | ||
280 | + | ||
281 | +#define VSSRLRNU(NAME, BIT, T, E1, E2) \ | ||
282 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
283 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
284 | +{ \ | ||
285 | + int i; \ | ||
286 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
287 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
288 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
289 | + \ | ||
290 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
291 | + Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
292 | + } \ | ||
293 | + Vd->D(1) = 0; \ | ||
294 | +} | ||
295 | + | ||
296 | +VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H) | ||
297 | +VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W) | ||
298 | +VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D) | ||
299 | + | ||
300 | +#define SSRARNU(E1, E2, T1, T2, T3) \ | ||
301 | +static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ | ||
302 | +{ \ | ||
303 | + T1 shft_res; \ | ||
304 | + \ | ||
305 | + if (e2 < 0) { \ | ||
306 | + shft_res = 0; \ | ||
307 | + } else { \ | ||
308 | + shft_res = do_vsrar_ ## E2(e2, sa); \ | ||
309 | + } \ | ||
310 | + T2 mask; \ | ||
311 | + mask = (1ull << sh) -1; \ | ||
312 | + if (shft_res > mask) { \ | ||
313 | + return mask; \ | ||
314 | + } else { \ | ||
315 | + return shft_res; \ | ||
316 | + } \ | ||
317 | +} | ||
318 | + | ||
319 | +SSRARNU(B, H, uint16_t, uint8_t, int16_t) | ||
320 | +SSRARNU(H, W, uint32_t, uint16_t, int32_t) | ||
321 | +SSRARNU(W, D, uint64_t, uint32_t, int64_t) | ||
322 | + | ||
323 | +#define VSSRARNU(NAME, BIT, T, E1, E2) \ | ||
324 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
325 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
326 | +{ \ | ||
327 | + int i; \ | ||
328 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
329 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
330 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
331 | + \ | ||
332 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
333 | + Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ | ||
334 | + } \ | ||
335 | + Vd->D(1) = 0; \ | ||
336 | +} | ||
337 | + | ||
338 | +VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H) | ||
339 | +VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) | ||
340 | +VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) | ||
341 | + | ||
342 | +#define VSSRLRNI(NAME, BIT, E1, E2) \ | ||
343 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
344 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
345 | +{ \ | ||
346 | + int i; \ | ||
347 | + VReg temp; \ | ||
348 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
349 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
350 | + \ | ||
351 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
352 | + temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
353 | + temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ | ||
354 | + } \ | ||
355 | + *Vd = temp; \ | ||
356 | +} | ||
357 | + | ||
358 | +#define VSSRLRNI_Q(NAME, sh) \ | ||
359 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
360 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
361 | +{ \ | ||
362 | + Int128 shft_res1, shft_res2, mask, r1, r2; \ | ||
363 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
364 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
365 | + \ | ||
366 | + if (imm == 0) { \ | ||
367 | + shft_res1 = Vj->Q(0); \ | ||
368 | + shft_res2 = Vd->Q(0); \ | ||
369 | + } else { \ | ||
370 | + r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \ | ||
371 | + r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \ | ||
372 | + \ | ||
373 | + shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \ | ||
374 | + shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \ | ||
375 | + } \ | ||
376 | + \ | ||
377 | + mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \ | ||
378 | + \ | ||
379 | + if (int128_ult(mask, shft_res1)) { \ | ||
380 | + Vd->D(0) = int128_getlo(mask); \ | ||
381 | + }else { \ | ||
382 | + Vd->D(0) = int128_getlo(shft_res1); \ | ||
383 | + } \ | ||
384 | + \ | ||
385 | + if (int128_ult(mask, shft_res2)) { \ | ||
386 | + Vd->D(1) = int128_getlo(mask); \ | ||
387 | + }else { \ | ||
388 | + Vd->D(1) = int128_getlo(shft_res2); \ | ||
389 | + } \ | ||
390 | +} | ||
391 | + | ||
392 | +VSSRLRNI(vssrlrni_b_h, 16, B, H) | ||
393 | +VSSRLRNI(vssrlrni_h_w, 32, H, W) | ||
394 | +VSSRLRNI(vssrlrni_w_d, 64, W, D) | ||
395 | +VSSRLRNI_Q(vssrlrni_d_q, 63) | ||
396 | + | ||
397 | +#define VSSRARNI(NAME, BIT, E1, E2) \ | ||
398 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
399 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
400 | +{ \ | ||
401 | + int i; \ | ||
402 | + VReg temp; \ | ||
403 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
404 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
405 | + \ | ||
406 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
407 | + temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ | ||
408 | + temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ | ||
409 | + } \ | ||
410 | + *Vd = temp; \ | ||
411 | +} | ||
412 | + | ||
413 | +void HELPER(vssrarni_d_q)(CPULoongArchState *env, | ||
414 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
415 | +{ | ||
416 | + Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
417 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
418 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
419 | + | ||
420 | + if (imm == 0) { | ||
421 | + shft_res1 = Vj->Q(0); | ||
422 | + shft_res2 = Vd->Q(0); | ||
423 | + } else { | ||
424 | + r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
425 | + r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
426 | + | ||
427 | + shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); | ||
428 | + shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); | ||
429 | + } | ||
430 | + | ||
431 | + mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); | ||
432 | + mask2 = int128_lshift(int128_one(), 63); | ||
433 | + | ||
434 | + if (int128_gt(shft_res1, mask1)) { | ||
435 | + Vd->D(0) = int128_getlo(mask1); | ||
436 | + } else if (int128_lt(shft_res1, int128_neg(mask2))) { | ||
437 | + Vd->D(0) = int128_getlo(mask2); | ||
438 | + } else { | ||
439 | + Vd->D(0) = int128_getlo(shft_res1); | ||
440 | + } | ||
441 | + | ||
442 | + if (int128_gt(shft_res2, mask1)) { | ||
443 | + Vd->D(1) = int128_getlo(mask1); | ||
444 | + } else if (int128_lt(shft_res2, int128_neg(mask2))) { | ||
445 | + Vd->D(1) = int128_getlo(mask2); | ||
446 | + } else { | ||
447 | + Vd->D(1) = int128_getlo(shft_res2); | ||
448 | + } | ||
449 | +} | ||
450 | + | ||
451 | +VSSRARNI(vssrarni_b_h, 16, B, H) | ||
452 | +VSSRARNI(vssrarni_h_w, 32, H, W) | ||
453 | +VSSRARNI(vssrarni_w_d, 64, W, D) | ||
454 | + | ||
455 | +#define VSSRLRNUI(NAME, BIT, E1, E2) \ | ||
456 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
457 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
458 | +{ \ | ||
459 | + int i; \ | ||
460 | + VReg temp; \ | ||
461 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
462 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
463 | + \ | ||
464 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
465 | + temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
466 | + temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
467 | + } \ | ||
468 | + *Vd = temp; \ | ||
469 | +} | ||
470 | + | ||
471 | +VSSRLRNUI(vssrlrni_bu_h, 16, B, H) | ||
472 | +VSSRLRNUI(vssrlrni_hu_w, 32, H, W) | ||
473 | +VSSRLRNUI(vssrlrni_wu_d, 64, W, D) | ||
474 | +VSSRLRNI_Q(vssrlrni_du_q, 64) | ||
475 | + | ||
476 | +#define VSSRARNUI(NAME, BIT, E1, E2) \ | ||
477 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
478 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
479 | +{ \ | ||
480 | + int i; \ | ||
481 | + VReg temp; \ | ||
482 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
483 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
484 | + \ | ||
485 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
486 | + temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \ | ||
487 | + temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \ | ||
488 | + } \ | ||
489 | + *Vd = temp; \ | ||
490 | +} | ||
491 | + | ||
492 | +void HELPER(vssrarni_du_q)(CPULoongArchState *env, | ||
493 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
494 | +{ | ||
495 | + Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; | ||
496 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
497 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
498 | + | ||
499 | + if (imm == 0) { | ||
500 | + shft_res1 = Vj->Q(0); | ||
501 | + shft_res2 = Vd->Q(0); | ||
502 | + } else { | ||
503 | + r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); | ||
504 | + r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); | ||
505 | + | ||
506 | + shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); | ||
507 | + shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); | ||
508 | + } | ||
509 | + | ||
510 | + if (int128_lt(Vj->Q(0), int128_zero())) { | ||
511 | + shft_res1 = int128_zero(); | ||
512 | + } | ||
513 | + if (int128_lt(Vd->Q(0), int128_zero())) { | ||
514 | + shft_res2 = int128_zero(); | ||
515 | + } | ||
516 | + | ||
517 | + mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); | ||
518 | + mask2 = int128_lshift(int128_one(), 64); | ||
519 | + | ||
520 | + if (int128_gt(shft_res1, mask1)) { | ||
521 | + Vd->D(0) = int128_getlo(mask1); | ||
522 | + } else if (int128_lt(shft_res1, int128_neg(mask2))) { | ||
523 | + Vd->D(0) = int128_getlo(mask2); | ||
524 | + } else { | ||
525 | + Vd->D(0) = int128_getlo(shft_res1); | ||
526 | + } | ||
527 | + | ||
528 | + if (int128_gt(shft_res2, mask1)) { | ||
529 | + Vd->D(1) = int128_getlo(mask1); | ||
530 | + } else if (int128_lt(shft_res2, int128_neg(mask2))) { | ||
531 | + Vd->D(1) = int128_getlo(mask2); | ||
532 | + } else { | ||
533 | + Vd->D(1) = int128_getlo(shft_res2); | ||
534 | + } | ||
535 | +} | ||
536 | + | ||
537 | +VSSRARNUI(vssrarni_bu_h, 16, B, H) | ||
538 | +VSSRARNUI(vssrarni_hu_w, 32, H, W) | ||
539 | +VSSRARNUI(vssrarni_wu_d, 64, W, D) | ||
540 | -- | ||
541 | 2.31.1
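
[Illustrative aside, not part of the patch series.] The saturating rounding narrowing shifts above all follow the same per-lane pattern: shift right with round-half-up, then clamp the result to the range of the narrower destination element. A minimal scalar sketch of one vssrlrn.b.h lane, assuming the do_vsrlr_* helpers defined earlier in lsx_helper.c round by adding back the last bit shifted out:

    #include <stdint.h>
    #include <stdio.h>

    /* One lane of vssrlrn.b.h: logical shift right with round-half-up,
     * then saturate to the signed-byte maximum (2^7 - 1).
     * shift is expected in [0, 15], as produced by Vk->H(i) % 16. */
    static uint8_t ssrlrn_b_h(uint16_t src, unsigned shift)
    {
        uint32_t res;

        if (shift == 0) {
            res = src;
        } else {
            /* add the last bit shifted out to round to nearest */
            res = (src >> shift) + ((src >> (shift - 1)) & 1);
        }
        return res > 0x7f ? 0x7f : (uint8_t)res;
    }

    int main(void)
    {
        printf("%#x\n", (unsigned)ssrlrn_b_h(0x00ff, 3)); /* 0x1f plus rounding bit = 0x20 */
        printf("%#x\n", (unsigned)ssrlrn_b_h(0xffff, 1)); /* saturates to 0x7f */
        return 0;
    }

The unsigned variants (vssrlrn.bu.h and friends) differ only in the saturation bound, which is BIT/2 bits instead of BIT/2 - 1, matching the sh argument passed by the VSSRLRNU macro above.
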
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VCLO.{B/H/W/D}; | ||
3 | - VCLZ.{B/H/W/D}. | ||
4 | 1 | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
7 | Message-Id: <20230504122810.4094787-30-gaosong@loongson.cn> | ||
8 | --- | ||
9 | target/loongarch/disas.c | 9 ++++++ | ||
10 | target/loongarch/helper.h | 9 ++++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 9 ++++++ | ||
12 | target/loongarch/insns.decode | 9 ++++++ | ||
13 | target/loongarch/lsx_helper.c | 31 +++++++++++++++++++++ | ||
14 | 5 files changed, 67 insertions(+) | ||
15 | |||
16 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/loongarch/disas.c | ||
19 | +++ b/target/loongarch/disas.c | ||
20 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vssrarni_bu_h, vv_i) | ||
21 | INSN_LSX(vssrarni_hu_w, vv_i) | ||
22 | INSN_LSX(vssrarni_wu_d, vv_i) | ||
23 | INSN_LSX(vssrarni_du_q, vv_i) | ||
24 | + | ||
25 | +INSN_LSX(vclo_b, vv) | ||
26 | +INSN_LSX(vclo_h, vv) | ||
27 | +INSN_LSX(vclo_w, vv) | ||
28 | +INSN_LSX(vclo_d, vv) | ||
29 | +INSN_LSX(vclz_b, vv) | ||
30 | +INSN_LSX(vclz_h, vv) | ||
31 | +INSN_LSX(vclz_w, vv) | ||
32 | +INSN_LSX(vclz_d, vv) | ||
33 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/loongarch/helper.h | ||
36 | +++ b/target/loongarch/helper.h | ||
37 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32) | ||
38 | DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32) | ||
39 | DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32) | ||
40 | DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32) | ||
41 | + | ||
42 | +DEF_HELPER_3(vclo_b, void, env, i32, i32) | ||
43 | +DEF_HELPER_3(vclo_h, void, env, i32, i32) | ||
44 | +DEF_HELPER_3(vclo_w, void, env, i32, i32) | ||
45 | +DEF_HELPER_3(vclo_d, void, env, i32, i32) | ||
46 | +DEF_HELPER_3(vclz_b, void, env, i32, i32) | ||
47 | +DEF_HELPER_3(vclz_h, void, env, i32, i32) | ||
48 | +DEF_HELPER_3(vclz_w, void, env, i32, i32) | ||
49 | +DEF_HELPER_3(vclz_d, void, env, i32, i32) | ||
50 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
53 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
54 | @@ -XXX,XX +XXX,XX @@ TRANS(vssrarni_bu_h, gen_vv_i, gen_helper_vssrarni_bu_h) | ||
55 | TRANS(vssrarni_hu_w, gen_vv_i, gen_helper_vssrarni_hu_w) | ||
56 | TRANS(vssrarni_wu_d, gen_vv_i, gen_helper_vssrarni_wu_d) | ||
57 | TRANS(vssrarni_du_q, gen_vv_i, gen_helper_vssrarni_du_q) | ||
58 | + | ||
59 | +TRANS(vclo_b, gen_vv, gen_helper_vclo_b) | ||
60 | +TRANS(vclo_h, gen_vv, gen_helper_vclo_h) | ||
61 | +TRANS(vclo_w, gen_vv, gen_helper_vclo_w) | ||
62 | +TRANS(vclo_d, gen_vv, gen_helper_vclo_d) | ||
63 | +TRANS(vclz_b, gen_vv, gen_helper_vclz_b) | ||
64 | +TRANS(vclz_h, gen_vv, gen_helper_vclz_h) | ||
65 | +TRANS(vclz_w, gen_vv, gen_helper_vclz_w) | ||
66 | +TRANS(vclz_d, gen_vv, gen_helper_vclz_d) | ||
67 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/loongarch/insns.decode | ||
70 | +++ b/target/loongarch/insns.decode | ||
71 | @@ -XXX,XX +XXX,XX @@ vssrarni_bu_h 0111 00110110 11000 1 .... ..... ..... @vv_ui4 | ||
72 | vssrarni_hu_w 0111 00110110 11001 ..... ..... ..... @vv_ui5 | ||
73 | vssrarni_wu_d 0111 00110110 1101 ...... ..... ..... @vv_ui6 | ||
74 | vssrarni_du_q 0111 00110110 111 ....... ..... ..... @vv_ui7 | ||
75 | + | ||
76 | +vclo_b 0111 00101001 11000 00000 ..... ..... @vv | ||
77 | +vclo_h 0111 00101001 11000 00001 ..... ..... @vv | ||
78 | +vclo_w 0111 00101001 11000 00010 ..... ..... @vv | ||
79 | +vclo_d 0111 00101001 11000 00011 ..... ..... @vv | ||
80 | +vclz_b 0111 00101001 11000 00100 ..... ..... @vv | ||
81 | +vclz_h 0111 00101001 11000 00101 ..... ..... @vv | ||
82 | +vclz_w 0111 00101001 11000 00110 ..... ..... @vv | ||
83 | +vclz_d 0111 00101001 11000 00111 ..... ..... @vv | ||
84 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/target/loongarch/lsx_helper.c | ||
87 | +++ b/target/loongarch/lsx_helper.c | ||
88 | @@ -XXX,XX +XXX,XX @@ void HELPER(vssrarni_du_q)(CPULoongArchState *env, | ||
89 | VSSRARNUI(vssrarni_bu_h, 16, B, H) | ||
90 | VSSRARNUI(vssrarni_hu_w, 32, H, W) | ||
91 | VSSRARNUI(vssrarni_wu_d, 64, W, D) | ||
92 | + | ||
93 | +#define DO_2OP(NAME, BIT, E, DO_OP) \ | ||
94 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
95 | +{ \ | ||
96 | + int i; \ | ||
97 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
98 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
99 | + \ | ||
100 | + for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
101 | + { \ | ||
102 | + Vd->E(i) = DO_OP(Vj->E(i)); \ | ||
103 | + } \ | ||
104 | +} | ||
105 | + | ||
106 | +#define DO_CLO_B(N) (clz32(~N & 0xff) - 24) | ||
107 | +#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) | ||
108 | +#define DO_CLO_W(N) (clz32(~N)) | ||
109 | +#define DO_CLO_D(N) (clz64(~N)) | ||
110 | +#define DO_CLZ_B(N) (clz32(N) - 24) | ||
111 | +#define DO_CLZ_H(N) (clz32(N) - 16) | ||
112 | +#define DO_CLZ_W(N) (clz32(N)) | ||
113 | +#define DO_CLZ_D(N) (clz64(N)) | ||
114 | + | ||
115 | +DO_2OP(vclo_b, 8, UB, DO_CLO_B) | ||
116 | +DO_2OP(vclo_h, 16, UH, DO_CLO_H) | ||
117 | +DO_2OP(vclo_w, 32, UW, DO_CLO_W) | ||
118 | +DO_2OP(vclo_d, 64, UD, DO_CLO_D) | ||
119 | +DO_2OP(vclz_b, 8, UB, DO_CLZ_B) | ||
120 | +DO_2OP(vclz_h, 16, UH, DO_CLZ_H) | ||
121 | +DO_2OP(vclz_w, 32, UW, DO_CLZ_W) | ||
122 | +DO_2OP(vclz_d, 64, UD, DO_CLZ_D) | ||
123 | -- | ||
124 | 2.31.1
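
[Illustrative aside, not part of the patch.] vclo counts leading one bits, which the helpers implement as count-leading-zeros of the complement, with a constant adjustment for sub-word widths because clz32 always operates on 32 bits. A portable scalar sketch of the byte case:

    #include <stdint.h>
    #include <stdio.h>

    /* Count leading zeros of a byte; returns 8 when x == 0. */
    static int clz8(uint8_t x)
    {
        int n = 0;
        for (uint8_t bit = 0x80; bit && !(x & bit); bit >>= 1) {
            n++;
        }
        return n;
    }

    /* vclo.b: count leading ones == count leading zeros of the complement. */
    static int clo8(uint8_t x)
    {
        return clz8((uint8_t)~x);
    }

    int main(void)
    {
        printf("%d %d %d\n", clz8(0x0f), clo8(0xf0), clo8(0xff)); /* 4 4 8 */
        return 0;
    }
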
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VPCNT.{B/H/W/D}. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-31-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 5 +++++ | ||
9 | target/loongarch/helper.h | 5 +++++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++++ | ||
11 | target/loongarch/insns.decode | 5 +++++ | ||
12 | target/loongarch/lsx_helper.c | 18 ++++++++++++++++++ | ||
13 | 5 files changed, 38 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vclz_b, vv) | ||
20 | INSN_LSX(vclz_h, vv) | ||
21 | INSN_LSX(vclz_w, vv) | ||
22 | INSN_LSX(vclz_d, vv) | ||
23 | + | ||
24 | +INSN_LSX(vpcnt_b, vv) | ||
25 | +INSN_LSX(vpcnt_h, vv) | ||
26 | +INSN_LSX(vpcnt_w, vv) | ||
27 | +INSN_LSX(vpcnt_d, vv) | ||
28 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/loongarch/helper.h | ||
31 | +++ b/target/loongarch/helper.h | ||
32 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vclz_b, void, env, i32, i32) | ||
33 | DEF_HELPER_3(vclz_h, void, env, i32, i32) | ||
34 | DEF_HELPER_3(vclz_w, void, env, i32, i32) | ||
35 | DEF_HELPER_3(vclz_d, void, env, i32, i32) | ||
36 | + | ||
37 | +DEF_HELPER_3(vpcnt_b, void, env, i32, i32) | ||
38 | +DEF_HELPER_3(vpcnt_h, void, env, i32, i32) | ||
39 | +DEF_HELPER_3(vpcnt_w, void, env, i32, i32) | ||
40 | +DEF_HELPER_3(vpcnt_d, void, env, i32, i32) | ||
41 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
44 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
45 | @@ -XXX,XX +XXX,XX @@ TRANS(vclz_b, gen_vv, gen_helper_vclz_b) | ||
46 | TRANS(vclz_h, gen_vv, gen_helper_vclz_h) | ||
47 | TRANS(vclz_w, gen_vv, gen_helper_vclz_w) | ||
48 | TRANS(vclz_d, gen_vv, gen_helper_vclz_d) | ||
49 | + | ||
50 | +TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b) | ||
51 | +TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h) | ||
52 | +TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w) | ||
53 | +TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d) | ||
54 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/loongarch/insns.decode | ||
57 | +++ b/target/loongarch/insns.decode | ||
58 | @@ -XXX,XX +XXX,XX @@ vclz_b 0111 00101001 11000 00100 ..... ..... @vv | ||
59 | vclz_h 0111 00101001 11000 00101 ..... ..... @vv | ||
60 | vclz_w 0111 00101001 11000 00110 ..... ..... @vv | ||
61 | vclz_d 0111 00101001 11000 00111 ..... ..... @vv | ||
62 | + | ||
63 | +vpcnt_b 0111 00101001 11000 01000 ..... ..... @vv | ||
64 | +vpcnt_h 0111 00101001 11000 01001 ..... ..... @vv | ||
65 | +vpcnt_w 0111 00101001 11000 01010 ..... ..... @vv | ||
66 | +vpcnt_d 0111 00101001 11000 01011 ..... ..... @vv | ||
67 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/loongarch/lsx_helper.c | ||
70 | +++ b/target/loongarch/lsx_helper.c | ||
71 | @@ -XXX,XX +XXX,XX @@ DO_2OP(vclz_b, 8, UB, DO_CLZ_B) | ||
72 | DO_2OP(vclz_h, 16, UH, DO_CLZ_H) | ||
73 | DO_2OP(vclz_w, 32, UW, DO_CLZ_W) | ||
74 | DO_2OP(vclz_d, 64, UD, DO_CLZ_D) | ||
75 | + | ||
76 | +#define VPCNT(NAME, BIT, E, FN) \ | ||
77 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
78 | +{ \ | ||
79 | + int i; \ | ||
80 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
81 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
82 | + \ | ||
83 | + for (i = 0; i < LSX_LEN/BIT; i++) \ | ||
84 | + { \ | ||
85 | + Vd->E(i) = FN(Vj->E(i)); \ | ||
86 | + } \ | ||
87 | +} | ||
88 | + | ||
89 | +VPCNT(vpcnt_b, 8, UB, ctpop8) | ||
90 | +VPCNT(vpcnt_h, 16, UH, ctpop16) | ||
91 | +VPCNT(vpcnt_w, 32, UW, ctpop32) | ||
92 | +VPCNT(vpcnt_d, 64, UD, ctpop64) | ||
93 | -- | ||
94 | 2.31.1
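
[Illustrative aside, not part of the patch.] vpcnt.{b/h/w/d} is a straight per-lane population count; the helpers simply delegate to QEMU's ctpop8/16/32/64. A scalar sketch of one byte lane using the clear-lowest-set-bit trick:

    #include <stdint.h>
    #include <stdio.h>

    /* One byte lane of vpcnt.b: population count (Kernighan's method). */
    static int pcnt8(uint8_t x)
    {
        int n = 0;
        while (x) {
            x &= x - 1;   /* clear the lowest set bit */
            n++;
        }
        return n;
    }

    int main(void)
    {
        printf("%d\n", pcnt8(0xa5)); /* 1010 0101 -> 4 */
        return 0;
    }
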
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VBITCLR[I].{B/H/W/D}; | ||
3 | - VBITSET[I].{B/H/W/D}; | ||
4 | - VBITREV[I].{B/H/W/D}. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-32-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 25 ++ | ||
11 | target/loongarch/helper.h | 27 ++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 305 ++++++++++++++++++++ | ||
13 | target/loongarch/insns.decode | 25 ++ | ||
14 | target/loongarch/lsx_helper.c | 55 ++++ | ||
15 | 5 files changed, 437 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/disas.c | ||
20 | +++ b/target/loongarch/disas.c | ||
21 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vpcnt_b, vv) | ||
22 | INSN_LSX(vpcnt_h, vv) | ||
23 | INSN_LSX(vpcnt_w, vv) | ||
24 | INSN_LSX(vpcnt_d, vv) | ||
25 | + | ||
26 | +INSN_LSX(vbitclr_b, vvv) | ||
27 | +INSN_LSX(vbitclr_h, vvv) | ||
28 | +INSN_LSX(vbitclr_w, vvv) | ||
29 | +INSN_LSX(vbitclr_d, vvv) | ||
30 | +INSN_LSX(vbitclri_b, vv_i) | ||
31 | +INSN_LSX(vbitclri_h, vv_i) | ||
32 | +INSN_LSX(vbitclri_w, vv_i) | ||
33 | +INSN_LSX(vbitclri_d, vv_i) | ||
34 | +INSN_LSX(vbitset_b, vvv) | ||
35 | +INSN_LSX(vbitset_h, vvv) | ||
36 | +INSN_LSX(vbitset_w, vvv) | ||
37 | +INSN_LSX(vbitset_d, vvv) | ||
38 | +INSN_LSX(vbitseti_b, vv_i) | ||
39 | +INSN_LSX(vbitseti_h, vv_i) | ||
40 | +INSN_LSX(vbitseti_w, vv_i) | ||
41 | +INSN_LSX(vbitseti_d, vv_i) | ||
42 | +INSN_LSX(vbitrev_b, vvv) | ||
43 | +INSN_LSX(vbitrev_h, vvv) | ||
44 | +INSN_LSX(vbitrev_w, vvv) | ||
45 | +INSN_LSX(vbitrev_d, vvv) | ||
46 | +INSN_LSX(vbitrevi_b, vv_i) | ||
47 | +INSN_LSX(vbitrevi_h, vv_i) | ||
48 | +INSN_LSX(vbitrevi_w, vv_i) | ||
49 | +INSN_LSX(vbitrevi_d, vv_i) | ||
50 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/loongarch/helper.h | ||
53 | +++ b/target/loongarch/helper.h | ||
54 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vpcnt_b, void, env, i32, i32) | ||
55 | DEF_HELPER_3(vpcnt_h, void, env, i32, i32) | ||
56 | DEF_HELPER_3(vpcnt_w, void, env, i32, i32) | ||
57 | DEF_HELPER_3(vpcnt_d, void, env, i32, i32) | ||
58 | + | ||
59 | +DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
60 | +DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
61 | +DEF_HELPER_FLAGS_4(vbitclr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
62 | +DEF_HELPER_FLAGS_4(vbitclr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
63 | +DEF_HELPER_FLAGS_4(vbitclri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
64 | +DEF_HELPER_FLAGS_4(vbitclri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
65 | +DEF_HELPER_FLAGS_4(vbitclri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
66 | +DEF_HELPER_FLAGS_4(vbitclri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
67 | + | ||
68 | +DEF_HELPER_FLAGS_4(vbitset_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
69 | +DEF_HELPER_FLAGS_4(vbitset_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
70 | +DEF_HELPER_FLAGS_4(vbitset_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
71 | +DEF_HELPER_FLAGS_4(vbitset_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
72 | +DEF_HELPER_FLAGS_4(vbitseti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
73 | +DEF_HELPER_FLAGS_4(vbitseti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
74 | +DEF_HELPER_FLAGS_4(vbitseti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
75 | +DEF_HELPER_FLAGS_4(vbitseti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
76 | + | ||
77 | +DEF_HELPER_FLAGS_4(vbitrev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vbitrev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vbitrev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vbitrev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
81 | +DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
82 | +DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
84 | +DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
85 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
88 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
89 | @@ -XXX,XX +XXX,XX @@ TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b) | ||
90 | TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h) | ||
91 | TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w) | ||
92 | TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d) | ||
93 | + | ||
94 | +static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, | ||
95 | + void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) | ||
96 | +{ | ||
97 | + TCGv_vec mask, lsh, t1, one; | ||
98 | + | ||
99 | + lsh = tcg_temp_new_vec_matching(t); | ||
100 | + t1 = tcg_temp_new_vec_matching(t); | ||
101 | + mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1); | ||
102 | + one = tcg_constant_vec_matching(t, vece, 1); | ||
103 | + | ||
104 | + tcg_gen_and_vec(vece, lsh, b, mask); | ||
105 | + tcg_gen_shlv_vec(vece, t1, one, lsh); | ||
106 | + func(vece, t, a, t1); | ||
107 | +} | ||
108 | + | ||
109 | +static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
110 | +{ | ||
111 | + do_vbit(vece, t, a, b, tcg_gen_andc_vec); | ||
112 | +} | ||
113 | + | ||
114 | +static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
115 | +{ | ||
116 | + do_vbit(vece, t, a, b, tcg_gen_or_vec); | ||
117 | +} | ||
118 | + | ||
119 | +static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) | ||
120 | +{ | ||
121 | + do_vbit(vece, t, a, b, tcg_gen_xor_vec); | ||
122 | +} | ||
123 | + | ||
124 | +static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
125 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
126 | +{ | ||
127 | + static const TCGOpcode vecop_list[] = { | ||
128 | + INDEX_op_shlv_vec, INDEX_op_andc_vec, 0 | ||
129 | + }; | ||
130 | + static const GVecGen3 op[4] = { | ||
131 | + { | ||
132 | + .fniv = gen_vbitclr, | ||
133 | + .fno = gen_helper_vbitclr_b, | ||
134 | + .opt_opc = vecop_list, | ||
135 | + .vece = MO_8 | ||
136 | + }, | ||
137 | + { | ||
138 | + .fniv = gen_vbitclr, | ||
139 | + .fno = gen_helper_vbitclr_h, | ||
140 | + .opt_opc = vecop_list, | ||
141 | + .vece = MO_16 | ||
142 | + }, | ||
143 | + { | ||
144 | + .fniv = gen_vbitclr, | ||
145 | + .fno = gen_helper_vbitclr_w, | ||
146 | + .opt_opc = vecop_list, | ||
147 | + .vece = MO_32 | ||
148 | + }, | ||
149 | + { | ||
150 | + .fniv = gen_vbitclr, | ||
151 | + .fno = gen_helper_vbitclr_d, | ||
152 | + .opt_opc = vecop_list, | ||
153 | + .vece = MO_64 | ||
154 | + }, | ||
155 | + }; | ||
156 | + | ||
157 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
158 | +} | ||
159 | + | ||
160 | +TRANS(vbitclr_b, gvec_vvv, MO_8, do_vbitclr) | ||
161 | +TRANS(vbitclr_h, gvec_vvv, MO_16, do_vbitclr) | ||
162 | +TRANS(vbitclr_w, gvec_vvv, MO_32, do_vbitclr) | ||
163 | +TRANS(vbitclr_d, gvec_vvv, MO_64, do_vbitclr) | ||
164 | + | ||
165 | +static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm, | ||
166 | + void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) | ||
167 | +{ | ||
168 | + int lsh; | ||
169 | + TCGv_vec t1, one; | ||
170 | + | ||
171 | + lsh = imm & ((8 << vece) -1); | ||
172 | + t1 = tcg_temp_new_vec_matching(t); | ||
173 | + one = tcg_constant_vec_matching(t, vece, 1); | ||
174 | + | ||
175 | + tcg_gen_shli_vec(vece, t1, one, lsh); | ||
176 | + func(vece, t, a, t1); | ||
177 | +} | ||
178 | + | ||
179 | +static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
180 | +{ | ||
181 | + do_vbiti(vece, t, a, imm, tcg_gen_andc_vec); | ||
182 | +} | ||
183 | + | ||
184 | +static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
185 | +{ | ||
186 | + do_vbiti(vece, t, a, imm, tcg_gen_or_vec); | ||
187 | +} | ||
188 | + | ||
189 | +static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
190 | +{ | ||
191 | + do_vbiti(vece, t, a, imm, tcg_gen_xor_vec); | ||
192 | +} | ||
193 | + | ||
194 | +static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
195 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
196 | +{ | ||
197 | + static const TCGOpcode vecop_list[] = { | ||
198 | + INDEX_op_shli_vec, INDEX_op_andc_vec, 0 | ||
199 | + }; | ||
200 | + static const GVecGen2i op[4] = { | ||
201 | + { | ||
202 | + .fniv = gen_vbitclri, | ||
203 | + .fnoi = gen_helper_vbitclri_b, | ||
204 | + .opt_opc = vecop_list, | ||
205 | + .vece = MO_8 | ||
206 | + }, | ||
207 | + { | ||
208 | + .fniv = gen_vbitclri, | ||
209 | + .fnoi = gen_helper_vbitclri_h, | ||
210 | + .opt_opc = vecop_list, | ||
211 | + .vece = MO_16 | ||
212 | + }, | ||
213 | + { | ||
214 | + .fniv = gen_vbitclri, | ||
215 | + .fnoi = gen_helper_vbitclri_w, | ||
216 | + .opt_opc = vecop_list, | ||
217 | + .vece = MO_32 | ||
218 | + }, | ||
219 | + { | ||
220 | + .fniv = gen_vbitclri, | ||
221 | + .fnoi = gen_helper_vbitclri_d, | ||
222 | + .opt_opc = vecop_list, | ||
223 | + .vece = MO_64 | ||
224 | + }, | ||
225 | + }; | ||
226 | + | ||
227 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
228 | +} | ||
229 | + | ||
230 | +TRANS(vbitclri_b, gvec_vv_i, MO_8, do_vbitclri) | ||
231 | +TRANS(vbitclri_h, gvec_vv_i, MO_16, do_vbitclri) | ||
232 | +TRANS(vbitclri_w, gvec_vv_i, MO_32, do_vbitclri) | ||
233 | +TRANS(vbitclri_d, gvec_vv_i, MO_64, do_vbitclri) | ||
234 | + | ||
235 | +static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
236 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
237 | +{ | ||
238 | + static const TCGOpcode vecop_list[] = { | ||
239 | + INDEX_op_shlv_vec, 0 | ||
240 | + }; | ||
241 | + static const GVecGen3 op[4] = { | ||
242 | + { | ||
243 | + .fniv = gen_vbitset, | ||
244 | + .fno = gen_helper_vbitset_b, | ||
245 | + .opt_opc = vecop_list, | ||
246 | + .vece = MO_8 | ||
247 | + }, | ||
248 | + { | ||
249 | + .fniv = gen_vbitset, | ||
250 | + .fno = gen_helper_vbitset_h, | ||
251 | + .opt_opc = vecop_list, | ||
252 | + .vece = MO_16 | ||
253 | + }, | ||
254 | + { | ||
255 | + .fniv = gen_vbitset, | ||
256 | + .fno = gen_helper_vbitset_w, | ||
257 | + .opt_opc = vecop_list, | ||
258 | + .vece = MO_32 | ||
259 | + }, | ||
260 | + { | ||
261 | + .fniv = gen_vbitset, | ||
262 | + .fno = gen_helper_vbitset_d, | ||
263 | + .opt_opc = vecop_list, | ||
264 | + .vece = MO_64 | ||
265 | + }, | ||
266 | + }; | ||
267 | + | ||
268 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
269 | +} | ||
270 | + | ||
271 | +TRANS(vbitset_b, gvec_vvv, MO_8, do_vbitset) | ||
272 | +TRANS(vbitset_h, gvec_vvv, MO_16, do_vbitset) | ||
273 | +TRANS(vbitset_w, gvec_vvv, MO_32, do_vbitset) | ||
274 | +TRANS(vbitset_d, gvec_vvv, MO_64, do_vbitset) | ||
275 | + | ||
276 | +static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
277 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
278 | +{ | ||
279 | + static const TCGOpcode vecop_list[] = { | ||
280 | + INDEX_op_shli_vec, 0 | ||
281 | + }; | ||
282 | + static const GVecGen2i op[4] = { | ||
283 | + { | ||
284 | + .fniv = gen_vbitseti, | ||
285 | + .fnoi = gen_helper_vbitseti_b, | ||
286 | + .opt_opc = vecop_list, | ||
287 | + .vece = MO_8 | ||
288 | + }, | ||
289 | + { | ||
290 | + .fniv = gen_vbitseti, | ||
291 | + .fnoi = gen_helper_vbitseti_h, | ||
292 | + .opt_opc = vecop_list, | ||
293 | + .vece = MO_16 | ||
294 | + }, | ||
295 | + { | ||
296 | + .fniv = gen_vbitseti, | ||
297 | + .fnoi = gen_helper_vbitseti_w, | ||
298 | + .opt_opc = vecop_list, | ||
299 | + .vece = MO_32 | ||
300 | + }, | ||
301 | + { | ||
302 | + .fniv = gen_vbitseti, | ||
303 | + .fnoi = gen_helper_vbitseti_d, | ||
304 | + .opt_opc = vecop_list, | ||
305 | + .vece = MO_64 | ||
306 | + }, | ||
307 | + }; | ||
308 | + | ||
309 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
310 | +} | ||
311 | + | ||
312 | +TRANS(vbitseti_b, gvec_vv_i, MO_8, do_vbitseti) | ||
313 | +TRANS(vbitseti_h, gvec_vv_i, MO_16, do_vbitseti) | ||
314 | +TRANS(vbitseti_w, gvec_vv_i, MO_32, do_vbitseti) | ||
315 | +TRANS(vbitseti_d, gvec_vv_i, MO_64, do_vbitseti) | ||
316 | + | ||
317 | +static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
318 | + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) | ||
319 | +{ | ||
320 | + static const TCGOpcode vecop_list[] = { | ||
321 | + INDEX_op_shlv_vec, 0 | ||
322 | + }; | ||
323 | + static const GVecGen3 op[4] = { | ||
324 | + { | ||
325 | + .fniv = gen_vbitrev, | ||
326 | + .fno = gen_helper_vbitrev_b, | ||
327 | + .opt_opc = vecop_list, | ||
328 | + .vece = MO_8 | ||
329 | + }, | ||
330 | + { | ||
331 | + .fniv = gen_vbitrev, | ||
332 | + .fno = gen_helper_vbitrev_h, | ||
333 | + .opt_opc = vecop_list, | ||
334 | + .vece = MO_16 | ||
335 | + }, | ||
336 | + { | ||
337 | + .fniv = gen_vbitrev, | ||
338 | + .fno = gen_helper_vbitrev_w, | ||
339 | + .opt_opc = vecop_list, | ||
340 | + .vece = MO_32 | ||
341 | + }, | ||
342 | + { | ||
343 | + .fniv = gen_vbitrev, | ||
344 | + .fno = gen_helper_vbitrev_d, | ||
345 | + .opt_opc = vecop_list, | ||
346 | + .vece = MO_64 | ||
347 | + }, | ||
348 | + }; | ||
349 | + | ||
350 | + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); | ||
351 | +} | ||
352 | + | ||
353 | +TRANS(vbitrev_b, gvec_vvv, MO_8, do_vbitrev) | ||
354 | +TRANS(vbitrev_h, gvec_vvv, MO_16, do_vbitrev) | ||
355 | +TRANS(vbitrev_w, gvec_vvv, MO_32, do_vbitrev) | ||
356 | +TRANS(vbitrev_d, gvec_vvv, MO_64, do_vbitrev) | ||
357 | + | ||
358 | +static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, | ||
359 | + int64_t imm, uint32_t oprsz, uint32_t maxsz) | ||
360 | +{ | ||
361 | + static const TCGOpcode vecop_list[] = { | ||
362 | + INDEX_op_shli_vec, 0 | ||
363 | + }; | ||
364 | + static const GVecGen2i op[4] = { | ||
365 | + { | ||
366 | + .fniv = gen_vbitrevi, | ||
367 | + .fnoi = gen_helper_vbitrevi_b, | ||
368 | + .opt_opc = vecop_list, | ||
369 | + .vece = MO_8 | ||
370 | + }, | ||
371 | + { | ||
372 | + .fniv = gen_vbitrevi, | ||
373 | + .fnoi = gen_helper_vbitrevi_h, | ||
374 | + .opt_opc = vecop_list, | ||
375 | + .vece = MO_16 | ||
376 | + }, | ||
377 | + { | ||
378 | + .fniv = gen_vbitrevi, | ||
379 | + .fnoi = gen_helper_vbitrevi_w, | ||
380 | + .opt_opc = vecop_list, | ||
381 | + .vece = MO_32 | ||
382 | + }, | ||
383 | + { | ||
384 | + .fniv = gen_vbitrevi, | ||
385 | + .fnoi = gen_helper_vbitrevi_d, | ||
386 | + .opt_opc = vecop_list, | ||
387 | + .vece = MO_64 | ||
388 | + }, | ||
389 | + }; | ||
390 | + | ||
391 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); | ||
392 | +} | ||
393 | + | ||
394 | +TRANS(vbitrevi_b, gvec_vv_i, MO_8, do_vbitrevi) | ||
395 | +TRANS(vbitrevi_h, gvec_vv_i, MO_16, do_vbitrevi) | ||
396 | +TRANS(vbitrevi_w, gvec_vv_i, MO_32, do_vbitrevi) | ||
397 | +TRANS(vbitrevi_d, gvec_vv_i, MO_64, do_vbitrevi) | ||
398 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
399 | index XXXXXXX..XXXXXXX 100644 | ||
400 | --- a/target/loongarch/insns.decode | ||
401 | +++ b/target/loongarch/insns.decode | ||
402 | @@ -XXX,XX +XXX,XX @@ vpcnt_b 0111 00101001 11000 01000 ..... ..... @vv | ||
403 | vpcnt_h 0111 00101001 11000 01001 ..... ..... @vv | ||
404 | vpcnt_w 0111 00101001 11000 01010 ..... ..... @vv | ||
405 | vpcnt_d 0111 00101001 11000 01011 ..... ..... @vv | ||
406 | + | ||
407 | +vbitclr_b 0111 00010000 11000 ..... ..... ..... @vvv | ||
408 | +vbitclr_h 0111 00010000 11001 ..... ..... ..... @vvv | ||
409 | +vbitclr_w 0111 00010000 11010 ..... ..... ..... @vvv | ||
410 | +vbitclr_d 0111 00010000 11011 ..... ..... ..... @vvv | ||
411 | +vbitclri_b 0111 00110001 00000 01 ... ..... ..... @vv_ui3 | ||
412 | +vbitclri_h 0111 00110001 00000 1 .... ..... ..... @vv_ui4 | ||
413 | +vbitclri_w 0111 00110001 00001 ..... ..... ..... @vv_ui5 | ||
414 | +vbitclri_d 0111 00110001 0001 ...... ..... ..... @vv_ui6 | ||
415 | +vbitset_b 0111 00010000 11100 ..... ..... ..... @vvv | ||
416 | +vbitset_h 0111 00010000 11101 ..... ..... ..... @vvv | ||
417 | +vbitset_w 0111 00010000 11110 ..... ..... ..... @vvv | ||
418 | +vbitset_d 0111 00010000 11111 ..... ..... ..... @vvv | ||
419 | +vbitseti_b 0111 00110001 01000 01 ... ..... ..... @vv_ui3 | ||
420 | +vbitseti_h 0111 00110001 01000 1 .... ..... ..... @vv_ui4 | ||
421 | +vbitseti_w 0111 00110001 01001 ..... ..... ..... @vv_ui5 | ||
422 | +vbitseti_d 0111 00110001 0101 ...... ..... ..... @vv_ui6 | ||
423 | +vbitrev_b 0111 00010001 00000 ..... ..... ..... @vvv | ||
424 | +vbitrev_h 0111 00010001 00001 ..... ..... ..... @vvv | ||
425 | +vbitrev_w 0111 00010001 00010 ..... ..... ..... @vvv | ||
426 | +vbitrev_d 0111 00010001 00011 ..... ..... ..... @vvv | ||
427 | +vbitrevi_b 0111 00110001 10000 01 ... ..... ..... @vv_ui3 | ||
428 | +vbitrevi_h 0111 00110001 10000 1 .... ..... ..... @vv_ui4 | ||
429 | +vbitrevi_w 0111 00110001 10001 ..... ..... ..... @vv_ui5 | ||
430 | +vbitrevi_d 0111 00110001 1001 ...... ..... ..... @vv_ui6 | ||
431 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
432 | index XXXXXXX..XXXXXXX 100644 | ||
433 | --- a/target/loongarch/lsx_helper.c | ||
434 | +++ b/target/loongarch/lsx_helper.c | ||
435 | @@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_b, 8, UB, ctpop8) | ||
436 | VPCNT(vpcnt_h, 16, UH, ctpop16) | ||
437 | VPCNT(vpcnt_w, 32, UW, ctpop32) | ||
438 | VPCNT(vpcnt_d, 64, UD, ctpop64) | ||
439 | + | ||
440 | +#define DO_BITCLR(a, bit) (a & ~(1ull << bit)) | ||
441 | +#define DO_BITSET(a, bit) (a | 1ull << bit) | ||
442 | +#define DO_BITREV(a, bit) (a ^ (1ull << bit)) | ||
443 | + | ||
444 | +#define DO_BIT(NAME, BIT, E, DO_OP) \ | ||
445 | +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ | ||
446 | +{ \ | ||
447 | + int i; \ | ||
448 | + VReg *Vd = (VReg *)vd; \ | ||
449 | + VReg *Vj = (VReg *)vj; \ | ||
450 | + VReg *Vk = (VReg *)vk; \ | ||
451 | + \ | ||
452 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
453 | + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ | ||
454 | + } \ | ||
455 | +} | ||
456 | + | ||
457 | +DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) | ||
458 | +DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) | ||
459 | +DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) | ||
460 | +DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) | ||
461 | +DO_BIT(vbitset_b, 8, UB, DO_BITSET) | ||
462 | +DO_BIT(vbitset_h, 16, UH, DO_BITSET) | ||
463 | +DO_BIT(vbitset_w, 32, UW, DO_BITSET) | ||
464 | +DO_BIT(vbitset_d, 64, UD, DO_BITSET) | ||
465 | +DO_BIT(vbitrev_b, 8, UB, DO_BITREV) | ||
466 | +DO_BIT(vbitrev_h, 16, UH, DO_BITREV) | ||
467 | +DO_BIT(vbitrev_w, 32, UW, DO_BITREV) | ||
468 | +DO_BIT(vbitrev_d, 64, UD, DO_BITREV) | ||
469 | + | ||
470 | +#define DO_BITI(NAME, BIT, E, DO_OP) \ | ||
471 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
472 | +{ \ | ||
473 | + int i; \ | ||
474 | + VReg *Vd = (VReg *)vd; \ | ||
475 | + VReg *Vj = (VReg *)vj; \ | ||
476 | + \ | ||
477 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
478 | + Vd->E(i) = DO_OP(Vj->E(i), imm); \ | ||
479 | + } \ | ||
480 | +} | ||
481 | + | ||
482 | +DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) | ||
483 | +DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) | ||
484 | +DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) | ||
485 | +DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) | ||
486 | +DO_BITI(vbitseti_b, 8, UB, DO_BITSET) | ||
487 | +DO_BITI(vbitseti_h, 16, UH, DO_BITSET) | ||
488 | +DO_BITI(vbitseti_w, 32, UW, DO_BITSET) | ||
489 | +DO_BITI(vbitseti_d, 64, UD, DO_BITSET) | ||
490 | +DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) | ||
491 | +DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) | ||
492 | +DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) | ||
493 | +DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) | ||
494 | -- | ||
495 | 2.31.1
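
[Illustrative aside, not part of the patch.] The vbitclr/vbitset/vbitrev helpers reduce the bit index modulo the element width (Vk->E(i) % BIT), so an out-of-range index wraps around rather than affecting the whole element; the immediate forms rely on the decoder limiting the immediate instead. A scalar sketch of the 8-bit lane operations:

    #include <stdint.h>
    #include <stdio.h>

    /* 8-bit lane models; the bit index wraps modulo the element width,
     * matching the Vk->E(i) % BIT reduction in the helpers above. */
    static uint8_t bitclr8(uint8_t a, unsigned k) { return a & (uint8_t)~(1u << (k % 8)); }
    static uint8_t bitset8(uint8_t a, unsigned k) { return a | (uint8_t)(1u << (k % 8)); }
    static uint8_t bitrev8(uint8_t a, unsigned k) { return a ^ (uint8_t)(1u << (k % 8)); }

    int main(void)
    {
        printf("%#x %#x %#x\n",
               (unsigned)bitclr8(0xff, 11),  /* index 11 wraps to 3 -> 0xf7 */
               (unsigned)bitset8(0x00, 3),   /* 0x08 */
               (unsigned)bitrev8(0x08, 3));  /* toggles bit 3 back to 0 */
        return 0;
    }

The TCG fast path above builds the same single-bit mask with shlv/shli of a vector of ones, then applies andc, or, or xor per element.
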
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VFRSTP[I].{B/H}. | ||
3 | 1 | ||
4 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-33-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 5 +++ | ||
9 | target/loongarch/helper.h | 5 +++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++ | ||
11 | target/loongarch/insns.decode | 5 +++ | ||
12 | target/loongarch/lsx_helper.c | 41 +++++++++++++++++++++ | ||
13 | 5 files changed, 61 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vbitrevi_b, vv_i) | ||
20 | INSN_LSX(vbitrevi_h, vv_i) | ||
21 | INSN_LSX(vbitrevi_w, vv_i) | ||
22 | INSN_LSX(vbitrevi_d, vv_i) | ||
23 | + | ||
24 | +INSN_LSX(vfrstp_b, vvv) | ||
25 | +INSN_LSX(vfrstp_h, vvv) | ||
26 | +INSN_LSX(vfrstpi_b, vv_i) | ||
27 | +INSN_LSX(vfrstpi_h, vv_i) | ||
28 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/loongarch/helper.h | ||
31 | +++ b/target/loongarch/helper.h | ||
32 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
33 | DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
34 | DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
35 | DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
36 | + | ||
37 | +DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32) | ||
38 | +DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32) | ||
39 | +DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32) | ||
40 | +DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32) | ||
41 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
44 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
45 | @@ -XXX,XX +XXX,XX @@ TRANS(vbitrevi_b, gvec_vv_i, MO_8, do_vbitrevi) | ||
46 | TRANS(vbitrevi_h, gvec_vv_i, MO_16, do_vbitrevi) | ||
47 | TRANS(vbitrevi_w, gvec_vv_i, MO_32, do_vbitrevi) | ||
48 | TRANS(vbitrevi_d, gvec_vv_i, MO_64, do_vbitrevi) | ||
49 | + | ||
50 | +TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b) | ||
51 | +TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h) | ||
52 | +TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b) | ||
53 | +TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h) | ||
54 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/loongarch/insns.decode | ||
57 | +++ b/target/loongarch/insns.decode | ||
58 | @@ -XXX,XX +XXX,XX @@ vbitrevi_b 0111 00110001 10000 01 ... ..... ..... @vv_ui3 | ||
59 | vbitrevi_h 0111 00110001 10000 1 .... ..... ..... @vv_ui4 | ||
60 | vbitrevi_w 0111 00110001 10001 ..... ..... ..... @vv_ui5 | ||
61 | vbitrevi_d 0111 00110001 1001 ...... ..... ..... @vv_ui6 | ||
62 | + | ||
63 | +vfrstp_b 0111 00010010 10110 ..... ..... ..... @vvv | ||
64 | +vfrstp_h 0111 00010010 10111 ..... ..... ..... @vvv | ||
65 | +vfrstpi_b 0111 00101001 10100 ..... ..... ..... @vv_ui5 | ||
66 | +vfrstpi_h 0111 00101001 10101 ..... ..... ..... @vv_ui5 | ||
67 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/loongarch/lsx_helper.c | ||
70 | +++ b/target/loongarch/lsx_helper.c | ||
71 | @@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) | ||
72 | DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) | ||
73 | DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) | ||
74 | DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) | ||
75 | + | ||
76 | +#define VFRSTP(NAME, BIT, MASK, E) \ | ||
77 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
78 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
79 | +{ \ | ||
80 | + int i, m; \ | ||
81 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
82 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
83 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
84 | + \ | ||
85 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
86 | + if (Vj->E(i) < 0) { \ | ||
87 | + break; \ | ||
88 | + } \ | ||
89 | + } \ | ||
90 | + m = Vk->E(0) & MASK; \ | ||
91 | + Vd->E(m) = i; \ | ||
92 | +} | ||
93 | + | ||
94 | +VFRSTP(vfrstp_b, 8, 0xf, B) | ||
95 | +VFRSTP(vfrstp_h, 16, 0x7, H) | ||
96 | + | ||
97 | +#define VFRSTPI(NAME, BIT, E) \ | ||
98 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
99 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
100 | +{ \ | ||
101 | + int i, m; \ | ||
102 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
103 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
104 | + \ | ||
105 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
106 | + if (Vj->E(i) < 0) { \ | ||
107 | + break; \ | ||
108 | + } \ | ||
109 | + } \ | ||
110 | + m = imm % (LSX_LEN/BIT); \ | ||
111 | + Vd->E(m) = i; \ | ||
112 | +} | ||
113 | + | ||
114 | +VFRSTPI(vfrstpi_b, 8, B) | ||
115 | +VFRSTPI(vfrstpi_h, 16, H) | ||
116 | -- | ||
117 | 2.31.1
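
[Illustrative aside, not part of the patch.] vfrstp scans the source vector for the first negative element (most significant bit set) and writes that lane index into a single destination lane selected by Vk or by the immediate; if no element is negative, the element count itself is stored. A scalar sketch of vfrstpi.b:

    #include <stdint.h>
    #include <stdio.h>

    /* vfrstpi.b model: find the first negative byte in vj (16 if none)
     * and store that index into lane (imm % 16) of vd. */
    static void vfrstpi_b(int8_t vd[16], const int8_t vj[16], unsigned imm)
    {
        int i;

        for (i = 0; i < 16; i++) {
            if (vj[i] < 0) {
                break;
            }
        }
        vd[imm % 16] = (int8_t)i;
    }

    int main(void)
    {
        int8_t vd[16] = { 0 };
        int8_t vj[16] = { 1, 2, 3, -4, 5 }; /* first negative at index 3 */

        vfrstpi_b(vd, vj, 0);
        printf("%d\n", vd[0]); /* 3 */
        return 0;
    }
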
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VFCVT{L/H}.{S.H/D.S}; | ||
3 | - VFCVT.{H.S/S.D}; | ||
4 | - VFRINT[{RNE/RZ/RP/RM}].{S/D}; | ||
5 | - VFTINT[{RNE/RZ/RP/RM}].{W.S/L.D}; | ||
6 | - VFTINT[RZ].{WU.S/LU.D}; | ||
7 | - VFTINT[{RNE/RZ/RP/RM}].W.D; | ||
8 | - VFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S; | ||
9 | - VFFINT.{S.W/D.L}[U]; | ||
10 | - VFFINT.S.L, VFFINT{L/H}.D.W. | ||
11 | 1 | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
14 | Message-Id: <20230504122810.4094787-35-gaosong@loongson.cn> | ||
15 | --- | ||
16 | target/loongarch/disas.c | 56 +++ | ||
17 | target/loongarch/helper.h | 56 +++ | ||
18 | target/loongarch/insn_trans/trans_lsx.c.inc | 56 +++ | ||
19 | target/loongarch/insns.decode | 56 +++ | ||
20 | target/loongarch/lsx_helper.c | 376 ++++++++++++++++++++ | ||
21 | 5 files changed, 600 insertions(+) | ||
22 | |||
23 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/target/loongarch/disas.c | ||
26 | +++ b/target/loongarch/disas.c | ||
27 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vfrecip_s, vv) | ||
28 | INSN_LSX(vfrecip_d, vv) | ||
29 | INSN_LSX(vfrsqrt_s, vv) | ||
30 | INSN_LSX(vfrsqrt_d, vv) | ||
31 | + | ||
32 | +INSN_LSX(vfcvtl_s_h, vv) | ||
33 | +INSN_LSX(vfcvth_s_h, vv) | ||
34 | +INSN_LSX(vfcvtl_d_s, vv) | ||
35 | +INSN_LSX(vfcvth_d_s, vv) | ||
36 | +INSN_LSX(vfcvt_h_s, vvv) | ||
37 | +INSN_LSX(vfcvt_s_d, vvv) | ||
38 | + | ||
39 | +INSN_LSX(vfrint_s, vv) | ||
40 | +INSN_LSX(vfrint_d, vv) | ||
41 | +INSN_LSX(vfrintrm_s, vv) | ||
42 | +INSN_LSX(vfrintrm_d, vv) | ||
43 | +INSN_LSX(vfrintrp_s, vv) | ||
44 | +INSN_LSX(vfrintrp_d, vv) | ||
45 | +INSN_LSX(vfrintrz_s, vv) | ||
46 | +INSN_LSX(vfrintrz_d, vv) | ||
47 | +INSN_LSX(vfrintrne_s, vv) | ||
48 | +INSN_LSX(vfrintrne_d, vv) | ||
49 | + | ||
50 | +INSN_LSX(vftint_w_s, vv) | ||
51 | +INSN_LSX(vftint_l_d, vv) | ||
52 | +INSN_LSX(vftintrm_w_s, vv) | ||
53 | +INSN_LSX(vftintrm_l_d, vv) | ||
54 | +INSN_LSX(vftintrp_w_s, vv) | ||
55 | +INSN_LSX(vftintrp_l_d, vv) | ||
56 | +INSN_LSX(vftintrz_w_s, vv) | ||
57 | +INSN_LSX(vftintrz_l_d, vv) | ||
58 | +INSN_LSX(vftintrne_w_s, vv) | ||
59 | +INSN_LSX(vftintrne_l_d, vv) | ||
60 | +INSN_LSX(vftint_wu_s, vv) | ||
61 | +INSN_LSX(vftint_lu_d, vv) | ||
62 | +INSN_LSX(vftintrz_wu_s, vv) | ||
63 | +INSN_LSX(vftintrz_lu_d, vv) | ||
64 | +INSN_LSX(vftint_w_d, vvv) | ||
65 | +INSN_LSX(vftintrm_w_d, vvv) | ||
66 | +INSN_LSX(vftintrp_w_d, vvv) | ||
67 | +INSN_LSX(vftintrz_w_d, vvv) | ||
68 | +INSN_LSX(vftintrne_w_d, vvv) | ||
69 | +INSN_LSX(vftintl_l_s, vv) | ||
70 | +INSN_LSX(vftinth_l_s, vv) | ||
71 | +INSN_LSX(vftintrml_l_s, vv) | ||
72 | +INSN_LSX(vftintrmh_l_s, vv) | ||
73 | +INSN_LSX(vftintrpl_l_s, vv) | ||
74 | +INSN_LSX(vftintrph_l_s, vv) | ||
75 | +INSN_LSX(vftintrzl_l_s, vv) | ||
76 | +INSN_LSX(vftintrzh_l_s, vv) | ||
77 | +INSN_LSX(vftintrnel_l_s, vv) | ||
78 | +INSN_LSX(vftintrneh_l_s, vv) | ||
79 | + | ||
80 | +INSN_LSX(vffint_s_w, vv) | ||
81 | +INSN_LSX(vffint_s_wu, vv) | ||
82 | +INSN_LSX(vffint_d_l, vv) | ||
83 | +INSN_LSX(vffint_d_lu, vv) | ||
84 | +INSN_LSX(vffintl_d_w, vv) | ||
85 | +INSN_LSX(vffinth_d_w, vv) | ||
86 | +INSN_LSX(vffint_s_l, vvv) | ||
87 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/loongarch/helper.h | ||
90 | +++ b/target/loongarch/helper.h | ||
91 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfrecip_s, void, env, i32, i32) | ||
92 | DEF_HELPER_3(vfrecip_d, void, env, i32, i32) | ||
93 | DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32) | ||
94 | DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32) | ||
95 | + | ||
96 | +DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32) | ||
97 | +DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32) | ||
98 | +DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32) | ||
99 | +DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32) | ||
100 | +DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32) | ||
101 | +DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32) | ||
102 | + | ||
103 | +DEF_HELPER_3(vfrintrne_s, void, env, i32, i32) | ||
104 | +DEF_HELPER_3(vfrintrne_d, void, env, i32, i32) | ||
105 | +DEF_HELPER_3(vfrintrz_s, void, env, i32, i32) | ||
106 | +DEF_HELPER_3(vfrintrz_d, void, env, i32, i32) | ||
107 | +DEF_HELPER_3(vfrintrp_s, void, env, i32, i32) | ||
108 | +DEF_HELPER_3(vfrintrp_d, void, env, i32, i32) | ||
109 | +DEF_HELPER_3(vfrintrm_s, void, env, i32, i32) | ||
110 | +DEF_HELPER_3(vfrintrm_d, void, env, i32, i32) | ||
111 | +DEF_HELPER_3(vfrint_s, void, env, i32, i32) | ||
112 | +DEF_HELPER_3(vfrint_d, void, env, i32, i32) | ||
113 | + | ||
114 | +DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32) | ||
115 | +DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32) | ||
116 | +DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32) | ||
117 | +DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32) | ||
118 | +DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32) | ||
119 | +DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32) | ||
120 | +DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32) | ||
121 | +DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32) | ||
122 | +DEF_HELPER_3(vftint_w_s, void, env, i32, i32) | ||
123 | +DEF_HELPER_3(vftint_l_d, void, env, i32, i32) | ||
124 | +DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32) | ||
125 | +DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32) | ||
126 | +DEF_HELPER_3(vftint_wu_s, void, env, i32, i32) | ||
127 | +DEF_HELPER_3(vftint_lu_d, void, env, i32, i32) | ||
128 | +DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32) | ||
129 | +DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32) | ||
130 | +DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32) | ||
131 | +DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32) | ||
132 | +DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32) | ||
133 | +DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32) | ||
134 | +DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32) | ||
135 | +DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32) | ||
136 | +DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32) | ||
137 | +DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32) | ||
138 | +DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32) | ||
139 | +DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32) | ||
140 | +DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32) | ||
141 | +DEF_HELPER_3(vftintl_l_s, void, env, i32, i32) | ||
142 | +DEF_HELPER_3(vftinth_l_s, void, env, i32, i32) | ||
143 | + | ||
144 | +DEF_HELPER_3(vffint_s_w, void, env, i32, i32) | ||
145 | +DEF_HELPER_3(vffint_d_l, void, env, i32, i32) | ||
146 | +DEF_HELPER_3(vffint_s_wu, void, env, i32, i32) | ||
147 | +DEF_HELPER_3(vffint_d_lu, void, env, i32, i32) | ||
148 | +DEF_HELPER_3(vffintl_d_w, void, env, i32, i32) | ||
149 | +DEF_HELPER_3(vffinth_d_w, void, env, i32, i32) | ||
150 | +DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32) | ||
151 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
154 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
155 | @@ -XXX,XX +XXX,XX @@ TRANS(vfrecip_s, gen_vv, gen_helper_vfrecip_s) | ||
156 | TRANS(vfrecip_d, gen_vv, gen_helper_vfrecip_d) | ||
157 | TRANS(vfrsqrt_s, gen_vv, gen_helper_vfrsqrt_s) | ||
158 | TRANS(vfrsqrt_d, gen_vv, gen_helper_vfrsqrt_d) | ||
159 | + | ||
160 | +TRANS(vfcvtl_s_h, gen_vv, gen_helper_vfcvtl_s_h) | ||
161 | +TRANS(vfcvth_s_h, gen_vv, gen_helper_vfcvth_s_h) | ||
162 | +TRANS(vfcvtl_d_s, gen_vv, gen_helper_vfcvtl_d_s) | ||
163 | +TRANS(vfcvth_d_s, gen_vv, gen_helper_vfcvth_d_s) | ||
164 | +TRANS(vfcvt_h_s, gen_vvv, gen_helper_vfcvt_h_s) | ||
165 | +TRANS(vfcvt_s_d, gen_vvv, gen_helper_vfcvt_s_d) | ||
166 | + | ||
167 | +TRANS(vfrintrne_s, gen_vv, gen_helper_vfrintrne_s) | ||
168 | +TRANS(vfrintrne_d, gen_vv, gen_helper_vfrintrne_d) | ||
169 | +TRANS(vfrintrz_s, gen_vv, gen_helper_vfrintrz_s) | ||
170 | +TRANS(vfrintrz_d, gen_vv, gen_helper_vfrintrz_d) | ||
171 | +TRANS(vfrintrp_s, gen_vv, gen_helper_vfrintrp_s) | ||
172 | +TRANS(vfrintrp_d, gen_vv, gen_helper_vfrintrp_d) | ||
173 | +TRANS(vfrintrm_s, gen_vv, gen_helper_vfrintrm_s) | ||
174 | +TRANS(vfrintrm_d, gen_vv, gen_helper_vfrintrm_d) | ||
175 | +TRANS(vfrint_s, gen_vv, gen_helper_vfrint_s) | ||
176 | +TRANS(vfrint_d, gen_vv, gen_helper_vfrint_d) | ||
177 | + | ||
178 | +TRANS(vftintrne_w_s, gen_vv, gen_helper_vftintrne_w_s) | ||
179 | +TRANS(vftintrne_l_d, gen_vv, gen_helper_vftintrne_l_d) | ||
180 | +TRANS(vftintrz_w_s, gen_vv, gen_helper_vftintrz_w_s) | ||
181 | +TRANS(vftintrz_l_d, gen_vv, gen_helper_vftintrz_l_d) | ||
182 | +TRANS(vftintrp_w_s, gen_vv, gen_helper_vftintrp_w_s) | ||
183 | +TRANS(vftintrp_l_d, gen_vv, gen_helper_vftintrp_l_d) | ||
184 | +TRANS(vftintrm_w_s, gen_vv, gen_helper_vftintrm_w_s) | ||
185 | +TRANS(vftintrm_l_d, gen_vv, gen_helper_vftintrm_l_d) | ||
186 | +TRANS(vftint_w_s, gen_vv, gen_helper_vftint_w_s) | ||
187 | +TRANS(vftint_l_d, gen_vv, gen_helper_vftint_l_d) | ||
188 | +TRANS(vftintrz_wu_s, gen_vv, gen_helper_vftintrz_wu_s) | ||
189 | +TRANS(vftintrz_lu_d, gen_vv, gen_helper_vftintrz_lu_d) | ||
190 | +TRANS(vftint_wu_s, gen_vv, gen_helper_vftint_wu_s) | ||
191 | +TRANS(vftint_lu_d, gen_vv, gen_helper_vftint_lu_d) | ||
192 | +TRANS(vftintrne_w_d, gen_vvv, gen_helper_vftintrne_w_d) | ||
193 | +TRANS(vftintrz_w_d, gen_vvv, gen_helper_vftintrz_w_d) | ||
194 | +TRANS(vftintrp_w_d, gen_vvv, gen_helper_vftintrp_w_d) | ||
195 | +TRANS(vftintrm_w_d, gen_vvv, gen_helper_vftintrm_w_d) | ||
196 | +TRANS(vftint_w_d, gen_vvv, gen_helper_vftint_w_d) | ||
197 | +TRANS(vftintrnel_l_s, gen_vv, gen_helper_vftintrnel_l_s) | ||
198 | +TRANS(vftintrneh_l_s, gen_vv, gen_helper_vftintrneh_l_s) | ||
199 | +TRANS(vftintrzl_l_s, gen_vv, gen_helper_vftintrzl_l_s) | ||
200 | +TRANS(vftintrzh_l_s, gen_vv, gen_helper_vftintrzh_l_s) | ||
201 | +TRANS(vftintrpl_l_s, gen_vv, gen_helper_vftintrpl_l_s) | ||
202 | +TRANS(vftintrph_l_s, gen_vv, gen_helper_vftintrph_l_s) | ||
203 | +TRANS(vftintrml_l_s, gen_vv, gen_helper_vftintrml_l_s) | ||
204 | +TRANS(vftintrmh_l_s, gen_vv, gen_helper_vftintrmh_l_s) | ||
205 | +TRANS(vftintl_l_s, gen_vv, gen_helper_vftintl_l_s) | ||
206 | +TRANS(vftinth_l_s, gen_vv, gen_helper_vftinth_l_s) | ||
207 | + | ||
208 | +TRANS(vffint_s_w, gen_vv, gen_helper_vffint_s_w) | ||
209 | +TRANS(vffint_d_l, gen_vv, gen_helper_vffint_d_l) | ||
210 | +TRANS(vffint_s_wu, gen_vv, gen_helper_vffint_s_wu) | ||
211 | +TRANS(vffint_d_lu, gen_vv, gen_helper_vffint_d_lu) | ||
212 | +TRANS(vffintl_d_w, gen_vv, gen_helper_vffintl_d_w) | ||
213 | +TRANS(vffinth_d_w, gen_vv, gen_helper_vffinth_d_w) | ||
214 | +TRANS(vffint_s_l, gen_vvv, gen_helper_vffint_s_l) | ||
215 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
216 | index XXXXXXX..XXXXXXX 100644 | ||
217 | --- a/target/loongarch/insns.decode | ||
218 | +++ b/target/loongarch/insns.decode | ||
219 | @@ -XXX,XX +XXX,XX @@ vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv | ||
220 | vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv | ||
221 | vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv | ||
222 | vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv | ||
223 | + | ||
224 | +vfcvtl_s_h 0111 00101001 11011 11010 ..... ..... @vv | ||
225 | +vfcvth_s_h 0111 00101001 11011 11011 ..... ..... @vv | ||
226 | +vfcvtl_d_s 0111 00101001 11011 11100 ..... ..... @vv | ||
227 | +vfcvth_d_s 0111 00101001 11011 11101 ..... ..... @vv | ||
228 | +vfcvt_h_s 0111 00010100 01100 ..... ..... ..... @vvv | ||
229 | +vfcvt_s_d 0111 00010100 01101 ..... ..... ..... @vvv | ||
230 | + | ||
231 | +vfrint_s 0111 00101001 11010 01101 ..... ..... @vv | ||
232 | +vfrint_d 0111 00101001 11010 01110 ..... ..... @vv | ||
233 | +vfrintrm_s 0111 00101001 11010 10001 ..... ..... @vv | ||
234 | +vfrintrm_d 0111 00101001 11010 10010 ..... ..... @vv | ||
235 | +vfrintrp_s 0111 00101001 11010 10101 ..... ..... @vv | ||
236 | +vfrintrp_d 0111 00101001 11010 10110 ..... ..... @vv | ||
237 | +vfrintrz_s 0111 00101001 11010 11001 ..... ..... @vv | ||
238 | +vfrintrz_d 0111 00101001 11010 11010 ..... ..... @vv | ||
239 | +vfrintrne_s 0111 00101001 11010 11101 ..... ..... @vv | ||
240 | +vfrintrne_d 0111 00101001 11010 11110 ..... ..... @vv | ||
241 | + | ||
242 | +vftint_w_s 0111 00101001 11100 01100 ..... ..... @vv | ||
243 | +vftint_l_d 0111 00101001 11100 01101 ..... ..... @vv | ||
244 | +vftintrm_w_s 0111 00101001 11100 01110 ..... ..... @vv | ||
245 | +vftintrm_l_d 0111 00101001 11100 01111 ..... ..... @vv | ||
246 | +vftintrp_w_s 0111 00101001 11100 10000 ..... ..... @vv | ||
247 | +vftintrp_l_d 0111 00101001 11100 10001 ..... ..... @vv | ||
248 | +vftintrz_w_s 0111 00101001 11100 10010 ..... ..... @vv | ||
249 | +vftintrz_l_d 0111 00101001 11100 10011 ..... ..... @vv | ||
250 | +vftintrne_w_s 0111 00101001 11100 10100 ..... ..... @vv | ||
251 | +vftintrne_l_d 0111 00101001 11100 10101 ..... ..... @vv | ||
252 | +vftint_wu_s 0111 00101001 11100 10110 ..... ..... @vv | ||
253 | +vftint_lu_d 0111 00101001 11100 10111 ..... ..... @vv | ||
254 | +vftintrz_wu_s 0111 00101001 11100 11100 ..... ..... @vv | ||
255 | +vftintrz_lu_d 0111 00101001 11100 11101 ..... ..... @vv | ||
256 | +vftint_w_d 0111 00010100 10011 ..... ..... ..... @vvv | ||
257 | +vftintrm_w_d 0111 00010100 10100 ..... ..... ..... @vvv | ||
258 | +vftintrp_w_d 0111 00010100 10101 ..... ..... ..... @vvv | ||
259 | +vftintrz_w_d 0111 00010100 10110 ..... ..... ..... @vvv | ||
260 | +vftintrne_w_d 0111 00010100 10111 ..... ..... ..... @vvv | ||
261 | +vftintl_l_s 0111 00101001 11101 00000 ..... ..... @vv | ||
262 | +vftinth_l_s 0111 00101001 11101 00001 ..... ..... @vv | ||
263 | +vftintrml_l_s 0111 00101001 11101 00010 ..... ..... @vv | ||
264 | +vftintrmh_l_s 0111 00101001 11101 00011 ..... ..... @vv | ||
265 | +vftintrpl_l_s 0111 00101001 11101 00100 ..... ..... @vv | ||
266 | +vftintrph_l_s 0111 00101001 11101 00101 ..... ..... @vv | ||
267 | +vftintrzl_l_s 0111 00101001 11101 00110 ..... ..... @vv | ||
268 | +vftintrzh_l_s 0111 00101001 11101 00111 ..... ..... @vv | ||
269 | +vftintrnel_l_s 0111 00101001 11101 01000 ..... ..... @vv | ||
270 | +vftintrneh_l_s 0111 00101001 11101 01001 ..... ..... @vv | ||
271 | + | ||
272 | +vffint_s_w 0111 00101001 11100 00000 ..... ..... @vv | ||
273 | +vffint_s_wu 0111 00101001 11100 00001 ..... ..... @vv | ||
274 | +vffint_d_l 0111 00101001 11100 00010 ..... ..... @vv | ||
275 | +vffint_d_lu 0111 00101001 11100 00011 ..... ..... @vv | ||
276 | +vffintl_d_w 0111 00101001 11100 00100 ..... ..... @vv | ||
277 | +vffinth_d_w 0111 00101001 11100 00101 ..... ..... @vv | ||
278 | +vffint_s_l 0111 00010100 10000 ..... ..... ..... @vvv | ||
279 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
280 | index XXXXXXX..XXXXXXX 100644 | ||
281 | --- a/target/loongarch/lsx_helper.c | ||
282 | +++ b/target/loongarch/lsx_helper.c | ||
283 | @@ -XXX,XX +XXX,XX @@ DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) | ||
284 | DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) | ||
285 | DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) | ||
286 | DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) | ||
287 | + | ||
288 | +static uint32_t float16_cvt_float32(uint16_t h, float_status *status) | ||
289 | +{ | ||
290 | + return float16_to_float32(h, true, status); | ||
291 | +} | ||
292 | +static uint64_t float32_cvt_float64(uint32_t s, float_status *status) | ||
293 | +{ | ||
294 | + return float32_to_float64(s, status); | ||
295 | +} | ||
296 | + | ||
297 | +static uint16_t float32_cvt_float16(uint32_t s, float_status *status) | ||
298 | +{ | ||
299 | + return float32_to_float16(s, true, status); | ||
300 | +} | ||
301 | +static uint32_t float64_cvt_float32(uint64_t d, float_status *status) | ||
302 | +{ | ||
303 | + return float64_to_float32(d, status); | ||
304 | +} | ||
305 | + | ||
306 | +void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
307 | +{ | ||
308 | + int i; | ||
309 | + VReg temp; | ||
310 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
311 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
312 | + | ||
313 | + vec_clear_cause(env); | ||
314 | + for (i = 0; i < LSX_LEN/32; i++) { | ||
315 | + temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status); | ||
316 | + vec_update_fcsr0(env, GETPC()); | ||
317 | + } | ||
318 | + *Vd = temp; | ||
319 | +} | ||
320 | + | ||
321 | +void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
322 | +{ | ||
323 | + int i; | ||
324 | + VReg temp; | ||
325 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
326 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
327 | + | ||
328 | + vec_clear_cause(env); | ||
329 | + for (i = 0; i < LSX_LEN/64; i++) { | ||
330 | + temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status); | ||
331 | + vec_update_fcsr0(env, GETPC()); | ||
332 | + } | ||
333 | + *Vd = temp; | ||
334 | +} | ||
335 | + | ||
336 | +void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
337 | +{ | ||
338 | + int i; | ||
339 | + VReg temp; | ||
340 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
341 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
342 | + | ||
343 | + vec_clear_cause(env); | ||
344 | + for (i = 0; i < LSX_LEN/32; i++) { | ||
345 | + temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status); | ||
346 | + vec_update_fcsr0(env, GETPC()); | ||
347 | + } | ||
348 | + *Vd = temp; | ||
349 | +} | ||
350 | + | ||
351 | +void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
352 | +{ | ||
353 | + int i; | ||
354 | + VReg temp; | ||
355 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
356 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
357 | + | ||
358 | + vec_clear_cause(env); | ||
359 | + for (i = 0; i < LSX_LEN/64; i++) { | ||
360 | + temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status); | ||
361 | + vec_update_fcsr0(env, GETPC()); | ||
362 | + } | ||
363 | + *Vd = temp; | ||
364 | +} | ||
365 | + | ||
366 | +void HELPER(vfcvt_h_s)(CPULoongArchState *env, | ||
367 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
368 | +{ | ||
369 | + int i; | ||
370 | + VReg temp; | ||
371 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
372 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
373 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
374 | + | ||
375 | + vec_clear_cause(env); | ||
376 | +    for (i = 0; i < LSX_LEN/32; i++) {                                  | ||
377 | + temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status); | ||
378 | + temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status); | ||
379 | + vec_update_fcsr0(env, GETPC()); | ||
380 | + } | ||
381 | + *Vd = temp; | ||
382 | +} | ||
383 | + | ||
384 | +void HELPER(vfcvt_s_d)(CPULoongArchState *env, | ||
385 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
386 | +{ | ||
387 | + int i; | ||
388 | + VReg temp; | ||
389 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
390 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
391 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
392 | + | ||
393 | + vec_clear_cause(env); | ||
394 | +    for (i = 0; i < LSX_LEN/64; i++) {                                  | ||
395 | + temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status); | ||
396 | + temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status); | ||
397 | + vec_update_fcsr0(env, GETPC()); | ||
398 | + } | ||
399 | + *Vd = temp; | ||
400 | +} | ||
401 | + | ||
402 | +void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
403 | +{ | ||
404 | + int i; | ||
405 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
406 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
407 | + | ||
408 | + vec_clear_cause(env); | ||
409 | + for (i = 0; i < 4; i++) { | ||
410 | + Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); | ||
411 | + vec_update_fcsr0(env, GETPC()); | ||
412 | + } | ||
413 | +} | ||
414 | + | ||
415 | +void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
416 | +{ | ||
417 | + int i; | ||
418 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
419 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
420 | + | ||
421 | + vec_clear_cause(env); | ||
422 | + for (i = 0; i < 2; i++) { | ||
423 | + Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); | ||
424 | + vec_update_fcsr0(env, GETPC()); | ||
425 | + } | ||
426 | +} | ||
427 | + | ||
428 | +#define FCVT_2OP(NAME, BIT, E, MODE) \ | ||
429 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
430 | +{ \ | ||
431 | + int i; \ | ||
432 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
433 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
434 | + \ | ||
435 | + vec_clear_cause(env); \ | ||
436 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
437 | + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ | ||
438 | + set_float_rounding_mode(MODE, &env->fp_status); \ | ||
439 | + Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ | ||
440 | + set_float_rounding_mode(old_mode, &env->fp_status); \ | ||
441 | + vec_update_fcsr0(env, GETPC()); \ | ||
442 | + } \ | ||
443 | +} | ||
444 | + | ||
445 | +FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) | ||
446 | +FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) | ||
447 | +FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) | ||
448 | +FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) | ||
449 | +FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) | ||
450 | +FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) | ||
451 | +FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) | ||
452 | +FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) | ||
453 | + | ||
454 | +#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \ | ||
455 | +static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \ | ||
456 | +{ \ | ||
457 | + T2 fd; \ | ||
458 | + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ | ||
459 | + \ | ||
460 | + set_float_rounding_mode(MODE, &env->fp_status); \ | ||
461 | + fd = do_## FMT1 ##_to_## FMT2(env, fj); \ | ||
462 | + set_float_rounding_mode(old_mode, &env->fp_status); \ | ||
463 | + return fd; \ | ||
464 | +} | ||
465 | + | ||
466 | +#define DO_FTINT(FMT1, FMT2, T1, T2) \ | ||
467 | +static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ | ||
468 | +{ \ | ||
469 | + T2 fd; \ | ||
470 | + \ | ||
471 | + fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ | ||
472 | + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ | ||
473 | + if (FMT1 ##_is_any_nan(fj)) { \ | ||
474 | + fd = 0; \ | ||
475 | + } \ | ||
476 | + } \ | ||
477 | + vec_update_fcsr0(env, GETPC()); \ | ||
478 | + return fd; \ | ||
479 | +} | ||
480 | + | ||
481 | +DO_FTINT(float32, int32, uint32_t, uint32_t) | ||
482 | +DO_FTINT(float64, int64, uint64_t, uint64_t) | ||
483 | +DO_FTINT(float32, uint32, uint32_t, uint32_t) | ||
484 | +DO_FTINT(float64, uint64, uint64_t, uint64_t) | ||
485 | +DO_FTINT(float64, int32, uint64_t, uint32_t) | ||
486 | +DO_FTINT(float32, int64, uint32_t, uint64_t) | ||
487 | + | ||
488 | +FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even) | ||
489 | +FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even) | ||
490 | +FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up) | ||
491 | +FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up) | ||
492 | +FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero) | ||
493 | +FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero) | ||
494 | +FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down) | ||
495 | +FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down) | ||
496 | + | ||
497 | +DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s) | ||
498 | +DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d) | ||
499 | +DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s) | ||
500 | +DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d) | ||
501 | +DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s) | ||
502 | +DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d) | ||
503 | +DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s) | ||
504 | +DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d) | ||
505 | +DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32) | ||
506 | +DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64) | ||
507 | + | ||
508 | +FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero) | ||
509 | +FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero) | ||
510 | + | ||
511 | +DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s) | ||
512 | +DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d) | ||
513 | +DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32) | ||
514 | +DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64) | ||
515 | + | ||
516 | +FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down) | ||
517 | +FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) | ||
518 | +FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) | ||
519 | +FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) | ||
520 | + | ||
521 | +#define FTINT_W_D(NAME, FN) \ | ||
522 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
523 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
524 | +{ \ | ||
525 | + int i; \ | ||
526 | + VReg temp; \ | ||
527 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
528 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
529 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
530 | + \ | ||
531 | + vec_clear_cause(env); \ | ||
532 | + for (i = 0; i < 2; i++) { \ | ||
533 | + temp.W(i + 2) = FN(env, Vj->UD(i)); \ | ||
534 | + temp.W(i) = FN(env, Vk->UD(i)); \ | ||
535 | + } \ | ||
536 | + *Vd = temp; \ | ||
537 | +} | ||
538 | + | ||
539 | +FTINT_W_D(vftint_w_d, do_float64_to_int32) | ||
540 | +FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) | ||
541 | +FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) | ||
542 | +FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) | ||
543 | +FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) | ||
544 | + | ||
545 | +FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down) | ||
546 | +FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up) | ||
547 | +FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) | ||
548 | +FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) | ||
549 | +FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down) | ||
550 | +FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) | ||
551 | +FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) | ||
552 | +FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) | ||
553 | + | ||
554 | +#define FTINTL_L_S(NAME, FN) \ | ||
555 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
556 | +{ \ | ||
557 | + int i; \ | ||
558 | + VReg temp; \ | ||
559 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
560 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
561 | + \ | ||
562 | + vec_clear_cause(env); \ | ||
563 | + for (i = 0; i < 2; i++) { \ | ||
564 | + temp.D(i) = FN(env, Vj->UW(i)); \ | ||
565 | + } \ | ||
566 | + *Vd = temp; \ | ||
567 | +} | ||
568 | + | ||
569 | +FTINTL_L_S(vftintl_l_s, do_float32_to_int64) | ||
570 | +FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) | ||
571 | +FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) | ||
572 | +FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) | ||
573 | +FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) | ||
574 | + | ||
575 | +#define FTINTH_L_S(NAME, FN) \ | ||
576 | +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ | ||
577 | +{ \ | ||
578 | + int i; \ | ||
579 | + VReg temp; \ | ||
580 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
581 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
582 | + \ | ||
583 | + vec_clear_cause(env); \ | ||
584 | + for (i = 0; i < 2; i++) { \ | ||
585 | + temp.D(i) = FN(env, Vj->UW(i + 2)); \ | ||
586 | + } \ | ||
587 | + *Vd = temp; \ | ||
588 | +} | ||
589 | + | ||
590 | +FTINTH_L_S(vftinth_l_s, do_float32_to_int64) | ||
591 | +FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) | ||
592 | +FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) | ||
593 | +FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) | ||
594 | +FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) | ||
595 | + | ||
596 | +#define FFINT(NAME, FMT1, FMT2, T1, T2) \ | ||
597 | +static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ | ||
598 | +{ \ | ||
599 | + T2 fd; \ | ||
600 | + \ | ||
601 | + fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ | ||
602 | + vec_update_fcsr0(env, GETPC()); \ | ||
603 | + return fd; \ | ||
604 | +} | ||
605 | + | ||
606 | +FFINT(s_w, int32, float32, int32_t, uint32_t) | ||
607 | +FFINT(d_l, int64, float64, int64_t, uint64_t) | ||
608 | +FFINT(s_wu, uint32, float32, uint32_t, uint32_t) | ||
609 | +FFINT(d_lu, uint64, float64, uint64_t, uint64_t) | ||
610 | + | ||
611 | +DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) | ||
612 | +DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) | ||
613 | +DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) | ||
614 | +DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) | ||
615 | + | ||
616 | +void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
617 | +{ | ||
618 | + int i; | ||
619 | + VReg temp; | ||
620 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
621 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
622 | + | ||
623 | + vec_clear_cause(env); | ||
624 | + for (i = 0; i < 2; i++) { | ||
625 | + temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status); | ||
626 | + vec_update_fcsr0(env, GETPC()); | ||
627 | + } | ||
628 | + *Vd = temp; | ||
629 | +} | ||
630 | + | ||
631 | +void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) | ||
632 | +{ | ||
633 | + int i; | ||
634 | + VReg temp; | ||
635 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
636 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
637 | + | ||
638 | + vec_clear_cause(env); | ||
639 | + for (i = 0; i < 2; i++) { | ||
640 | + temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status); | ||
641 | + vec_update_fcsr0(env, GETPC()); | ||
642 | + } | ||
643 | + *Vd = temp; | ||
644 | +} | ||
645 | + | ||
646 | +void HELPER(vffint_s_l)(CPULoongArchState *env, | ||
647 | + uint32_t vd, uint32_t vj, uint32_t vk) | ||
648 | +{ | ||
649 | + int i; | ||
650 | + VReg temp; | ||
651 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
652 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
653 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
654 | + | ||
655 | + vec_clear_cause(env); | ||
656 | + for (i = 0; i < 2; i++) { | ||
657 | + temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status); | ||
658 | + temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status); | ||
659 | + vec_update_fcsr0(env, GETPC()); | ||
660 | + } | ||
661 | + *Vd = temp; | ||
662 | +} | ||
663 | -- | ||
664 | 2.31.1 | ||
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VSEQ[I].{B/H/W/D}; | ||
3 | - VSLE[I].{B/H/W/D}[U]; | ||
4 | - VSLT[I].{B/H/W/D}[U]. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-36-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 43 +++++ | ||
11 | target/loongarch/helper.h | 23 +++ | ||
12 | target/loongarch/insn_trans/trans_lsx.c.inc | 185 ++++++++++++++++++++ | ||
13 | target/loongarch/insns.decode | 43 +++++ | ||
14 | target/loongarch/lsx_helper.c | 38 ++++ | ||
15 | 5 files changed, 332 insertions(+) | ||
16 | |||
17 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/disas.c | ||
20 | +++ b/target/loongarch/disas.c | ||
21 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vffint_d_lu, vv) | ||
22 | INSN_LSX(vffintl_d_w, vv) | ||
23 | INSN_LSX(vffinth_d_w, vv) | ||
24 | INSN_LSX(vffint_s_l, vvv) | ||
25 | + | ||
26 | +INSN_LSX(vseq_b, vvv) | ||
27 | +INSN_LSX(vseq_h, vvv) | ||
28 | +INSN_LSX(vseq_w, vvv) | ||
29 | +INSN_LSX(vseq_d, vvv) | ||
30 | +INSN_LSX(vseqi_b, vv_i) | ||
31 | +INSN_LSX(vseqi_h, vv_i) | ||
32 | +INSN_LSX(vseqi_w, vv_i) | ||
33 | +INSN_LSX(vseqi_d, vv_i) | ||
34 | + | ||
35 | +INSN_LSX(vsle_b, vvv) | ||
36 | +INSN_LSX(vsle_h, vvv) | ||
37 | +INSN_LSX(vsle_w, vvv) | ||
38 | +INSN_LSX(vsle_d, vvv) | ||
39 | +INSN_LSX(vslei_b, vv_i) | ||
40 | +INSN_LSX(vslei_h, vv_i) | ||
41 | +INSN_LSX(vslei_w, vv_i) | ||
42 | +INSN_LSX(vslei_d, vv_i) | ||
43 | +INSN_LSX(vsle_bu, vvv) | ||
44 | +INSN_LSX(vsle_hu, vvv) | ||
45 | +INSN_LSX(vsle_wu, vvv) | ||
46 | +INSN_LSX(vsle_du, vvv) | ||
47 | +INSN_LSX(vslei_bu, vv_i) | ||
48 | +INSN_LSX(vslei_hu, vv_i) | ||
49 | +INSN_LSX(vslei_wu, vv_i) | ||
50 | +INSN_LSX(vslei_du, vv_i) | ||
51 | + | ||
52 | +INSN_LSX(vslt_b, vvv) | ||
53 | +INSN_LSX(vslt_h, vvv) | ||
54 | +INSN_LSX(vslt_w, vvv) | ||
55 | +INSN_LSX(vslt_d, vvv) | ||
56 | +INSN_LSX(vslti_b, vv_i) | ||
57 | +INSN_LSX(vslti_h, vv_i) | ||
58 | +INSN_LSX(vslti_w, vv_i) | ||
59 | +INSN_LSX(vslti_d, vv_i) | ||
60 | +INSN_LSX(vslt_bu, vvv) | ||
61 | +INSN_LSX(vslt_hu, vvv) | ||
62 | +INSN_LSX(vslt_wu, vvv) | ||
63 | +INSN_LSX(vslt_du, vvv) | ||
64 | +INSN_LSX(vslti_bu, vv_i) | ||
65 | +INSN_LSX(vslti_hu, vv_i) | ||
66 | +INSN_LSX(vslti_wu, vv_i) | ||
67 | +INSN_LSX(vslti_du, vv_i) | ||
68 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/loongarch/helper.h | ||
71 | +++ b/target/loongarch/helper.h | ||
72 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vffint_d_lu, void, env, i32, i32) | ||
73 | DEF_HELPER_3(vffintl_d_w, void, env, i32, i32) | ||
74 | DEF_HELPER_3(vffinth_d_w, void, env, i32, i32) | ||
75 | DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32) | ||
76 | + | ||
77 | +DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
78 | +DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
79 | +DEF_HELPER_FLAGS_4(vseqi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
80 | +DEF_HELPER_FLAGS_4(vseqi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
81 | + | ||
82 | +DEF_HELPER_FLAGS_4(vslei_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
83 | +DEF_HELPER_FLAGS_4(vslei_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
84 | +DEF_HELPER_FLAGS_4(vslei_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
85 | +DEF_HELPER_FLAGS_4(vslei_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
86 | +DEF_HELPER_FLAGS_4(vslei_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
87 | +DEF_HELPER_FLAGS_4(vslei_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
88 | +DEF_HELPER_FLAGS_4(vslei_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
89 | +DEF_HELPER_FLAGS_4(vslei_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
90 | + | ||
91 | +DEF_HELPER_FLAGS_4(vslti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
92 | +DEF_HELPER_FLAGS_4(vslti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
93 | +DEF_HELPER_FLAGS_4(vslti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
94 | +DEF_HELPER_FLAGS_4(vslti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
95 | +DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
96 | +DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
97 | +DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
98 | +DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
99 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
100 | index XXXXXXX..XXXXXXX 100644 | ||
101 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
102 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
103 | @@ -XXX,XX +XXX,XX @@ TRANS(vffint_d_lu, gen_vv, gen_helper_vffint_d_lu) | ||
104 | TRANS(vffintl_d_w, gen_vv, gen_helper_vffintl_d_w) | ||
105 | TRANS(vffinth_d_w, gen_vv, gen_helper_vffinth_d_w) | ||
106 | TRANS(vffint_s_l, gen_vvv, gen_helper_vffint_s_l) | ||
107 | + | ||
108 | +static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond) | ||
109 | +{ | ||
110 | + uint32_t vd_ofs, vj_ofs, vk_ofs; | ||
111 | + | ||
112 | + CHECK_SXE; | ||
113 | + | ||
114 | + vd_ofs = vec_full_offset(a->vd); | ||
115 | + vj_ofs = vec_full_offset(a->vj); | ||
116 | + vk_ofs = vec_full_offset(a->vk); | ||
117 | + | ||
118 | + tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8); | ||
119 | + return true; | ||
120 | +} | ||
121 | + | ||
122 | +static void do_cmpi_vec(TCGCond cond, | ||
123 | + unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
124 | +{ | ||
125 | + tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm)); | ||
126 | +} | ||
127 | + | ||
128 | +static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
129 | +{ | ||
130 | + do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm); | ||
131 | +} | ||
132 | + | ||
133 | +static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
134 | +{ | ||
135 | + do_cmpi_vec(TCG_COND_LE, vece, t, a, imm); | ||
136 | +} | ||
137 | + | ||
138 | +static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
139 | +{ | ||
140 | + do_cmpi_vec(TCG_COND_LT, vece, t, a, imm); | ||
141 | +} | ||
142 | + | ||
143 | +static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
144 | +{ | ||
145 | + do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm); | ||
146 | +} | ||
147 | + | ||
148 | +static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) | ||
149 | +{ | ||
150 | + do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm); | ||
151 | +} | ||
152 | + | ||
153 | +#define DO_CMPI_S(NAME) \ | ||
154 | +static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
155 | +{ \ | ||
156 | + uint32_t vd_ofs, vj_ofs; \ | ||
157 | + \ | ||
158 | + CHECK_SXE; \ | ||
159 | + \ | ||
160 | + static const TCGOpcode vecop_list[] = { \ | ||
161 | + INDEX_op_cmp_vec, 0 \ | ||
162 | + }; \ | ||
163 | + static const GVecGen2i op[4] = { \ | ||
164 | + { \ | ||
165 | + .fniv = gen_## NAME ##_s_vec, \ | ||
166 | + .fnoi = gen_helper_## NAME ##_b, \ | ||
167 | + .opt_opc = vecop_list, \ | ||
168 | + .vece = MO_8 \ | ||
169 | + }, \ | ||
170 | + { \ | ||
171 | + .fniv = gen_## NAME ##_s_vec, \ | ||
172 | + .fnoi = gen_helper_## NAME ##_h, \ | ||
173 | + .opt_opc = vecop_list, \ | ||
174 | + .vece = MO_16 \ | ||
175 | + }, \ | ||
176 | + { \ | ||
177 | + .fniv = gen_## NAME ##_s_vec, \ | ||
178 | + .fnoi = gen_helper_## NAME ##_w, \ | ||
179 | + .opt_opc = vecop_list, \ | ||
180 | + .vece = MO_32 \ | ||
181 | + }, \ | ||
182 | + { \ | ||
183 | + .fniv = gen_## NAME ##_s_vec, \ | ||
184 | + .fnoi = gen_helper_## NAME ##_d, \ | ||
185 | + .opt_opc = vecop_list, \ | ||
186 | + .vece = MO_64 \ | ||
187 | + } \ | ||
188 | + }; \ | ||
189 | + \ | ||
190 | + vd_ofs = vec_full_offset(a->vd); \ | ||
191 | + vj_ofs = vec_full_offset(a->vj); \ | ||
192 | + \ | ||
193 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \ | ||
194 | + \ | ||
195 | + return true; \ | ||
196 | +} | ||
197 | + | ||
198 | +DO_CMPI_S(vseqi) | ||
199 | +DO_CMPI_S(vslei) | ||
200 | +DO_CMPI_S(vslti) | ||
201 | + | ||
202 | +#define DO_CMPI_U(NAME) \ | ||
203 | +static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \ | ||
204 | +{ \ | ||
205 | + uint32_t vd_ofs, vj_ofs; \ | ||
206 | + \ | ||
207 | + CHECK_SXE; \ | ||
208 | + \ | ||
209 | + static const TCGOpcode vecop_list[] = { \ | ||
210 | + INDEX_op_cmp_vec, 0 \ | ||
211 | + }; \ | ||
212 | + static const GVecGen2i op[4] = { \ | ||
213 | + { \ | ||
214 | + .fniv = gen_## NAME ##_u_vec, \ | ||
215 | + .fnoi = gen_helper_## NAME ##_bu, \ | ||
216 | + .opt_opc = vecop_list, \ | ||
217 | + .vece = MO_8 \ | ||
218 | + }, \ | ||
219 | + { \ | ||
220 | + .fniv = gen_## NAME ##_u_vec, \ | ||
221 | + .fnoi = gen_helper_## NAME ##_hu, \ | ||
222 | + .opt_opc = vecop_list, \ | ||
223 | + .vece = MO_16 \ | ||
224 | + }, \ | ||
225 | + { \ | ||
226 | + .fniv = gen_## NAME ##_u_vec, \ | ||
227 | + .fnoi = gen_helper_## NAME ##_wu, \ | ||
228 | + .opt_opc = vecop_list, \ | ||
229 | + .vece = MO_32 \ | ||
230 | + }, \ | ||
231 | + { \ | ||
232 | + .fniv = gen_## NAME ##_u_vec, \ | ||
233 | + .fnoi = gen_helper_## NAME ##_du, \ | ||
234 | + .opt_opc = vecop_list, \ | ||
235 | + .vece = MO_64 \ | ||
236 | + } \ | ||
237 | + }; \ | ||
238 | + \ | ||
239 | + vd_ofs = vec_full_offset(a->vd); \ | ||
240 | + vj_ofs = vec_full_offset(a->vj); \ | ||
241 | + \ | ||
242 | + tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \ | ||
243 | + \ | ||
244 | + return true; \ | ||
245 | +} | ||
246 | + | ||
247 | +DO_CMPI_U(vslei) | ||
248 | +DO_CMPI_U(vslti) | ||
249 | + | ||
250 | +TRANS(vseq_b, do_cmp, MO_8, TCG_COND_EQ) | ||
251 | +TRANS(vseq_h, do_cmp, MO_16, TCG_COND_EQ) | ||
252 | +TRANS(vseq_w, do_cmp, MO_32, TCG_COND_EQ) | ||
253 | +TRANS(vseq_d, do_cmp, MO_64, TCG_COND_EQ) | ||
254 | +TRANS(vseqi_b, do_vseqi_s, MO_8) | ||
255 | +TRANS(vseqi_h, do_vseqi_s, MO_16) | ||
256 | +TRANS(vseqi_w, do_vseqi_s, MO_32) | ||
257 | +TRANS(vseqi_d, do_vseqi_s, MO_64) | ||
258 | + | ||
259 | +TRANS(vsle_b, do_cmp, MO_8, TCG_COND_LE) | ||
260 | +TRANS(vsle_h, do_cmp, MO_16, TCG_COND_LE) | ||
261 | +TRANS(vsle_w, do_cmp, MO_32, TCG_COND_LE) | ||
262 | +TRANS(vsle_d, do_cmp, MO_64, TCG_COND_LE) | ||
263 | +TRANS(vslei_b, do_vslei_s, MO_8) | ||
264 | +TRANS(vslei_h, do_vslei_s, MO_16) | ||
265 | +TRANS(vslei_w, do_vslei_s, MO_32) | ||
266 | +TRANS(vslei_d, do_vslei_s, MO_64) | ||
267 | +TRANS(vsle_bu, do_cmp, MO_8, TCG_COND_LEU) | ||
268 | +TRANS(vsle_hu, do_cmp, MO_16, TCG_COND_LEU) | ||
269 | +TRANS(vsle_wu, do_cmp, MO_32, TCG_COND_LEU) | ||
270 | +TRANS(vsle_du, do_cmp, MO_64, TCG_COND_LEU) | ||
271 | +TRANS(vslei_bu, do_vslei_u, MO_8) | ||
272 | +TRANS(vslei_hu, do_vslei_u, MO_16) | ||
273 | +TRANS(vslei_wu, do_vslei_u, MO_32) | ||
274 | +TRANS(vslei_du, do_vslei_u, MO_64) | ||
275 | + | ||
276 | +TRANS(vslt_b, do_cmp, MO_8, TCG_COND_LT) | ||
277 | +TRANS(vslt_h, do_cmp, MO_16, TCG_COND_LT) | ||
278 | +TRANS(vslt_w, do_cmp, MO_32, TCG_COND_LT) | ||
279 | +TRANS(vslt_d, do_cmp, MO_64, TCG_COND_LT) | ||
280 | +TRANS(vslti_b, do_vslti_s, MO_8) | ||
281 | +TRANS(vslti_h, do_vslti_s, MO_16) | ||
282 | +TRANS(vslti_w, do_vslti_s, MO_32) | ||
283 | +TRANS(vslti_d, do_vslti_s, MO_64) | ||
284 | +TRANS(vslt_bu, do_cmp, MO_8, TCG_COND_LTU) | ||
285 | +TRANS(vslt_hu, do_cmp, MO_16, TCG_COND_LTU) | ||
286 | +TRANS(vslt_wu, do_cmp, MO_32, TCG_COND_LTU) | ||
287 | +TRANS(vslt_du, do_cmp, MO_64, TCG_COND_LTU) | ||
288 | +TRANS(vslti_bu, do_vslti_u, MO_8) | ||
289 | +TRANS(vslti_hu, do_vslti_u, MO_16) | ||
290 | +TRANS(vslti_wu, do_vslti_u, MO_32) | ||
291 | +TRANS(vslti_du, do_vslti_u, MO_64) | ||
292 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/target/loongarch/insns.decode | ||
295 | +++ b/target/loongarch/insns.decode | ||
296 | @@ -XXX,XX +XXX,XX @@ vffint_d_lu 0111 00101001 11100 00011 ..... ..... @vv | ||
297 | vffintl_d_w 0111 00101001 11100 00100 ..... ..... @vv | ||
298 | vffinth_d_w 0111 00101001 11100 00101 ..... ..... @vv | ||
299 | vffint_s_l 0111 00010100 10000 ..... ..... ..... @vvv | ||
300 | + | ||
301 | +vseq_b 0111 00000000 00000 ..... ..... ..... @vvv | ||
302 | +vseq_h 0111 00000000 00001 ..... ..... ..... @vvv | ||
303 | +vseq_w 0111 00000000 00010 ..... ..... ..... @vvv | ||
304 | +vseq_d 0111 00000000 00011 ..... ..... ..... @vvv | ||
305 | +vseqi_b 0111 00101000 00000 ..... ..... ..... @vv_i5 | ||
306 | +vseqi_h 0111 00101000 00001 ..... ..... ..... @vv_i5 | ||
307 | +vseqi_w 0111 00101000 00010 ..... ..... ..... @vv_i5 | ||
308 | +vseqi_d 0111 00101000 00011 ..... ..... ..... @vv_i5 | ||
309 | + | ||
310 | +vsle_b 0111 00000000 00100 ..... ..... ..... @vvv | ||
311 | +vsle_h 0111 00000000 00101 ..... ..... ..... @vvv | ||
312 | +vsle_w 0111 00000000 00110 ..... ..... ..... @vvv | ||
313 | +vsle_d 0111 00000000 00111 ..... ..... ..... @vvv | ||
314 | +vslei_b 0111 00101000 00100 ..... ..... ..... @vv_i5 | ||
315 | +vslei_h 0111 00101000 00101 ..... ..... ..... @vv_i5 | ||
316 | +vslei_w 0111 00101000 00110 ..... ..... ..... @vv_i5 | ||
317 | +vslei_d 0111 00101000 00111 ..... ..... ..... @vv_i5 | ||
318 | +vsle_bu 0111 00000000 01000 ..... ..... ..... @vvv | ||
319 | +vsle_hu 0111 00000000 01001 ..... ..... ..... @vvv | ||
320 | +vsle_wu 0111 00000000 01010 ..... ..... ..... @vvv | ||
321 | +vsle_du 0111 00000000 01011 ..... ..... ..... @vvv | ||
322 | +vslei_bu 0111 00101000 01000 ..... ..... ..... @vv_ui5 | ||
323 | +vslei_hu 0111 00101000 01001 ..... ..... ..... @vv_ui5 | ||
324 | +vslei_wu 0111 00101000 01010 ..... ..... ..... @vv_ui5 | ||
325 | +vslei_du 0111 00101000 01011 ..... ..... ..... @vv_ui5 | ||
326 | + | ||
327 | +vslt_b 0111 00000000 01100 ..... ..... ..... @vvv | ||
328 | +vslt_h 0111 00000000 01101 ..... ..... ..... @vvv | ||
329 | +vslt_w 0111 00000000 01110 ..... ..... ..... @vvv | ||
330 | +vslt_d 0111 00000000 01111 ..... ..... ..... @vvv | ||
331 | +vslti_b 0111 00101000 01100 ..... ..... ..... @vv_i5 | ||
332 | +vslti_h 0111 00101000 01101 ..... ..... ..... @vv_i5 | ||
333 | +vslti_w 0111 00101000 01110 ..... ..... ..... @vv_i5 | ||
334 | +vslti_d 0111 00101000 01111 ..... ..... ..... @vv_i5 | ||
335 | +vslt_bu 0111 00000000 10000 ..... ..... ..... @vvv | ||
336 | +vslt_hu 0111 00000000 10001 ..... ..... ..... @vvv | ||
337 | +vslt_wu 0111 00000000 10010 ..... ..... ..... @vvv | ||
338 | +vslt_du 0111 00000000 10011 ..... ..... ..... @vvv | ||
339 | +vslti_bu 0111 00101000 10000 ..... ..... ..... @vv_ui5 | ||
340 | +vslti_hu 0111 00101000 10001 ..... ..... ..... @vv_ui5 | ||
341 | +vslti_wu 0111 00101000 10010 ..... ..... ..... @vv_ui5 | ||
342 | +vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5 | ||
343 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
344 | index XXXXXXX..XXXXXXX 100644 | ||
345 | --- a/target/loongarch/lsx_helper.c | ||
346 | +++ b/target/loongarch/lsx_helper.c | ||
347 | @@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(CPULoongArchState *env, | ||
348 | } | ||
349 | *Vd = temp; | ||
350 | } | ||
351 | + | ||
352 | +#define VSEQ(a, b) (a == b ? -1 : 0) | ||
353 | +#define VSLE(a, b) (a <= b ? -1 : 0) | ||
354 | +#define VSLT(a, b) (a < b ? -1 : 0) | ||
355 | + | ||
356 | +#define VCMPI(NAME, BIT, E, DO_OP) \ | ||
357 | +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ | ||
358 | +{ \ | ||
359 | + int i; \ | ||
360 | + VReg *Vd = (VReg *)vd; \ | ||
361 | + VReg *Vj = (VReg *)vj; \ | ||
362 | + typedef __typeof(Vd->E(0)) TD; \ | ||
363 | + \ | ||
364 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
365 | + Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ | ||
366 | + } \ | ||
367 | +} | ||
368 | + | ||
369 | +VCMPI(vseqi_b, 8, B, VSEQ) | ||
370 | +VCMPI(vseqi_h, 16, H, VSEQ) | ||
371 | +VCMPI(vseqi_w, 32, W, VSEQ) | ||
372 | +VCMPI(vseqi_d, 64, D, VSEQ) | ||
373 | +VCMPI(vslei_b, 8, B, VSLE) | ||
374 | +VCMPI(vslei_h, 16, H, VSLE) | ||
375 | +VCMPI(vslei_w, 32, W, VSLE) | ||
376 | +VCMPI(vslei_d, 64, D, VSLE) | ||
377 | +VCMPI(vslei_bu, 8, UB, VSLE) | ||
378 | +VCMPI(vslei_hu, 16, UH, VSLE) | ||
379 | +VCMPI(vslei_wu, 32, UW, VSLE) | ||
380 | +VCMPI(vslei_du, 64, UD, VSLE) | ||
381 | +VCMPI(vslti_b, 8, B, VSLT) | ||
382 | +VCMPI(vslti_h, 16, H, VSLT) | ||
383 | +VCMPI(vslti_w, 32, W, VSLT) | ||
384 | +VCMPI(vslti_d, 64, D, VSLT) | ||
385 | +VCMPI(vslti_bu, 8, UB, VSLT) | ||
386 | +VCMPI(vslti_hu, 16, UH, VSLT) | ||
387 | +VCMPI(vslti_wu, 32, UW, VSLT) | ||
388 | +VCMPI(vslti_du, 64, UD, VSLT) | ||
389 | -- | ||
390 | 2.31.1 | ||
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VFCMP.cond.{S/D}. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-37-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 94 +++++++++++++++++++++ | ||
9 | target/loongarch/helper.h | 5 ++ | ||
10 | target/loongarch/insn_trans/trans_lsx.c.inc | 32 +++++++ | ||
11 | target/loongarch/insns.decode | 5 ++ | ||
12 | target/loongarch/lsx_helper.c | 54 ++++++++++++ | ||
13 | 5 files changed, 190 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vslti_bu, vv_i) | ||
20 | INSN_LSX(vslti_hu, vv_i) | ||
21 | INSN_LSX(vslti_wu, vv_i) | ||
22 | INSN_LSX(vslti_du, vv_i) | ||
23 | + | ||
24 | +#define output_vfcmp(C, PREFIX, SUFFIX) \ | ||
25 | +{ \ | ||
26 | + (C)->info->fprintf_func((C)->info->stream, "%08x %s%s\t%d, f%d, f%d", \ | ||
27 | + (C)->insn, PREFIX, SUFFIX, a->vd, \ | ||
28 | + a->vj, a->vk); \ | ||
29 | +} | ||
30 | + | ||
31 | +static bool output_vvv_fcond(DisasContext *ctx, arg_vvv_fcond * a, | ||
32 | + const char *suffix) | ||
33 | +{ | ||
34 | + bool ret = true; | ||
35 | + switch (a->fcond) { | ||
36 | + case 0x0: | ||
37 | + output_vfcmp(ctx, "vfcmp_caf_", suffix); | ||
38 | + break; | ||
39 | + case 0x1: | ||
40 | + output_vfcmp(ctx, "vfcmp_saf_", suffix); | ||
41 | + break; | ||
42 | + case 0x2: | ||
43 | + output_vfcmp(ctx, "vfcmp_clt_", suffix); | ||
44 | + break; | ||
45 | + case 0x3: | ||
46 | + output_vfcmp(ctx, "vfcmp_slt_", suffix); | ||
47 | + break; | ||
48 | + case 0x4: | ||
49 | + output_vfcmp(ctx, "vfcmp_ceq_", suffix); | ||
50 | + break; | ||
51 | + case 0x5: | ||
52 | + output_vfcmp(ctx, "vfcmp_seq_", suffix); | ||
53 | + break; | ||
54 | + case 0x6: | ||
55 | + output_vfcmp(ctx, "vfcmp_cle_", suffix); | ||
56 | + break; | ||
57 | + case 0x7: | ||
58 | + output_vfcmp(ctx, "vfcmp_sle_", suffix); | ||
59 | + break; | ||
60 | + case 0x8: | ||
61 | + output_vfcmp(ctx, "vfcmp_cun_", suffix); | ||
62 | + break; | ||
63 | + case 0x9: | ||
64 | + output_vfcmp(ctx, "vfcmp_sun_", suffix); | ||
65 | + break; | ||
66 | + case 0xA: | ||
67 | + output_vfcmp(ctx, "vfcmp_cult_", suffix); | ||
68 | + break; | ||
69 | + case 0xB: | ||
70 | + output_vfcmp(ctx, "vfcmp_sult_", suffix); | ||
71 | + break; | ||
72 | + case 0xC: | ||
73 | + output_vfcmp(ctx, "vfcmp_cueq_", suffix); | ||
74 | + break; | ||
75 | + case 0xD: | ||
76 | + output_vfcmp(ctx, "vfcmp_sueq_", suffix); | ||
77 | + break; | ||
78 | + case 0xE: | ||
79 | + output_vfcmp(ctx, "vfcmp_cule_", suffix); | ||
80 | + break; | ||
81 | + case 0xF: | ||
82 | + output_vfcmp(ctx, "vfcmp_sule_", suffix); | ||
83 | + break; | ||
84 | + case 0x10: | ||
85 | + output_vfcmp(ctx, "vfcmp_cne_", suffix); | ||
86 | + break; | ||
87 | + case 0x11: | ||
88 | + output_vfcmp(ctx, "vfcmp_sne_", suffix); | ||
89 | + break; | ||
90 | + case 0x14: | ||
91 | + output_vfcmp(ctx, "vfcmp_cor_", suffix); | ||
92 | + break; | ||
93 | + case 0x15: | ||
94 | + output_vfcmp(ctx, "vfcmp_sor_", suffix); | ||
95 | + break; | ||
96 | + case 0x18: | ||
97 | + output_vfcmp(ctx, "vfcmp_cune_", suffix); | ||
98 | + break; | ||
99 | + case 0x19: | ||
100 | + output_vfcmp(ctx, "vfcmp_sune_", suffix); | ||
101 | + break; | ||
102 | + default: | ||
103 | + ret = false; | ||
104 | + } | ||
105 | + return ret; | ||
106 | +} | ||
107 | + | ||
108 | +#define LSX_FCMP_INSN(suffix) \ | ||
109 | +static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, \ | ||
110 | + arg_vvv_fcond * a) \ | ||
111 | +{ \ | ||
112 | + return output_vvv_fcond(ctx, a, #suffix); \ | ||
113 | +} | ||
114 | + | ||
115 | +LSX_FCMP_INSN(s) | ||
116 | +LSX_FCMP_INSN(d) | ||
117 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/target/loongarch/helper.h | ||
120 | +++ b/target/loongarch/helper.h | ||
121 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
122 | DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
123 | DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
124 | DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
125 | + | ||
126 | +DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32) | ||
127 | +DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32) | ||
128 | +DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32) | ||
129 | +DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32) | ||
130 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
131 | index XXXXXXX..XXXXXXX 100644 | ||
132 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
133 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
134 | @@ -XXX,XX +XXX,XX @@ TRANS(vslti_bu, do_vslti_u, MO_8) | ||
135 | TRANS(vslti_hu, do_vslti_u, MO_16) | ||
136 | TRANS(vslti_wu, do_vslti_u, MO_32) | ||
137 | TRANS(vslti_du, do_vslti_u, MO_64) | ||
138 | + | ||
139 | +static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a) | ||
140 | +{ | ||
141 | + uint32_t flags; | ||
142 | + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
143 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
144 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
145 | + TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
146 | + | ||
147 | + CHECK_SXE; | ||
148 | + | ||
149 | + fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); | ||
150 | + flags = get_fcmp_flags(a->fcond >> 1); | ||
151 | + fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags)); | ||
152 | + | ||
153 | + return true; | ||
154 | +} | ||
155 | + | ||
156 | +static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a) | ||
157 | +{ | ||
158 | + uint32_t flags; | ||
159 | + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); | ||
160 | + TCGv_i32 vd = tcg_constant_i32(a->vd); | ||
161 | + TCGv_i32 vj = tcg_constant_i32(a->vj); | ||
162 | + TCGv_i32 vk = tcg_constant_i32(a->vk); | ||
163 | + | ||
164 | + fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); | ||
165 | + flags = get_fcmp_flags(a->fcond >> 1); | ||
166 | + fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags)); | ||
167 | + | ||
168 | + return true; | ||
169 | +} | ||
170 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
171 | index XXXXXXX..XXXXXXX 100644 | ||
172 | --- a/target/loongarch/insns.decode | ||
173 | +++ b/target/loongarch/insns.decode | ||
174 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
175 | &vvv vd vj vk | ||
176 | &vv_i vd vj imm | ||
177 | &vvvv vd vj vk va | ||
178 | +&vvv_fcond vd vj vk fcond | ||
179 | |||
180 | # | ||
181 | # LSX Formats | ||
182 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
183 | @vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i | ||
184 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
185 | @vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv | ||
186 | +@vvv_fcond .... ........ fcond:5 vk:5 vj:5 vd:5 &vvv_fcond | ||
187 | |||
188 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
189 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
190 | @@ -XXX,XX +XXX,XX @@ vslti_bu 0111 00101000 10000 ..... ..... ..... @vv_ui5 | ||
191 | vslti_hu 0111 00101000 10001 ..... ..... ..... @vv_ui5 | ||
192 | vslti_wu 0111 00101000 10010 ..... ..... ..... @vv_ui5 | ||
193 | vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5 | ||
194 | + | ||
195 | +vfcmp_cond_s 0000 11000101 ..... ..... ..... ..... @vvv_fcond | ||
196 | +vfcmp_cond_d 0000 11000110 ..... ..... ..... ..... @vvv_fcond | ||
197 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
198 | index XXXXXXX..XXXXXXX 100644 | ||
199 | --- a/target/loongarch/lsx_helper.c | ||
200 | +++ b/target/loongarch/lsx_helper.c | ||
201 | @@ -XXX,XX +XXX,XX @@ VCMPI(vslti_bu, 8, UB, VSLT) | ||
202 | VCMPI(vslti_hu, 16, UH, VSLT) | ||
203 | VCMPI(vslti_wu, 32, UW, VSLT) | ||
204 | VCMPI(vslti_du, 64, UD, VSLT) | ||
205 | + | ||
206 | +static uint64_t vfcmp_common(CPULoongArchState *env, | ||
207 | + FloatRelation cmp, uint32_t flags) | ||
208 | +{ | ||
209 | + uint64_t ret = 0; | ||
210 | + | ||
211 | + switch (cmp) { | ||
212 | + case float_relation_less: | ||
213 | + ret = (flags & FCMP_LT); | ||
214 | + break; | ||
215 | + case float_relation_equal: | ||
216 | + ret = (flags & FCMP_EQ); | ||
217 | + break; | ||
218 | + case float_relation_greater: | ||
219 | + ret = (flags & FCMP_GT); | ||
220 | + break; | ||
221 | + case float_relation_unordered: | ||
222 | + ret = (flags & FCMP_UN); | ||
223 | + break; | ||
224 | + default: | ||
225 | + g_assert_not_reached(); | ||
226 | + } | ||
227 | + | ||
228 | + if (ret) { | ||
229 | + ret = -1; | ||
230 | + } | ||
231 | + | ||
232 | + return ret; | ||
233 | +} | ||
234 | + | ||
235 | +#define VFCMP(NAME, BIT, E, FN) \ | ||
236 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
237 | + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ | ||
238 | +{ \ | ||
239 | + int i; \ | ||
240 | + VReg t; \ | ||
241 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
242 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
243 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
244 | + \ | ||
245 | + vec_clear_cause(env); \ | ||
246 | +    for (i = 0; i < LSX_LEN/BIT; i++) {                          \ | ||
247 | + FloatRelation cmp; \ | ||
248 | + cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ | ||
249 | + t.E(i) = vfcmp_common(env, cmp, flags); \ | ||
250 | + vec_update_fcsr0(env, GETPC()); \ | ||
251 | + } \ | ||
252 | + *Vd = t; \ | ||
253 | +} | ||
254 | + | ||
255 | +VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) | ||
256 | +VFCMP(vfcmp_s_s, 32, UW, float32_compare) | ||
257 | +VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) | ||
258 | +VFCMP(vfcmp_s_d, 64, UD, float64_compare) | ||
259 | -- | ||
260 | 2.31.1 | ||
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VINSGR2VR.{B/H/W/D}; | ||
3 | - VPICKVE2GR.{B/H/W/D}[U]; | ||
4 | - VREPLGR2VR.{B/H/W/D}. | ||
5 | 1 | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
8 | Message-Id: <20230504122810.4094787-39-gaosong@loongson.cn> | ||
9 | --- | ||
10 | target/loongarch/disas.c | 33 ++++++ | ||
11 | target/loongarch/insn_trans/trans_lsx.c.inc | 110 ++++++++++++++++++++ | ||
12 | target/loongarch/insns.decode | 30 ++++++ | ||
13 | 3 files changed, 173 insertions(+) | ||
14 | |||
15 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/loongarch/disas.c | ||
18 | +++ b/target/loongarch/disas.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic) | ||
20 | output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va); | ||
21 | } | ||
22 | |||
23 | +static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic) | ||
24 | +{ | ||
25 | + output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm); | ||
26 | +} | ||
27 | + | ||
28 | +static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic) | ||
29 | +{ | ||
30 | + output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj, a->imm); | ||
31 | +} | ||
32 | + | ||
33 | +static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic) | ||
34 | +{ | ||
35 | + output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj); | ||
36 | +} | ||
37 | + | ||
38 | INSN_LSX(vadd_b, vvv) | ||
39 | INSN_LSX(vadd_h, vvv) | ||
40 | INSN_LSX(vadd_w, vvv) | ||
41 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vsetallnez_b, cv) | ||
42 | INSN_LSX(vsetallnez_h, cv) | ||
43 | INSN_LSX(vsetallnez_w, cv) | ||
44 | INSN_LSX(vsetallnez_d, cv) | ||
45 | + | ||
46 | +INSN_LSX(vinsgr2vr_b, vr_i) | ||
47 | +INSN_LSX(vinsgr2vr_h, vr_i) | ||
48 | +INSN_LSX(vinsgr2vr_w, vr_i) | ||
49 | +INSN_LSX(vinsgr2vr_d, vr_i) | ||
50 | +INSN_LSX(vpickve2gr_b, rv_i) | ||
51 | +INSN_LSX(vpickve2gr_h, rv_i) | ||
52 | +INSN_LSX(vpickve2gr_w, rv_i) | ||
53 | +INSN_LSX(vpickve2gr_d, rv_i) | ||
54 | +INSN_LSX(vpickve2gr_bu, rv_i) | ||
55 | +INSN_LSX(vpickve2gr_hu, rv_i) | ||
56 | +INSN_LSX(vpickve2gr_wu, rv_i) | ||
57 | +INSN_LSX(vpickve2gr_du, rv_i) | ||
58 | + | ||
59 | +INSN_LSX(vreplgr2vr_b, vr) | ||
60 | +INSN_LSX(vreplgr2vr_h, vr) | ||
61 | +INSN_LSX(vreplgr2vr_w, vr) | ||
62 | +INSN_LSX(vreplgr2vr_d, vr) | ||
63 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
66 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
67 | @@ -XXX,XX +XXX,XX @@ TRANS(vsetallnez_b, gen_cv, gen_helper_vsetallnez_b) | ||
68 | TRANS(vsetallnez_h, gen_cv, gen_helper_vsetallnez_h) | ||
69 | TRANS(vsetallnez_w, gen_cv, gen_helper_vsetallnez_w) | ||
70 | TRANS(vsetallnez_d, gen_cv, gen_helper_vsetallnez_d) | ||
71 | + | ||
72 | +static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a) | ||
73 | +{ | ||
74 | + CHECK_SXE; | ||
75 | + tcg_gen_st8_i64(cpu_gpr[a->rj], cpu_env, | ||
76 | + offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm))); | ||
77 | + return true; | ||
78 | +} | ||
79 | + | ||
80 | +static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a) | ||
81 | +{ | ||
82 | + CHECK_SXE; | ||
83 | + tcg_gen_st16_i64(cpu_gpr[a->rj], cpu_env, | ||
84 | + offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm))); | ||
85 | + return true; | ||
86 | +} | ||
87 | + | ||
88 | +static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a) | ||
89 | +{ | ||
90 | + CHECK_SXE; | ||
91 | + tcg_gen_st32_i64(cpu_gpr[a->rj], cpu_env, | ||
92 | + offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm))); | ||
93 | + return true; | ||
94 | +} | ||
95 | + | ||
96 | +static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a) | ||
97 | +{ | ||
98 | + CHECK_SXE; | ||
99 | + tcg_gen_st_i64(cpu_gpr[a->rj], cpu_env, | ||
100 | + offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm))); | ||
101 | + return true; | ||
102 | +} | ||
103 | + | ||
104 | +static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a) | ||
105 | +{ | ||
106 | + CHECK_SXE; | ||
107 | + tcg_gen_ld8s_i64(cpu_gpr[a->rd], cpu_env, | ||
108 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
109 | + return true; | ||
110 | +} | ||
111 | + | ||
112 | +static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a) | ||
113 | +{ | ||
114 | + CHECK_SXE; | ||
115 | + tcg_gen_ld16s_i64(cpu_gpr[a->rd], cpu_env, | ||
116 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
117 | + return true; | ||
118 | +} | ||
119 | + | ||
120 | +static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a) | ||
121 | +{ | ||
122 | + CHECK_SXE; | ||
123 | + tcg_gen_ld32s_i64(cpu_gpr[a->rd], cpu_env, | ||
124 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
125 | + return true; | ||
126 | +} | ||
127 | + | ||
128 | +static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a) | ||
129 | +{ | ||
130 | + CHECK_SXE; | ||
131 | + tcg_gen_ld_i64(cpu_gpr[a->rd], cpu_env, | ||
132 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
133 | + return true; | ||
134 | +} | ||
135 | + | ||
136 | +static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a) | ||
137 | +{ | ||
138 | + CHECK_SXE; | ||
139 | + tcg_gen_ld8u_i64(cpu_gpr[a->rd], cpu_env, | ||
140 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm))); | ||
141 | + return true; | ||
142 | +} | ||
143 | + | ||
144 | +static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a) | ||
145 | +{ | ||
146 | + CHECK_SXE; | ||
147 | + tcg_gen_ld16u_i64(cpu_gpr[a->rd], cpu_env, | ||
148 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm))); | ||
149 | + return true; | ||
150 | +} | ||
151 | + | ||
152 | +static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a) | ||
153 | +{ | ||
154 | + CHECK_SXE; | ||
155 | + tcg_gen_ld32u_i64(cpu_gpr[a->rd], cpu_env, | ||
156 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm))); | ||
157 | + return true; | ||
158 | +} | ||
159 | + | ||
160 | +static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a) | ||
161 | +{ | ||
162 | + CHECK_SXE; | ||
163 | + tcg_gen_ld_i64(cpu_gpr[a->rd], cpu_env, | ||
164 | + offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm))); | ||
165 | + return true; | ||
166 | +} | ||
167 | + | ||
168 | +static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) | ||
169 | +{ | ||
170 | + CHECK_SXE; | ||
171 | + | ||
172 | + tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), | ||
173 | + 16, ctx->vl/8, cpu_gpr[a->rj]); | ||
174 | + return true; | ||
175 | +} | ||
176 | + | ||
177 | +TRANS(vreplgr2vr_b, gvec_dup, MO_8) | ||
178 | +TRANS(vreplgr2vr_h, gvec_dup, MO_16) | ||
179 | +TRANS(vreplgr2vr_w, gvec_dup, MO_32) | ||
180 | +TRANS(vreplgr2vr_d, gvec_dup, MO_64) | ||
181 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/loongarch/insns.decode | ||
184 | +++ b/target/loongarch/insns.decode | ||
185 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
186 | &vv_i vd vj imm | ||
187 | &vvvv vd vj vk va | ||
188 | &vvv_fcond vd vj vk fcond | ||
189 | +&vr_i vd rj imm | ||
190 | +&rv_i rd vj imm | ||
191 | +&vr vd rj | ||
192 | |||
193 | # | ||
194 | # LSX Formats | ||
195 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
196 | @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i | ||
197 | @vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv | ||
198 | @vvv_fcond .... ........ fcond:5 vk:5 vj:5 vd:5 &vvv_fcond | ||
199 | +@vr_ui4 .... ........ ..... . imm:4 rj:5 vd:5 &vr_i | ||
200 | +@vr_ui3 .... ........ ..... .. imm:3 rj:5 vd:5 &vr_i | ||
201 | +@vr_ui2 .... ........ ..... ... imm:2 rj:5 vd:5 &vr_i | ||
202 | +@vr_ui1 .... ........ ..... .... imm:1 rj:5 vd:5 &vr_i | ||
203 | +@rv_ui4 .... ........ ..... . imm:4 vj:5 rd:5 &rv_i | ||
204 | +@rv_ui3 .... ........ ..... .. imm:3 vj:5 rd:5 &rv_i | ||
205 | +@rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i | ||
206 | +@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i | ||
207 | +@vr .... ........ ..... ..... rj:5 vd:5 &vr | ||
208 | |||
209 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
210 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
211 | @@ -XXX,XX +XXX,XX @@ vsetallnez_b 0111 00101001 11001 01100 ..... 00 ... @cv | ||
212 | vsetallnez_h 0111 00101001 11001 01101 ..... 00 ... @cv | ||
213 | vsetallnez_w 0111 00101001 11001 01110 ..... 00 ... @cv | ||
214 | vsetallnez_d 0111 00101001 11001 01111 ..... 00 ... @cv | ||
215 | + | ||
216 | +vinsgr2vr_b 0111 00101110 10111 0 .... ..... ..... @vr_ui4 | ||
217 | +vinsgr2vr_h 0111 00101110 10111 10 ... ..... ..... @vr_ui3 | ||
218 | +vinsgr2vr_w 0111 00101110 10111 110 .. ..... ..... @vr_ui2 | ||
219 | +vinsgr2vr_d 0111 00101110 10111 1110 . ..... ..... @vr_ui1 | ||
220 | +vpickve2gr_b 0111 00101110 11111 0 .... ..... ..... @rv_ui4 | ||
221 | +vpickve2gr_h 0111 00101110 11111 10 ... ..... ..... @rv_ui3 | ||
222 | +vpickve2gr_w 0111 00101110 11111 110 .. ..... ..... @rv_ui2 | ||
223 | +vpickve2gr_d 0111 00101110 11111 1110 . ..... ..... @rv_ui1 | ||
224 | +vpickve2gr_bu 0111 00101111 00111 0 .... ..... ..... @rv_ui4 | ||
225 | +vpickve2gr_hu 0111 00101111 00111 10 ... ..... ..... @rv_ui3 | ||
226 | +vpickve2gr_wu 0111 00101111 00111 110 .. ..... ..... @rv_ui2 | ||
227 | +vpickve2gr_du 0111 00101111 00111 1110 . ..... ..... @rv_ui1 | ||
228 | + | ||
229 | +vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr | ||
230 | +vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr | ||
231 | +vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr | ||
232 | +vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr | ||
233 | -- | ||
234 | 2.31.1 | ||
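
For readers skimming the diff above: each vpickve2gr variant is translated into a single sign- or zero-extending load from the vector register file into a GPR, and vreplgr2vr into a gvec broadcast of a GPR into every lane. A minimal host-side C sketch (not QEMU code; the Vec128 union and lane helpers below are illustrative stand-ins for the real VReg type and its B/H/W/D accessors, and the explicit index masking here mirrors what the imm:4/3/2/1 decode fields already guarantee) shows the intended semantics:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the 128-bit LSX register (the real code uses VReg). */
    typedef union {
        int8_t   b[16];
        int16_t  h[8];
        uint16_t uh[8];
        int32_t  w[4];
        int64_t  d[2];
    } Vec128;

    /* vpickve2gr.h rd, vj, imm: pick halfword lane 'imm', sign-extend to 64 bits. */
    static int64_t pickve2gr_h(const Vec128 *vj, unsigned imm)
    {
        return (int64_t)vj->h[imm & 0x7];
    }

    /* vpickve2gr.hu rd, vj, imm: same lane, zero-extended. */
    static uint64_t pickve2gr_hu(const Vec128 *vj, unsigned imm)
    {
        return (uint64_t)vj->uh[imm & 0x7];
    }

    /* vreplgr2vr.h vd, rj: broadcast the low 16 bits of a GPR into every lane. */
    static void replgr2vr_h(Vec128 *vd, uint64_t rj)
    {
        for (int i = 0; i < 8; i++) {
            vd->h[i] = (int16_t)rj;
        }
    }

    int main(void)
    {
        Vec128 v = { .h = { 0, -1, 0x7fff, -32768, 4, 5, 6, 7 } };

        printf("pick.h  lane1 = %lld\n", (long long)pickve2gr_h(&v, 1));
        printf("pick.hu lane1 = %llu\n", (unsigned long long)pickve2gr_hu(&v, 1));

        replgr2vr_h(&v, 0xabcd);
        printf("repl lane5 = 0x%x\n", (uint16_t)v.h[5]);
        return 0;
    }

Running it prints -1 for the signed pick, 65535 for the unsigned pick, and 0xabcd in every lane after the replicate, matching what the tcg_gen_ld16s_i64/ld16u_i64 loads and tcg_gen_gvec_dup_i64 produce at runtime.
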
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VILV{L/H}.{B/H/W/D}; | ||
3 | - VSHUF.{B/H/W/D}; | ||
4 | - VSHUF4I.{B/H/W/D}; | ||
5 | - VPERMI.W; | ||
6 | - VEXTRINS.{B/H/W/D}. | ||
7 | 1 | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
10 | Message-Id: <20230504122810.4094787-41-gaosong@loongson.cn> | ||
11 | --- | ||
12 | target/loongarch/disas.c | 25 ++++ | ||
13 | target/loongarch/helper.h | 25 ++++ | ||
14 | target/loongarch/insn_trans/trans_lsx.c.inc | 25 ++++ | ||
15 | target/loongarch/insns.decode | 25 ++++ | ||
16 | target/loongarch/lsx_helper.c | 148 ++++++++++++++++++++ | ||
17 | 5 files changed, 248 insertions(+) | ||
18 | |||
19 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/loongarch/disas.c | ||
22 | +++ b/target/loongarch/disas.c | ||
23 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vpickod_b, vvv) | ||
24 | INSN_LSX(vpickod_h, vvv) | ||
25 | INSN_LSX(vpickod_w, vvv) | ||
26 | INSN_LSX(vpickod_d, vvv) | ||
27 | + | ||
28 | +INSN_LSX(vilvl_b, vvv) | ||
29 | +INSN_LSX(vilvl_h, vvv) | ||
30 | +INSN_LSX(vilvl_w, vvv) | ||
31 | +INSN_LSX(vilvl_d, vvv) | ||
32 | +INSN_LSX(vilvh_b, vvv) | ||
33 | +INSN_LSX(vilvh_h, vvv) | ||
34 | +INSN_LSX(vilvh_w, vvv) | ||
35 | +INSN_LSX(vilvh_d, vvv) | ||
36 | + | ||
37 | +INSN_LSX(vshuf_b, vvvv) | ||
38 | +INSN_LSX(vshuf_h, vvv) | ||
39 | +INSN_LSX(vshuf_w, vvv) | ||
40 | +INSN_LSX(vshuf_d, vvv) | ||
41 | +INSN_LSX(vshuf4i_b, vv_i) | ||
42 | +INSN_LSX(vshuf4i_h, vv_i) | ||
43 | +INSN_LSX(vshuf4i_w, vv_i) | ||
44 | +INSN_LSX(vshuf4i_d, vv_i) | ||
45 | + | ||
46 | +INSN_LSX(vpermi_w, vv_i) | ||
47 | + | ||
48 | +INSN_LSX(vextrins_d, vv_i) | ||
49 | +INSN_LSX(vextrins_w, vv_i) | ||
50 | +INSN_LSX(vextrins_h, vv_i) | ||
51 | +INSN_LSX(vextrins_b, vv_i) | ||
52 | diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/loongarch/helper.h | ||
55 | +++ b/target/loongarch/helper.h | ||
56 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32) | ||
57 | DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32) | ||
58 | DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32) | ||
59 | DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32) | ||
60 | + | ||
61 | +DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32) | ||
62 | +DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32) | ||
63 | +DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32) | ||
64 | +DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32) | ||
65 | +DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32) | ||
66 | +DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32) | ||
67 | +DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32) | ||
68 | +DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32) | ||
69 | + | ||
70 | +DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32) | ||
71 | +DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32) | ||
72 | +DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32) | ||
73 | +DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32) | ||
74 | +DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32) | ||
75 | +DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32) | ||
76 | +DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32) | ||
77 | +DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32) | ||
78 | + | ||
79 | +DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32) | ||
80 | + | ||
81 | +DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32) | ||
82 | +DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32) | ||
83 | +DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32) | ||
84 | +DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32) | ||
85 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
88 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
89 | @@ -XXX,XX +XXX,XX @@ TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b) | ||
90 | TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h) | ||
91 | TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w) | ||
92 | TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d) | ||
93 | + | ||
94 | +TRANS(vilvl_b, gen_vvv, gen_helper_vilvl_b) | ||
95 | +TRANS(vilvl_h, gen_vvv, gen_helper_vilvl_h) | ||
96 | +TRANS(vilvl_w, gen_vvv, gen_helper_vilvl_w) | ||
97 | +TRANS(vilvl_d, gen_vvv, gen_helper_vilvl_d) | ||
98 | +TRANS(vilvh_b, gen_vvv, gen_helper_vilvh_b) | ||
99 | +TRANS(vilvh_h, gen_vvv, gen_helper_vilvh_h) | ||
100 | +TRANS(vilvh_w, gen_vvv, gen_helper_vilvh_w) | ||
101 | +TRANS(vilvh_d, gen_vvv, gen_helper_vilvh_d) | ||
102 | + | ||
103 | +TRANS(vshuf_b, gen_vvvv, gen_helper_vshuf_b) | ||
104 | +TRANS(vshuf_h, gen_vvv, gen_helper_vshuf_h) | ||
105 | +TRANS(vshuf_w, gen_vvv, gen_helper_vshuf_w) | ||
106 | +TRANS(vshuf_d, gen_vvv, gen_helper_vshuf_d) | ||
107 | +TRANS(vshuf4i_b, gen_vv_i, gen_helper_vshuf4i_b) | ||
108 | +TRANS(vshuf4i_h, gen_vv_i, gen_helper_vshuf4i_h) | ||
109 | +TRANS(vshuf4i_w, gen_vv_i, gen_helper_vshuf4i_w) | ||
110 | +TRANS(vshuf4i_d, gen_vv_i, gen_helper_vshuf4i_d) | ||
111 | + | ||
112 | +TRANS(vpermi_w, gen_vv_i, gen_helper_vpermi_w) | ||
113 | + | ||
114 | +TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b) | ||
115 | +TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h) | ||
116 | +TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w) | ||
117 | +TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d) | ||
118 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/target/loongarch/insns.decode | ||
121 | +++ b/target/loongarch/insns.decode | ||
122 | @@ -XXX,XX +XXX,XX @@ vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv | ||
123 | vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv | ||
124 | vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv | ||
125 | vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv | ||
126 | + | ||
127 | +vilvl_b 0111 00010001 10100 ..... ..... ..... @vvv | ||
128 | +vilvl_h 0111 00010001 10101 ..... ..... ..... @vvv | ||
129 | +vilvl_w 0111 00010001 10110 ..... ..... ..... @vvv | ||
130 | +vilvl_d 0111 00010001 10111 ..... ..... ..... @vvv | ||
131 | +vilvh_b 0111 00010001 11000 ..... ..... ..... @vvv | ||
132 | +vilvh_h 0111 00010001 11001 ..... ..... ..... @vvv | ||
133 | +vilvh_w 0111 00010001 11010 ..... ..... ..... @vvv | ||
134 | +vilvh_d 0111 00010001 11011 ..... ..... ..... @vvv | ||
135 | + | ||
136 | +vshuf_b 0000 11010101 ..... ..... ..... ..... @vvvv | ||
137 | +vshuf_h 0111 00010111 10101 ..... ..... ..... @vvv | ||
138 | +vshuf_w 0111 00010111 10110 ..... ..... ..... @vvv | ||
139 | +vshuf_d 0111 00010111 10111 ..... ..... ..... @vvv | ||
140 | +vshuf4i_b 0111 00111001 00 ........ ..... ..... @vv_ui8 | ||
141 | +vshuf4i_h 0111 00111001 01 ........ ..... ..... @vv_ui8 | ||
142 | +vshuf4i_w 0111 00111001 10 ........ ..... ..... @vv_ui8 | ||
143 | +vshuf4i_d 0111 00111001 11 ........ ..... ..... @vv_ui8 | ||
144 | + | ||
145 | +vpermi_w 0111 00111110 01 ........ ..... ..... @vv_ui8 | ||
146 | + | ||
147 | +vextrins_d 0111 00111000 00 ........ ..... ..... @vv_ui8 | ||
148 | +vextrins_w 0111 00111000 01 ........ ..... ..... @vv_ui8 | ||
149 | +vextrins_h 0111 00111000 10 ........ ..... ..... @vv_ui8 | ||
150 | +vextrins_b 0111 00111000 11 ........ ..... ..... @vv_ui8 | ||
151 | diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/loongarch/lsx_helper.c | ||
154 | +++ b/target/loongarch/lsx_helper.c | ||
155 | @@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_b, 16, B) | ||
156 | VPICKOD(vpickod_h, 32, H) | ||
157 | VPICKOD(vpickod_w, 64, W) | ||
158 | VPICKOD(vpickod_d, 128, D) | ||
159 | + | ||
160 | +#define VILVL(NAME, BIT, E) \ | ||
161 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
162 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
163 | +{ \ | ||
164 | + int i; \ | ||
165 | + VReg temp; \ | ||
166 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
167 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
168 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
169 | + \ | ||
170 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
171 | + temp.E(2 * i + 1) = Vj->E(i); \ | ||
172 | + temp.E(2 * i) = Vk->E(i); \ | ||
173 | + } \ | ||
174 | + *Vd = temp; \ | ||
175 | +} | ||
176 | + | ||
177 | +VILVL(vilvl_b, 16, B) | ||
178 | +VILVL(vilvl_h, 32, H) | ||
179 | +VILVL(vilvl_w, 64, W) | ||
180 | +VILVL(vilvl_d, 128, D) | ||
181 | + | ||
182 | +#define VILVH(NAME, BIT, E) \ | ||
183 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
184 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
185 | +{ \ | ||
186 | + int i; \ | ||
187 | + VReg temp; \ | ||
188 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
189 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
190 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
191 | + \ | ||
192 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
193 | + temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ | ||
194 | + temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ | ||
195 | + } \ | ||
196 | + *Vd = temp; \ | ||
197 | +} | ||
198 | + | ||
199 | +VILVH(vilvh_b, 16, B) | ||
200 | +VILVH(vilvh_h, 32, H) | ||
201 | +VILVH(vilvh_w, 64, W) | ||
202 | +VILVH(vilvh_d, 128, D) | ||
203 | + | ||
204 | +void HELPER(vshuf_b)(CPULoongArchState *env, | ||
205 | + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) | ||
206 | +{ | ||
207 | + int i, m; | ||
208 | + VReg temp; | ||
209 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
210 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
211 | + VReg *Vk = &(env->fpr[vk].vreg); | ||
212 | + VReg *Va = &(env->fpr[va].vreg); | ||
213 | + | ||
214 | + m = LSX_LEN/8; | ||
215 | + for (i = 0; i < m; i++) { | ||
216 | + uint64_t k = (uint8_t)Va->B(i) % (2 * m); | ||
217 | + temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m); | ||
218 | + } | ||
219 | + *Vd = temp; | ||
220 | +} | ||
221 | + | ||
222 | +#define VSHUF(NAME, BIT, E) \ | ||
223 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
224 | + uint32_t vd, uint32_t vj, uint32_t vk) \ | ||
225 | +{ \ | ||
226 | + int i, m; \ | ||
227 | + VReg temp; \ | ||
228 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
229 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
230 | + VReg *Vk = &(env->fpr[vk].vreg); \ | ||
231 | + \ | ||
232 | + m = LSX_LEN/BIT; \ | ||
233 | + for (i = 0; i < m; i++) { \ | ||
234 | + uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ | ||
235 | + temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ | ||
236 | + } \ | ||
237 | + *Vd = temp; \ | ||
238 | +} | ||
239 | + | ||
240 | +VSHUF(vshuf_h, 16, H) | ||
241 | +VSHUF(vshuf_w, 32, W) | ||
242 | +VSHUF(vshuf_d, 64, D) | ||
243 | + | ||
244 | +#define VSHUF4I(NAME, BIT, E) \ | ||
245 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
246 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
247 | +{ \ | ||
248 | + int i; \ | ||
249 | + VReg temp; \ | ||
250 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
251 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
252 | + \ | ||
253 | + for (i = 0; i < LSX_LEN/BIT; i++) { \ | ||
254 | + temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ | ||
255 | + (2 * ((i) & 0x03))) & 0x03)); \ | ||
256 | + } \ | ||
257 | + *Vd = temp; \ | ||
258 | +} | ||
259 | + | ||
260 | +VSHUF4I(vshuf4i_b, 8, B) | ||
261 | +VSHUF4I(vshuf4i_h, 16, H) | ||
262 | +VSHUF4I(vshuf4i_w, 32, W) | ||
263 | + | ||
264 | +void HELPER(vshuf4i_d)(CPULoongArchState *env, | ||
265 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
266 | +{ | ||
267 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
268 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
269 | + | ||
270 | + VReg temp; | ||
271 | + temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); | ||
272 | + temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1); | ||
273 | + *Vd = temp; | ||
274 | +} | ||
275 | + | ||
276 | +void HELPER(vpermi_w)(CPULoongArchState *env, | ||
277 | + uint32_t vd, uint32_t vj, uint32_t imm) | ||
278 | +{ | ||
279 | + VReg temp; | ||
280 | + VReg *Vd = &(env->fpr[vd].vreg); | ||
281 | + VReg *Vj = &(env->fpr[vj].vreg); | ||
282 | + | ||
283 | + temp.W(0) = Vj->W(imm & 0x3); | ||
284 | + temp.W(1) = Vj->W((imm >> 2) & 0x3); | ||
285 | + temp.W(2) = Vd->W((imm >> 4) & 0x3); | ||
286 | + temp.W(3) = Vd->W((imm >> 6) & 0x3); | ||
287 | + *Vd = temp; | ||
288 | +} | ||
289 | + | ||
290 | +#define VEXTRINS(NAME, BIT, E, MASK) \ | ||
291 | +void HELPER(NAME)(CPULoongArchState *env, \ | ||
292 | + uint32_t vd, uint32_t vj, uint32_t imm) \ | ||
293 | +{ \ | ||
294 | + int ins, extr; \ | ||
295 | + VReg *Vd = &(env->fpr[vd].vreg); \ | ||
296 | + VReg *Vj = &(env->fpr[vj].vreg); \ | ||
297 | + \ | ||
298 | + ins = (imm >> 4) & MASK; \ | ||
299 | + extr = imm & MASK; \ | ||
300 | + Vd->E(ins) = Vj->E(extr); \ | ||
301 | +} | ||
302 | + | ||
303 | +VEXTRINS(vextrins_b, 8, B, 0xf) | ||
304 | +VEXTRINS(vextrins_h, 16, H, 0x7) | ||
305 | +VEXTRINS(vextrins_w, 32, W, 0x3) | ||
306 | +VEXTRINS(vextrins_d, 64, D, 0x1) | ||
307 | -- | ||
308 | 2.31.1 | ||
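
The least obvious line in the patch above is the VSHUF4I index expression. A short stand-alone C sketch (illustrative only, not taken from the patch) unpacks it: for destination lane i, the group-of-four base (i & ~3) is kept and the 2-bit selector at position i % 4 of the 8-bit immediate picks the source lane within that group, so imm = 0x1b (binary 00011011) reverses every group of four lanes:

    #include <stdio.h>

    /* Source-lane index used by the VSHUF4I macro for destination lane i:
     * keep the group-of-four base (i & ~3) and pick the 2-bit selector
     * for position i % 4 out of the 8-bit immediate.
     */
    static int vshuf4i_src(int i, unsigned imm)
    {
        return (i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03);
    }

    int main(void)
    {
        unsigned imm = 0x1b;            /* selectors 3, 2, 1, 0 */
        for (int i = 0; i < 8; i++) {   /* e.g. the 8 halfword lanes of vshuf4i.h */
            printf("lane %d <- lane %d\n", i, vshuf4i_src(i, imm));
        }
        return 0;
    }

The same reading applies to vshuf4i.b/h/w; vshuf4i.d is special-cased in the helper because each of its two destination lanes takes a two-bit selector that chooses both the source register (Vd or Vj) and the lane within it.
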
Deleted patch | |||
---|---|---|---|
1 | This patch includes: | ||
2 | - VLDI. | ||
3 | 1 | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
6 | Message-Id: <20230504122810.4094787-43-gaosong@loongson.cn> | ||
7 | --- | ||
8 | target/loongarch/disas.c | 7 + | ||
9 | target/loongarch/insn_trans/trans_lsx.c.inc | 137 ++++++++++++++++++++ | ||
10 | target/loongarch/insns.decode | 4 + | ||
11 | 3 files changed, 148 insertions(+) | ||
12 | |||
13 | diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/loongarch/disas.c | ||
16 | +++ b/target/loongarch/disas.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic) | ||
18 | output(ctx, mnemonic, "v%d, r%d, r%d", a->vd, a->rj, a->rk); | ||
19 | } | ||
20 | |||
21 | +static void output_v_i(DisasContext *ctx, arg_v_i *a, const char *mnemonic) | ||
22 | +{ | ||
23 | + output(ctx, mnemonic, "v%d, 0x%x", a->vd, a->imm); | ||
24 | +} | ||
25 | + | ||
26 | INSN_LSX(vadd_b, vvv) | ||
27 | INSN_LSX(vadd_h, vvv) | ||
28 | INSN_LSX(vadd_w, vvv) | ||
29 | @@ -XXX,XX +XXX,XX @@ INSN_LSX(vmskltz_d, vv) | ||
30 | INSN_LSX(vmskgez_b, vv) | ||
31 | INSN_LSX(vmsknz_b, vv) | ||
32 | |||
33 | +INSN_LSX(vldi, v_i) | ||
34 | + | ||
35 | INSN_LSX(vand_v, vvv) | ||
36 | INSN_LSX(vor_v, vvv) | ||
37 | INSN_LSX(vxor_v, vvv) | ||
38 | diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/target/loongarch/insn_trans/trans_lsx.c.inc | ||
41 | +++ b/target/loongarch/insn_trans/trans_lsx.c.inc | ||
42 | @@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d) | ||
43 | TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b) | ||
44 | TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b) | ||
45 | |||
46 | +#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0)) | ||
47 | + | ||
48 | +static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) | ||
49 | +{ | ||
50 | + int mode; | ||
51 | + uint64_t data, t; | ||
52 | + | ||
53 | + /* | ||
54 | + * imm bits [11:8] give the mode; valid modes are 0 to 12 and | ||
55 | + * other values are invalid. | ||
56 | + */ | ||
57 | + mode = (imm >> 8) & 0xf; | ||
58 | + t = imm & 0xff; | ||
59 | + switch (mode) { | ||
60 | + case 0: | ||
61 | + /* data: {2{24'0, imm[7:0]}} */ | ||
62 | + data = (t << 32) | t; | ||
63 | + break; | ||
64 | + case 1: | ||
65 | + /* data: {2{16'0, imm[7:0], 8'0}} */ | ||
66 | + data = (t << 40) | (t << 8); | ||
67 | + break; | ||
68 | + case 2: | ||
69 | + /* data: {2{8'0, imm[7:0], 16'0}} */ | ||
70 | + data = (t << 48) | (t << 16); | ||
71 | + break; | ||
72 | + case 3: | ||
73 | + /* data: {2{imm[7:0], 24'0}} */ | ||
74 | + data = (t << 56) | (t << 24); | ||
75 | + break; | ||
76 | + case 4: | ||
77 | + /* data: {4{8'0, imm[7:0]}} */ | ||
78 | + data = (t << 48) | (t << 32) | (t << 16) | t; | ||
79 | + break; | ||
80 | + case 5: | ||
81 | + /* data: {4{imm[7:0], 8'0}} */ | ||
82 | + data = (t << 56) | (t << 40) | (t << 24) | (t << 8); | ||
83 | + break; | ||
84 | + case 6: | ||
85 | + /* data: {2{16'0, imm[7:0], 8'1}} */ | ||
86 | + data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff; | ||
87 | + break; | ||
88 | + case 7: | ||
89 | + /* data: {2{8'0, imm[7:0], 16'1}} */ | ||
90 | + data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff; | ||
91 | + break; | ||
92 | + case 8: | ||
93 | + /* data: {8{imm[7:0]}} */ | ||
94 | + data = (t << 56) | (t << 48) | (t << 40) | (t << 32) | | ||
95 | + (t << 24) | (t << 16) | (t << 8) | t; | ||
96 | + break; | ||
97 | + case 9: | ||
98 | + /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */ | ||
99 | + { | ||
100 | + uint64_t b0, b1, b2, b3, b4, b5, b6, b7; | ||
101 | + b0 = t & 0x1; | ||
102 | + b1 = (t & 0x2) >> 1; | ||
103 | + b2 = (t & 0x4) >> 2; | ||
104 | + b3 = (t & 0x8) >> 3; | ||
105 | + b4 = (t & 0x10) >> 4; | ||
106 | + b5 = (t & 0x20) >> 5; | ||
107 | + b6 = (t & 0x40) >> 6; | ||
108 | + b7 = (t & 0x80) >> 7; | ||
109 | + data = (EXPAND_BYTE(b7) << 56) | | ||
110 | + (EXPAND_BYTE(b6) << 48) | | ||
111 | + (EXPAND_BYTE(b5) << 40) | | ||
112 | + (EXPAND_BYTE(b4) << 32) | | ||
113 | + (EXPAND_BYTE(b3) << 24) | | ||
114 | + (EXPAND_BYTE(b2) << 16) | | ||
115 | + (EXPAND_BYTE(b1) << 8) | | ||
116 | + EXPAND_BYTE(b0); | ||
117 | + } | ||
118 | + break; | ||
119 | + case 10: | ||
120 | + /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */ | ||
121 | + { | ||
122 | + uint64_t b6, b7; | ||
123 | + uint64_t t0, t1; | ||
124 | + b6 = (imm & 0x40) >> 6; | ||
125 | + b7 = (imm & 0x80) >> 7; | ||
126 | + t0 = (imm & 0x3f); | ||
127 | + t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0); | ||
128 | + data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19); | ||
129 | + } | ||
130 | + break; | ||
131 | + case 11: | ||
132 | + /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */ | ||
133 | + { | ||
134 | + uint64_t b6, b7; | ||
135 | + uint64_t t0, t1; | ||
136 | + b6 = (imm & 0x40) >> 6; | ||
137 | + b7 = (imm & 0x80) >> 7; | ||
138 | + t0 = (imm & 0x3f); | ||
139 | + t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0); | ||
140 | + data = (t1 << 25) | (t0 << 19); | ||
141 | + } | ||
142 | + break; | ||
143 | + case 12: | ||
144 | + /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */ | ||
145 | + { | ||
146 | + uint64_t b6, b7; | ||
147 | + uint64_t t0, t1; | ||
148 | + b6 = (imm & 0x40) >> 6; | ||
149 | + b7 = (imm & 0x80) >> 7; | ||
150 | + t0 = (imm & 0x3f); | ||
151 | + t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0); | ||
152 | + data = (t1 << 54) | (t0 << 48); | ||
153 | + } | ||
154 | + break; | ||
155 | + default: | ||
156 | + generate_exception(ctx, EXCCODE_INE); | ||
157 | + g_assert_not_reached(); | ||
158 | + } | ||
159 | + return data; | ||
160 | +} | ||
161 | + | ||
162 | +static bool trans_vldi(DisasContext *ctx, arg_vldi *a) | ||
163 | +{ | ||
164 | + int sel, vece; | ||
165 | + uint64_t value; | ||
166 | + CHECK_SXE; | ||
167 | + | ||
168 | + sel = (a->imm >> 12) & 0x1; | ||
169 | + | ||
170 | + if (sel) { | ||
171 | + value = vldi_get_value(ctx, a->imm); | ||
172 | + vece = MO_64; | ||
173 | + } else { | ||
174 | + value = ((int32_t)(a->imm << 22)) >> 22; | ||
175 | + vece = (a->imm >> 10) & 0x3; | ||
176 | + } | ||
177 | + | ||
178 | + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, | ||
179 | + tcg_constant_i64(value)); | ||
180 | + return true; | ||
181 | +} | ||
182 | + | ||
183 | TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and) | ||
184 | TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or) | ||
185 | TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor) | ||
186 | diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode | ||
187 | index XXXXXXX..XXXXXXX 100644 | ||
188 | --- a/target/loongarch/insns.decode | ||
189 | +++ b/target/loongarch/insns.decode | ||
190 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
191 | &vvr vd vj rk | ||
192 | &vrr vd rj rk | ||
193 | &vr_ii vd rj imm imm2 | ||
194 | +&v_i vd imm | ||
195 | |||
196 | # | ||
197 | # LSX Formats | ||
198 | @@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15 | ||
199 | @vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1 | ||
200 | @vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii | ||
201 | @vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr | ||
202 | +@v_i13 .... ........ .. imm:13 vd:5 &v_i | ||
203 | |||
204 | vadd_b 0111 00000000 10100 ..... ..... ..... @vvv | ||
205 | vadd_h 0111 00000000 10101 ..... ..... ..... @vvv | ||
206 | @@ -XXX,XX +XXX,XX @@ vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv | ||
207 | vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv | ||
208 | vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv | ||
209 | |||
210 | +vldi 0111 00111110 00 ............. ..... @v_i13 | ||
211 | + | ||
212 | vand_v 0111 00010010 01100 ..... ..... ..... @vvv | ||
213 | vor_v 0111 00010010 01101 ..... ..... ..... @vvv | ||
214 | vxor_v 0111 00010010 01110 ..... ..... ..... @vvv | ||
215 | -- | ||
216 | 2.31.1 | ||
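
As a side note on how the 13-bit vldi immediate is consumed (a stand-alone sketch, not code from the patch; the constants are arbitrary examples): bit 12 selects between the mode-based expansion handled by vldi_get_value (mode in bits 11:8) and the simple form where bits 11:10 give the element size and bits 9:0 are a sign-extended value broadcast into every element. Mode 8, for instance, repeats imm[7:0] into all eight bytes of the 64-bit pattern; the multiplication below is just a compact equivalent of the shift-and-OR chain in the helper:

    #include <stdint.h>
    #include <stdio.h>

    /* Mode 8 broadcast: repeat the low byte into all 8 bytes of a 64-bit pattern. */
    static uint64_t vldi_mode8(uint32_t imm)
    {
        uint64_t t = imm & 0xff;
        return t * 0x0101010101010101ull;
    }

    int main(void)
    {
        uint32_t imm;

        /* imm[12] = 1, mode = 8, byte = 0x5a: every byte of the pattern is 0x5a. */
        imm = (1u << 12) | (8u << 8) | 0x5a;
        printf("mode 8 pattern = 0x%016llx\n", (unsigned long long)vldi_mode8(imm));

        /* imm[12] = 0: bits 11:10 give the element size (0=B, 1=H, 2=W, 3=D) and
         * bits 9:0 are a signed 10-bit value broadcast into each element. */
        imm = (2u << 10) | 0x3ff;                       /* word elements, value -1 */
        int32_t value = ((int32_t)(imm << 22)) >> 22;   /* same sign-extension as trans_vldi */
        int vece = (imm >> 10) & 0x3;
        printf("vece = %d, value = %d\n", vece, value);
        return 0;
    }

This prints 0x5a5a5a5a5a5a5a5a for the mode-8 case and "vece = 2, value = -1" for the simple form, i.e. the MO_32 broadcast of -1 that the translator would emit.
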
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | Signed-off-by: Song Gao <gaosong@loongson.cn> | ||
3 | Message-Id: <20230504122810.4094787-45-gaosong@loongson.cn> | ||
4 | --- | ||
5 | target/loongarch/cpu.c | 1 + | ||
6 | 1 file changed, 1 insertion(+) | ||
7 | 1 | ||
8 | diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/loongarch/cpu.c | ||
11 | +++ b/target/loongarch/cpu.c | ||
12 | @@ -XXX,XX +XXX,XX @@ static void loongarch_la464_initfn(Object *obj) | ||
13 | data = FIELD_DP32(data, CPUCFG2, FP_SP, 1); | ||
14 | data = FIELD_DP32(data, CPUCFG2, FP_DP, 1); | ||
15 | data = FIELD_DP32(data, CPUCFG2, FP_VER, 1); | ||
16 | + data = FIELD_DP32(data, CPUCFG2, LSX, 1); | ||
17 | data = FIELD_DP32(data, CPUCFG2, LLFTP, 1); | ||
18 | data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1); | ||
19 | data = FIELD_DP32(data, CPUCFG2, LAM, 1); | ||
20 | -- | ||
21 | 2.31.1 | ||
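
The one-line change above advertises LSX in the guest-visible CPUCFG2 word. For readers unfamiliar with the registerfields helpers, a plain-C stand-in for the deposit it performs (illustrative only: the real FIELD_DP32 macro derives the shift and width from the corresponding FIELD() definition, and bit 6 below follows the CPUCFG.2.LSX position in the LoongArch manual rather than anything taken from this patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Write 'val' into the field that starts at bit 'shift' and is 'len' bits
     * wide, leaving the other bits of 'reg' unchanged. */
    static uint32_t field_dp32(uint32_t reg, unsigned shift, unsigned len, uint32_t val)
    {
        uint32_t mask = ((1u << len) - 1) << shift;
        return (reg & ~mask) | ((val << shift) & mask);
    }

    int main(void)
    {
        uint32_t cpucfg2 = 0;

        cpucfg2 = field_dp32(cpucfg2, 6, 1, 1);   /* advertise LSX */
        printf("CPUCFG2 = 0x%08x, LSX = %u\n",
               (unsigned)cpucfg2, (unsigned)((cpucfg2 >> 6) & 1));
        return 0;
    }

With the bit set, a guest that probes CPUCFG word 2 sees the LSX feature and may start issuing the vector instructions implemented in the rest of the series.
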