1
The following changes since commit f6b761bdbd8ba63cee7428d52fb6b46e4224ddab:
1
The following changes since commit 7433709a147706ad7d1956b15669279933d0f82b:
2
2
3
Merge tag 'qga-pull-2023-05-04' of https://github.com/kostyanf14/qemu into staging (2023-05-04 12:08:00 +0100)
3
Merge tag 'hw-misc-20250113' of https://github.com/philmd/qemu into staging (2025-01-14 12:46:56 -0500)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20230505
7
https://gitlab.com/bibo-mao/qemu.git tags/pull-loongarch-20250116
8
8
9
for you to fetch changes up to 9dd207d409cc2eb08fe52965b9d1fd4a12a82bd5:
9
for you to fetch changes up to bb81f237401b5f89f6bba21d9d4f50e0073372a6:
10
10
11
hw/intc: don't use target_ulong for LoongArch ipi (2023-05-05 10:00:47 +0800)
11
hw/intc/loongarch_ipi: Use alternative implemation for cpu_by_arch_id (2025-01-15 14:36:19 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Add LoongArch LSX instructions.
14
pull-loongarch-20250116 queue
15
15
16
----------------------------------------------------------------
16
----------------------------------------------------------------
17
Alex Bennée (1):
17
Bibo Mao (7):
18
hw/intc: don't use target_ulong for LoongArch ipi
18
hw/intc/loongarch_ipi: Implement realize interface
19
hw/intc/loongson_ipi: Remove num_cpu from loongson_ipi_common
20
hw/intc/loongson_ipi: Remove property num_cpu from loongson_ipi_common
21
hw/intc/loongarch_ipi: Get cpu number from possible_cpu_arch_ids
22
hw/intc/loongarch_ipi: Remove property num-cpu
23
hw/intc/loongson_ipi: Add more input parameter for cpu_by_arch_id
24
hw/intc/loongarch_ipi: Use alternative implemation for cpu_by_arch_id
19
25
20
Song Gao (44):
26
Miao Hao (1):
21
target/loongarch: Add LSX data type VReg
27
target/loongarch: Add page table walker support for debugger usage
22
target/loongarch: meson.build support build LSX
23
target/loongarch: Add CHECK_SXE maccro for check LSX enable
24
target/loongarch: Implement vadd/vsub
25
target/loongarch: Implement vaddi/vsubi
26
target/loongarch: Implement vneg
27
target/loongarch: Implement vsadd/vssub
28
target/loongarch: Implement vhaddw/vhsubw
29
target/loongarch: Implement vaddw/vsubw
30
target/loongarch: Implement vavg/vavgr
31
target/loongarch: Implement vabsd
32
target/loongarch: Implement vadda
33
target/loongarch: Implement vmax/vmin
34
target/loongarch: Implement vmul/vmuh/vmulw{ev/od}
35
target/loongarch: Implement vmadd/vmsub/vmaddw{ev/od}
36
target/loongarch: Implement vdiv/vmod
37
target/loongarch: Implement vsat
38
target/loongarch: Implement vexth
39
target/loongarch: Implement vsigncov
40
target/loongarch: Implement vmskltz/vmskgez/vmsknz
41
target/loongarch: Implement LSX logic instructions
42
target/loongarch: Implement vsll vsrl vsra vrotr
43
target/loongarch: Implement vsllwil vextl
44
target/loongarch: Implement vsrlr vsrar
45
target/loongarch: Implement vsrln vsran
46
target/loongarch: Implement vsrlrn vsrarn
47
target/loongarch: Implement vssrln vssran
48
target/loongarch: Implement vssrlrn vssrarn
49
target/loongarch: Implement vclo vclz
50
target/loongarch: Implement vpcnt
51
target/loongarch: Implement vbitclr vbitset vbitrev
52
target/loongarch: Implement vfrstp
53
target/loongarch: Implement LSX fpu arith instructions
54
target/loongarch: Implement LSX fpu fcvt instructions
55
target/loongarch: Implement vseq vsle vslt
56
target/loongarch: Implement vfcmp
57
target/loongarch: Implement vbitsel vset
58
target/loongarch: Implement vinsgr2vr vpickve2gr vreplgr2vr
59
target/loongarch: Implement vreplve vpack vpick
60
target/loongarch: Implement vilvl vilvh vextrins vshuf
61
target/loongarch: Implement vld vst
62
target/loongarch: Implement vldi
63
target/loongarch: Use {set/get}_gpr replace to cpu_fpr
64
target/loongarch: CPUCFG support LSX
65
28
66
hw/intc/loongarch_ipi.c | 2 +-
29
hw/intc/loongarch_ipi.c | 69 ++++++++++++++++++-------
67
linux-user/loongarch64/signal.c | 4 +-
30
hw/intc/loongson_ipi.c | 43 +++++++++++++++-
68
target/loongarch/cpu.c | 5 +-
31
hw/intc/loongson_ipi_common.c | 41 +++++----------
69
target/loongarch/cpu.h | 27 +-
32
hw/loongarch/virt.c | 1 -
70
target/loongarch/disas.c | 911 +++++
33
include/hw/intc/loongarch_ipi.h | 1 +
71
target/loongarch/fpu_helper.c | 2 +-
34
include/hw/intc/loongson_ipi_common.h | 5 +-
72
target/loongarch/gdbstub.c | 4 +-
35
target/loongarch/cpu_helper.c | 94 +++++++++++++++++++++++++++++++++--
73
target/loongarch/helper.h | 566 +++
36
target/loongarch/internals.h | 4 +-
74
target/loongarch/insn_trans/trans_farith.c.inc | 72 +-
37
target/loongarch/tcg/tlb_helper.c | 4 +-
75
target/loongarch/insn_trans/trans_fcmp.c.inc | 12 +-
38
9 files changed, 203 insertions(+), 59 deletions(-)
76
target/loongarch/insn_trans/trans_fmemory.c.inc | 37 +-
77
target/loongarch/insn_trans/trans_fmov.c.inc | 31 +-
78
target/loongarch/insn_trans/trans_lsx.c.inc | 4400 +++++++++++++++++++++++
79
target/loongarch/insns.decode | 811 +++++
80
target/loongarch/internals.h | 23 +
81
target/loongarch/lsx_helper.c | 3004 ++++++++++++++++
82
target/loongarch/machine.c | 79 +-
83
target/loongarch/meson.build | 1 +
84
target/loongarch/translate.c | 55 +-
85
target/loongarch/translate.h | 1 +
86
20 files changed, 9989 insertions(+), 58 deletions(-)
87
create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc
88
create mode 100644 target/loongarch/lsx_helper.c
89
90
diff view generated by jsdifflib
1
This patch includes:
1
From: Miao Hao <haomiao23s@ict.ac.cn>
2
- VF{ADD/SUB/MUL/DIV}.{S/D};
3
- VF{MADD/MSUB/NMADD/NMSUB}.{S/D};
4
- VF{MAX/MIN}.{S/D};
5
- VF{MAXA/MINA}.{S/D};
6
- VFLOGB.{S/D};
7
- VFCLASS.{S/D};
8
- VF{SQRT/RECIP/RSQRT}.{S/D}.
9
2
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
When dumping memory content by gva address, a software page table walker is
11
Signed-off-by: Song Gao <gaosong@loongson.cn>
4
necessary to get the corresponding gpa address.
12
Message-Id: <20230504122810.4094787-34-gaosong@loongson.cn>
5
6
Here a page table walker is added for debugger usage.
7
8
Signed-off-by: Miao Hao <haomiao23s@ict.ac.cn>
9
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
10
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
13
---
11
---
14
target/loongarch/cpu.h | 4 +
12
target/loongarch/cpu_helper.c | 94 +++++++++++++++++++++++++++++--
15
target/loongarch/disas.c | 46 +++++
13
target/loongarch/internals.h | 4 +-
16
target/loongarch/fpu_helper.c | 2 +-
14
target/loongarch/tcg/tlb_helper.c | 4 +-
17
target/loongarch/helper.h | 41 +++++
15
3 files changed, 94 insertions(+), 8 deletions(-)
18
target/loongarch/insn_trans/trans_lsx.c.inc | 55 ++++++
19
target/loongarch/insns.decode | 43 +++++
20
target/loongarch/internals.h | 1 +
21
target/loongarch/lsx_helper.c | 186 ++++++++++++++++++++
22
8 files changed, 377 insertions(+), 1 deletion(-)
23
16
24
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
17
diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c
25
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
26
--- a/target/loongarch/cpu.h
19
--- a/target/loongarch/cpu_helper.c
27
+++ b/target/loongarch/cpu.h
20
+++ b/target/loongarch/cpu_helper.c
28
@@ -XXX,XX +XXX,XX @@ FIELD(FCSR0, CAUSE, 24, 5)
21
@@ -XXX,XX +XXX,XX @@ bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr,
29
do { \
22
return false;
30
(REG) = FIELD_DP32(REG, FCSR0, CAUSE, V); \
31
} while (0)
32
+#define UPDATE_FP_CAUSE(REG, V) \
33
+ do { \
34
+ (REG) |= FIELD_DP32(0, FCSR0, CAUSE, V); \
35
+ } while (0)
36
37
#define GET_FP_ENABLES(REG) FIELD_EX32(REG, FCSR0, ENABLES)
38
#define SET_FP_ENABLES(REG, V) \
39
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/loongarch/disas.c
42
+++ b/target/loongarch/disas.c
43
@@ -XXX,XX +XXX,XX @@ static void output_vv(DisasContext *ctx, arg_vv *a, const char *mnemonic)
44
output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj);
45
}
23
}
46
24
47
+static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
25
+static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical,
26
+ int *prot, target_ulong address)
48
+{
27
+{
49
+ output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va);
28
+ CPUState *cs = env_cpu(env);
29
+ target_ulong index, phys;
30
+ uint64_t dir_base, dir_width;
31
+ uint64_t base;
32
+ int level;
33
+
34
+ if ((address >> 63) & 0x1) {
35
+ base = env->CSR_PGDH;
36
+ } else {
37
+ base = env->CSR_PGDL;
38
+ }
39
+ base &= TARGET_PHYS_MASK;
40
+
41
+ for (level = 4; level > 0; level--) {
42
+ get_dir_base_width(env, &dir_base, &dir_width, level);
43
+
44
+ if (dir_width == 0) {
45
+ continue;
46
+ }
47
+
48
+ /* get next level page directory */
49
+ index = (address >> dir_base) & ((1 << dir_width) - 1);
50
+ phys = base | index << 3;
51
+ base = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
52
+ if (FIELD_EX64(base, TLBENTRY, HUGE)) {
53
+ /* base is a huge pte */
54
+ break;
55
+ }
56
+ }
57
+
58
+ /* pte */
59
+ if (FIELD_EX64(base, TLBENTRY, HUGE)) {
60
+ /* Huge Page. base is pte */
61
+ base = FIELD_DP64(base, TLBENTRY, LEVEL, 0);
62
+ base = FIELD_DP64(base, TLBENTRY, HUGE, 0);
63
+ if (FIELD_EX64(base, TLBENTRY, HGLOBAL)) {
64
+ base = FIELD_DP64(base, TLBENTRY, HGLOBAL, 0);
65
+ base = FIELD_DP64(base, TLBENTRY, G, 1);
66
+ }
67
+ } else {
68
+ /* Normal Page. base points to pte */
69
+ get_dir_base_width(env, &dir_base, &dir_width, 0);
70
+ index = (address >> dir_base) & ((1 << dir_width) - 1);
71
+ phys = base | index << 3;
72
+ base = ldq_phys(cs->as, phys);
73
+ }
74
+
75
+ /* TODO: check plv and other bits? */
76
+
77
+ /* base is pte, in normal pte format */
78
+ if (!FIELD_EX64(base, TLBENTRY, V)) {
79
+ return TLBRET_NOMATCH;
80
+ }
81
+
82
+ if (!FIELD_EX64(base, TLBENTRY, D)) {
83
+ *prot = PAGE_READ;
84
+ } else {
85
+ *prot = PAGE_READ | PAGE_WRITE;
86
+ }
87
+
88
+ /* get TARGET_PAGE_SIZE aligned physical address */
89
+ base += (address & TARGET_PHYS_MASK) & ((1 << dir_base) - 1);
90
+ /* mask RPLV, NX, NR bits */
91
+ base = FIELD_DP64(base, TLBENTRY_64, RPLV, 0);
92
+ base = FIELD_DP64(base, TLBENTRY_64, NX, 0);
93
+ base = FIELD_DP64(base, TLBENTRY_64, NR, 0);
94
+ /* mask other attribute bits */
95
+ *physical = base & TARGET_PAGE_MASK;
96
+
97
+ return 0;
50
+}
98
+}
51
+
99
+
52
INSN_LSX(vadd_b, vvv)
100
static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
53
INSN_LSX(vadd_h, vvv)
101
int *prot, target_ulong address,
54
INSN_LSX(vadd_w, vvv)
102
- MMUAccessType access_type, int mmu_idx)
55
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vfrstp_b, vvv)
103
+ MMUAccessType access_type, int mmu_idx,
56
INSN_LSX(vfrstp_h, vvv)
104
+ int is_debug)
57
INSN_LSX(vfrstpi_b, vv_i)
105
{
58
INSN_LSX(vfrstpi_h, vv_i)
106
int index, match;
59
+
107
60
+INSN_LSX(vfadd_s, vvv)
108
@@ -XXX,XX +XXX,XX @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
61
+INSN_LSX(vfadd_d, vvv)
109
if (match) {
62
+INSN_LSX(vfsub_s, vvv)
110
return loongarch_map_tlb_entry(env, physical, prot,
63
+INSN_LSX(vfsub_d, vvv)
111
address, access_type, index, mmu_idx);
64
+INSN_LSX(vfmul_s, vvv)
112
+ } else if (is_debug) {
65
+INSN_LSX(vfmul_d, vvv)
113
+ /*
66
+INSN_LSX(vfdiv_s, vvv)
114
+ * For debugger memory access, we want to do the map when there is a
67
+INSN_LSX(vfdiv_d, vvv)
115
+ * legal mapping, even if the mapping is not yet in TLB. return 0 if
68
+
116
+ * there is a valid map, else non-zero.
69
+INSN_LSX(vfmadd_s, vvvv)
117
+ */
70
+INSN_LSX(vfmadd_d, vvvv)
118
+ return loongarch_page_table_walker(env, physical, prot, address);
71
+INSN_LSX(vfmsub_s, vvvv)
119
}
72
+INSN_LSX(vfmsub_d, vvvv)
120
73
+INSN_LSX(vfnmadd_s, vvvv)
121
return TLBRET_NOMATCH;
74
+INSN_LSX(vfnmadd_d, vvvv)
122
@@ -XXX,XX +XXX,XX @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
75
+INSN_LSX(vfnmsub_s, vvvv)
123
#else
76
+INSN_LSX(vfnmsub_d, vvvv)
124
static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
77
+
125
int *prot, target_ulong address,
78
+INSN_LSX(vfmax_s, vvv)
126
- MMUAccessType access_type, int mmu_idx)
79
+INSN_LSX(vfmax_d, vvv)
127
+ MMUAccessType access_type, int mmu_idx,
80
+INSN_LSX(vfmin_s, vvv)
128
+ int is_debug)
81
+INSN_LSX(vfmin_d, vvv)
129
{
82
+
130
return TLBRET_NOMATCH;
83
+INSN_LSX(vfmaxa_s, vvv)
84
+INSN_LSX(vfmaxa_d, vvv)
85
+INSN_LSX(vfmina_s, vvv)
86
+INSN_LSX(vfmina_d, vvv)
87
+
88
+INSN_LSX(vflogb_s, vv)
89
+INSN_LSX(vflogb_d, vv)
90
+
91
+INSN_LSX(vfclass_s, vv)
92
+INSN_LSX(vfclass_d, vv)
93
+
94
+INSN_LSX(vfsqrt_s, vv)
95
+INSN_LSX(vfsqrt_d, vv)
96
+INSN_LSX(vfrecip_s, vv)
97
+INSN_LSX(vfrecip_d, vv)
98
+INSN_LSX(vfrsqrt_s, vv)
99
+INSN_LSX(vfrsqrt_d, vv)
100
diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/target/loongarch/fpu_helper.c
103
+++ b/target/loongarch/fpu_helper.c
104
@@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env)
105
set_flush_to_zero(0, &env->fp_status);
106
}
131
}
107
132
@@ -XXX,XX +XXX,XX @@ static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va,
108
-static int ieee_ex_to_loongarch(int xcpt)
133
109
+int ieee_ex_to_loongarch(int xcpt)
134
int get_physical_address(CPULoongArchState *env, hwaddr *physical,
135
int *prot, target_ulong address,
136
- MMUAccessType access_type, int mmu_idx)
137
+ MMUAccessType access_type, int mmu_idx, int is_debug)
110
{
138
{
111
int ret = 0;
139
int user_mode = mmu_idx == MMU_USER_IDX;
112
if (xcpt & float_flag_invalid) {
140
int kernel_mode = mmu_idx == MMU_KERNEL_IDX;
113
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
141
@@ -XXX,XX +XXX,XX @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical,
114
index XXXXXXX..XXXXXXX 100644
142
115
--- a/target/loongarch/helper.h
143
/* Mapped address */
116
+++ b/target/loongarch/helper.h
144
return loongarch_map_address(env, physical, prot, address,
117
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
145
- access_type, mmu_idx);
118
DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
146
+ access_type, mmu_idx, is_debug);
119
DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
147
}
120
DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
148
121
+
149
hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
122
+DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
150
@@ -XXX,XX +XXX,XX @@ hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
123
+DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
151
int prot;
124
+DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
152
125
+DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
153
if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
126
+DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
154
- cpu_mmu_index(cs, false)) != 0) {
127
+DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
155
+ cpu_mmu_index(cs, false), 1) != 0) {
128
+DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
156
return -1;
129
+DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
157
}
130
+
158
return phys_addr;
131
+DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
132
+DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
133
+DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
134
+DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
135
+DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
136
+DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
137
+DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
138
+DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
139
+
140
+DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
141
+DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
142
+DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
143
+DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)
144
+
145
+DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32)
146
+DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32)
147
+DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32)
148
+DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32)
149
+
150
+DEF_HELPER_3(vflogb_s, void, env, i32, i32)
151
+DEF_HELPER_3(vflogb_d, void, env, i32, i32)
152
+
153
+DEF_HELPER_3(vfclass_s, void, env, i32, i32)
154
+DEF_HELPER_3(vfclass_d, void, env, i32, i32)
155
+
156
+DEF_HELPER_3(vfsqrt_s, void, env, i32, i32)
157
+DEF_HELPER_3(vfsqrt_d, void, env, i32, i32)
158
+DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
159
+DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
160
+DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
161
+DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
162
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
163
index XXXXXXX..XXXXXXX 100644
164
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
165
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
166
@@ -XXX,XX +XXX,XX @@
167
#define CHECK_SXE
168
#endif
169
170
+static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
171
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
172
+ TCGv_i32, TCGv_i32))
173
+{
174
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
175
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
176
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
177
+ TCGv_i32 va = tcg_constant_i32(a->va);
178
+
179
+ CHECK_SXE;
180
+ func(cpu_env, vd, vj, vk, va);
181
+ return true;
182
+}
183
+
184
static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
185
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
186
{
187
@@ -XXX,XX +XXX,XX @@ TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b)
188
TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h)
189
TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b)
190
TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h)
191
+
192
+TRANS(vfadd_s, gen_vvv, gen_helper_vfadd_s)
193
+TRANS(vfadd_d, gen_vvv, gen_helper_vfadd_d)
194
+TRANS(vfsub_s, gen_vvv, gen_helper_vfsub_s)
195
+TRANS(vfsub_d, gen_vvv, gen_helper_vfsub_d)
196
+TRANS(vfmul_s, gen_vvv, gen_helper_vfmul_s)
197
+TRANS(vfmul_d, gen_vvv, gen_helper_vfmul_d)
198
+TRANS(vfdiv_s, gen_vvv, gen_helper_vfdiv_s)
199
+TRANS(vfdiv_d, gen_vvv, gen_helper_vfdiv_d)
200
+
201
+TRANS(vfmadd_s, gen_vvvv, gen_helper_vfmadd_s)
202
+TRANS(vfmadd_d, gen_vvvv, gen_helper_vfmadd_d)
203
+TRANS(vfmsub_s, gen_vvvv, gen_helper_vfmsub_s)
204
+TRANS(vfmsub_d, gen_vvvv, gen_helper_vfmsub_d)
205
+TRANS(vfnmadd_s, gen_vvvv, gen_helper_vfnmadd_s)
206
+TRANS(vfnmadd_d, gen_vvvv, gen_helper_vfnmadd_d)
207
+TRANS(vfnmsub_s, gen_vvvv, gen_helper_vfnmsub_s)
208
+TRANS(vfnmsub_d, gen_vvvv, gen_helper_vfnmsub_d)
209
+
210
+TRANS(vfmax_s, gen_vvv, gen_helper_vfmax_s)
211
+TRANS(vfmax_d, gen_vvv, gen_helper_vfmax_d)
212
+TRANS(vfmin_s, gen_vvv, gen_helper_vfmin_s)
213
+TRANS(vfmin_d, gen_vvv, gen_helper_vfmin_d)
214
+
215
+TRANS(vfmaxa_s, gen_vvv, gen_helper_vfmaxa_s)
216
+TRANS(vfmaxa_d, gen_vvv, gen_helper_vfmaxa_d)
217
+TRANS(vfmina_s, gen_vvv, gen_helper_vfmina_s)
218
+TRANS(vfmina_d, gen_vvv, gen_helper_vfmina_d)
219
+
220
+TRANS(vflogb_s, gen_vv, gen_helper_vflogb_s)
221
+TRANS(vflogb_d, gen_vv, gen_helper_vflogb_d)
222
+
223
+TRANS(vfclass_s, gen_vv, gen_helper_vfclass_s)
224
+TRANS(vfclass_d, gen_vv, gen_helper_vfclass_d)
225
+
226
+TRANS(vfsqrt_s, gen_vv, gen_helper_vfsqrt_s)
227
+TRANS(vfsqrt_d, gen_vv, gen_helper_vfsqrt_d)
228
+TRANS(vfrecip_s, gen_vv, gen_helper_vfrecip_s)
229
+TRANS(vfrecip_d, gen_vv, gen_helper_vfrecip_d)
230
+TRANS(vfrsqrt_s, gen_vv, gen_helper_vfrsqrt_s)
231
+TRANS(vfrsqrt_d, gen_vv, gen_helper_vfrsqrt_d)
232
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
233
index XXXXXXX..XXXXXXX 100644
234
--- a/target/loongarch/insns.decode
235
+++ b/target/loongarch/insns.decode
236
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
237
&vv vd vj
238
&vvv vd vj vk
239
&vv_i vd vj imm
240
+&vvvv vd vj vk va
241
242
#
243
# LSX Formats
244
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
245
@vv_ui7 .... ........ ... imm:7 vj:5 vd:5 &vv_i
246
@vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i
247
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
248
+@vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv
249
250
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
251
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
252
@@ -XXX,XX +XXX,XX @@ vfrstp_b 0111 00010010 10110 ..... ..... ..... @vvv
253
vfrstp_h 0111 00010010 10111 ..... ..... ..... @vvv
254
vfrstpi_b 0111 00101001 10100 ..... ..... ..... @vv_ui5
255
vfrstpi_h 0111 00101001 10101 ..... ..... ..... @vv_ui5
256
+
257
+vfadd_s 0111 00010011 00001 ..... ..... ..... @vvv
258
+vfadd_d 0111 00010011 00010 ..... ..... ..... @vvv
259
+vfsub_s 0111 00010011 00101 ..... ..... ..... @vvv
260
+vfsub_d 0111 00010011 00110 ..... ..... ..... @vvv
261
+vfmul_s 0111 00010011 10001 ..... ..... ..... @vvv
262
+vfmul_d 0111 00010011 10010 ..... ..... ..... @vvv
263
+vfdiv_s 0111 00010011 10101 ..... ..... ..... @vvv
264
+vfdiv_d 0111 00010011 10110 ..... ..... ..... @vvv
265
+
266
+vfmadd_s 0000 10010001 ..... ..... ..... ..... @vvvv
267
+vfmadd_d 0000 10010010 ..... ..... ..... ..... @vvvv
268
+vfmsub_s 0000 10010101 ..... ..... ..... ..... @vvvv
269
+vfmsub_d 0000 10010110 ..... ..... ..... ..... @vvvv
270
+vfnmadd_s 0000 10011001 ..... ..... ..... ..... @vvvv
271
+vfnmadd_d 0000 10011010 ..... ..... ..... ..... @vvvv
272
+vfnmsub_s 0000 10011101 ..... ..... ..... ..... @vvvv
273
+vfnmsub_d 0000 10011110 ..... ..... ..... ..... @vvvv
274
+
275
+vfmax_s 0111 00010011 11001 ..... ..... ..... @vvv
276
+vfmax_d 0111 00010011 11010 ..... ..... ..... @vvv
277
+vfmin_s 0111 00010011 11101 ..... ..... ..... @vvv
278
+vfmin_d 0111 00010011 11110 ..... ..... ..... @vvv
279
+
280
+vfmaxa_s 0111 00010100 00001 ..... ..... ..... @vvv
281
+vfmaxa_d 0111 00010100 00010 ..... ..... ..... @vvv
282
+vfmina_s 0111 00010100 00101 ..... ..... ..... @vvv
283
+vfmina_d 0111 00010100 00110 ..... ..... ..... @vvv
284
+
285
+vflogb_s 0111 00101001 11001 10001 ..... ..... @vv
286
+vflogb_d 0111 00101001 11001 10010 ..... ..... @vv
287
+
288
+vfclass_s 0111 00101001 11001 10101 ..... ..... @vv
289
+vfclass_d 0111 00101001 11001 10110 ..... ..... @vv
290
+
291
+vfsqrt_s 0111 00101001 11001 11001 ..... ..... @vv
292
+vfsqrt_d 0111 00101001 11001 11010 ..... ..... @vv
293
+vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv
294
+vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv
295
+vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv
296
+vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv
297
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
159
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
298
index XXXXXXX..XXXXXXX 100644
160
index XXXXXXX..XXXXXXX 100644
299
--- a/target/loongarch/internals.h
161
--- a/target/loongarch/internals.h
300
+++ b/target/loongarch/internals.h
162
+++ b/target/loongarch/internals.h
301
@@ -XXX,XX +XXX,XX @@ void G_NORETURN do_raise_exception(CPULoongArchState *env,
163
@@ -XXX,XX +XXX,XX @@ bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr,
302
164
int *index);
303
const char *loongarch_exception_name(int32_t exception);
165
int get_physical_address(CPULoongArchState *env, hwaddr *physical,
304
166
int *prot, target_ulong address,
305
+int ieee_ex_to_loongarch(int xcpt);
167
- MMUAccessType access_type, int mmu_idx);
306
void restore_fp_status(CPULoongArchState *env);
168
+ MMUAccessType access_type, int mmu_idx, int is_debug);
307
169
+void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
308
#ifndef CONFIG_USER_ONLY
170
+ uint64_t *dir_width, target_ulong level);
309
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
171
hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
172
173
#ifdef CONFIG_TCG
174
diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c
310
index XXXXXXX..XXXXXXX 100644
175
index XXXXXXX..XXXXXXX 100644
311
--- a/target/loongarch/lsx_helper.c
176
--- a/target/loongarch/tcg/tlb_helper.c
312
+++ b/target/loongarch/lsx_helper.c
177
+++ b/target/loongarch/tcg/tlb_helper.c
313
@@ -XXX,XX +XXX,XX @@
178
@@ -XXX,XX +XXX,XX @@
314
#include "cpu.h"
179
#include "exec/log.h"
315
#include "exec/exec-all.h"
180
#include "cpu-csr.h"
316
#include "exec/helper-proto.h"
181
317
+#include "fpu/softfloat.h"
182
-static void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
318
+#include "internals.h"
183
+void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base,
319
184
uint64_t *dir_width, target_ulong level)
320
#define DO_ADD(a, b) (a + b)
185
{
321
#define DO_SUB(a, b) (a - b)
186
switch (level) {
322
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(CPULoongArchState *env, \
187
@@ -XXX,XX +XXX,XX @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
323
188
324
VFRSTPI(vfrstpi_b, 8, B)
189
/* Data access */
325
VFRSTPI(vfrstpi_h, 16, H)
190
ret = get_physical_address(env, &physical, &prot, address,
326
+
191
- access_type, mmu_idx);
327
+static void vec_update_fcsr0_mask(CPULoongArchState *env,
192
+ access_type, mmu_idx, 0);
328
+ uintptr_t pc, int mask)
193
329
+{
194
if (ret == TLBRET_MATCH) {
330
+ int flags = get_float_exception_flags(&env->fp_status);
195
tlb_set_page(cs, address & TARGET_PAGE_MASK,
331
+
332
+ set_float_exception_flags(0, &env->fp_status);
333
+
334
+ flags &= ~mask;
335
+
336
+ if (flags) {
337
+ flags = ieee_ex_to_loongarch(flags);
338
+ UPDATE_FP_CAUSE(env->fcsr0, flags);
339
+ }
340
+
341
+ if (GET_FP_ENABLES(env->fcsr0) & flags) {
342
+ do_raise_exception(env, EXCCODE_FPE, pc);
343
+ } else {
344
+ UPDATE_FP_FLAGS(env->fcsr0, flags);
345
+ }
346
+}
347
+
348
+static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
349
+{
350
+ vec_update_fcsr0_mask(env, pc, 0);
351
+}
352
+
353
+static inline void vec_clear_cause(CPULoongArchState *env)
354
+{
355
+ SET_FP_CAUSE(env->fcsr0, 0);
356
+}
357
+
358
+#define DO_3OP_F(NAME, BIT, E, FN) \
359
+void HELPER(NAME)(CPULoongArchState *env, \
360
+ uint32_t vd, uint32_t vj, uint32_t vk) \
361
+{ \
362
+ int i; \
363
+ VReg *Vd = &(env->fpr[vd].vreg); \
364
+ VReg *Vj = &(env->fpr[vj].vreg); \
365
+ VReg *Vk = &(env->fpr[vk].vreg); \
366
+ \
367
+ vec_clear_cause(env); \
368
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
369
+ Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
370
+ vec_update_fcsr0(env, GETPC()); \
371
+ } \
372
+}
373
+
374
+DO_3OP_F(vfadd_s, 32, UW, float32_add)
375
+DO_3OP_F(vfadd_d, 64, UD, float64_add)
376
+DO_3OP_F(vfsub_s, 32, UW, float32_sub)
377
+DO_3OP_F(vfsub_d, 64, UD, float64_sub)
378
+DO_3OP_F(vfmul_s, 32, UW, float32_mul)
379
+DO_3OP_F(vfmul_d, 64, UD, float64_mul)
380
+DO_3OP_F(vfdiv_s, 32, UW, float32_div)
381
+DO_3OP_F(vfdiv_d, 64, UD, float64_div)
382
+DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
383
+DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
384
+DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
385
+DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
386
+DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
387
+DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
388
+DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
389
+DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
390
+
391
+#define DO_4OP_F(NAME, BIT, E, FN, flags) \
392
+void HELPER(NAME)(CPULoongArchState *env, \
393
+ uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \
394
+{ \
395
+ int i; \
396
+ VReg *Vd = &(env->fpr[vd].vreg); \
397
+ VReg *Vj = &(env->fpr[vj].vreg); \
398
+ VReg *Vk = &(env->fpr[vk].vreg); \
399
+ VReg *Va = &(env->fpr[va].vreg); \
400
+ \
401
+ vec_clear_cause(env); \
402
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
403
+ Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
404
+ vec_update_fcsr0(env, GETPC()); \
405
+ } \
406
+}
407
+
408
+DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
409
+DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
410
+DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
411
+DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
412
+DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
413
+DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
414
+DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
415
+ float_muladd_negate_c | float_muladd_negate_result)
416
+DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
417
+ float_muladd_negate_c | float_muladd_negate_result)
418
+
419
+#define DO_2OP_F(NAME, BIT, E, FN) \
420
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
421
+{ \
422
+ int i; \
423
+ VReg *Vd = &(env->fpr[vd].vreg); \
424
+ VReg *Vj = &(env->fpr[vj].vreg); \
425
+ \
426
+ vec_clear_cause(env); \
427
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
428
+ Vd->E(i) = FN(env, Vj->E(i)); \
429
+ } \
430
+}
431
+
432
+#define FLOGB(BIT, T) \
433
+static T do_flogb_## BIT(CPULoongArchState *env, T fj) \
434
+{ \
435
+ T fp, fd; \
436
+ float_status *status = &env->fp_status; \
437
+ FloatRoundMode old_mode = get_float_rounding_mode(status); \
438
+ \
439
+ set_float_rounding_mode(float_round_down, status); \
440
+ fp = float ## BIT ##_log2(fj, status); \
441
+ fd = float ## BIT ##_round_to_int(fp, status); \
442
+ set_float_rounding_mode(old_mode, status); \
443
+ vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \
444
+ return fd; \
445
+}
446
+
447
+FLOGB(32, uint32_t)
448
+FLOGB(64, uint64_t)
449
+
450
+#define FCLASS(NAME, BIT, E, FN) \
451
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
452
+{ \
453
+ int i; \
454
+ VReg *Vd = &(env->fpr[vd].vreg); \
455
+ VReg *Vj = &(env->fpr[vj].vreg); \
456
+ \
457
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
458
+ Vd->E(i) = FN(env, Vj->E(i)); \
459
+ } \
460
+}
461
+
462
+FCLASS(vfclass_s, 32, UW, helper_fclass_s)
463
+FCLASS(vfclass_d, 64, UD, helper_fclass_d)
464
+
465
+#define FSQRT(BIT, T) \
466
+static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
467
+{ \
468
+ T fd; \
469
+ fd = float ## BIT ##_sqrt(fj, &env->fp_status); \
470
+ vec_update_fcsr0(env, GETPC()); \
471
+ return fd; \
472
+}
473
+
474
+FSQRT(32, uint32_t)
475
+FSQRT(64, uint64_t)
476
+
477
+#define FRECIP(BIT, T) \
478
+static T do_frecip_## BIT(CPULoongArchState *env, T fj) \
479
+{ \
480
+ T fd; \
481
+ fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
482
+ vec_update_fcsr0(env, GETPC()); \
483
+ return fd; \
484
+}
485
+
486
+FRECIP(32, uint32_t)
487
+FRECIP(64, uint64_t)
488
+
489
+#define FRSQRT(BIT, T) \
490
+static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \
491
+{ \
492
+ T fd, fp; \
493
+ fp = float ## BIT ##_sqrt(fj, &env->fp_status); \
494
+ fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
495
+ vec_update_fcsr0(env, GETPC()); \
496
+ return fd; \
497
+}
498
+
499
+FRSQRT(32, uint32_t)
500
+FRSQRT(64, uint64_t)
501
+
502
+DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
503
+DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
504
+DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
505
+DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
506
+DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
507
+DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
508
+DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
509
+DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
510
--
196
--
511
2.31.1
197
2.43.5
diff view generated by jsdifflib
1
This patch includes:
1
Add realize interface for loongarch ipi device.
2
- VBITSEL.V;
3
- VBITSELI.B;
4
- VSET{EQZ/NEZ}.V;
5
- VSETANYEQZ.{B/H/W/D};
6
- VSETALLNEZ.{B/H/W/D}.
7
2
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
9
Signed-off-by: Song Gao <gaosong@loongson.cn>
4
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
10
Message-Id: <20230504122810.4094787-38-gaosong@loongson.cn>
11
---
5
---
12
target/loongarch/disas.c | 20 ++++++
6
hw/intc/loongarch_ipi.c | 19 +++++++++++++++++++
13
target/loongarch/helper.h | 11 +++
7
include/hw/intc/loongarch_ipi.h | 1 +
14
target/loongarch/insn_trans/trans_lsx.c.inc | 74 +++++++++++++++++++++
8
2 files changed, 20 insertions(+)
15
target/loongarch/insns.decode | 17 +++++
16
target/loongarch/lsx_helper.c | 52 +++++++++++++++
17
5 files changed, 174 insertions(+)
18
9
19
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
10
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/target/loongarch/disas.c
12
--- a/hw/intc/loongarch_ipi.c
22
+++ b/target/loongarch/disas.c
13
+++ b/hw/intc/loongarch_ipi.c
23
@@ -XXX,XX +XXX,XX @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
14
@@ -XXX,XX +XXX,XX @@
24
return true; \
15
16
#include "qemu/osdep.h"
17
#include "hw/boards.h"
18
+#include "qapi/error.h"
19
#include "hw/intc/loongarch_ipi.h"
20
#include "target/loongarch/cpu.h"
21
22
@@ -XXX,XX +XXX,XX @@ static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id)
23
return NULL;
25
}
24
}
26
25
27
+static void output_cv(DisasContext *ctx, arg_cv *a,
26
+static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
28
+ const char *mnemonic)
29
+{
27
+{
30
+ output(ctx, mnemonic, "fcc%d, v%d", a->cd, a->vj);
28
+ LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev);
31
+}
29
+ Error *local_err = NULL;
32
+
30
+
33
static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic)
31
+ lic->parent_realize(dev, &local_err);
34
{
32
+ if (local_err) {
35
output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
33
+ error_propagate(errp, local_err);
36
@@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, \
34
+ return;
37
38
LSX_FCMP_INSN(s)
39
LSX_FCMP_INSN(d)
40
+
41
+INSN_LSX(vbitsel_v, vvvv)
42
+INSN_LSX(vbitseli_b, vv_i)
43
+
44
+INSN_LSX(vseteqz_v, cv)
45
+INSN_LSX(vsetnez_v, cv)
46
+INSN_LSX(vsetanyeqz_b, cv)
47
+INSN_LSX(vsetanyeqz_h, cv)
48
+INSN_LSX(vsetanyeqz_w, cv)
49
+INSN_LSX(vsetanyeqz_d, cv)
50
+INSN_LSX(vsetallnez_b, cv)
51
+INSN_LSX(vsetallnez_h, cv)
52
+INSN_LSX(vsetallnez_w, cv)
53
+INSN_LSX(vsetallnez_d, cv)
54
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/loongarch/helper.h
57
+++ b/target/loongarch/helper.h
58
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
59
DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
60
DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
61
DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
62
+
63
+DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
64
+
65
+DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32)
66
+DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32)
67
+DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32)
68
+DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32)
69
+DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
70
+DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
71
+DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
72
+DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
73
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
74
index XXXXXXX..XXXXXXX 100644
75
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
76
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
77
@@ -XXX,XX +XXX,XX @@ static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
78
return true;
79
}
80
81
+static bool gen_cv(DisasContext *ctx, arg_cv *a,
82
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
83
+{
84
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
85
+ TCGv_i32 cd = tcg_constant_i32(a->cd);
86
+
87
+ CHECK_SXE;
88
+ func(cpu_env, cd, vj);
89
+ return true;
90
+}
91
+
92
static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
93
void (*func)(unsigned, uint32_t, uint32_t,
94
uint32_t, uint32_t, uint32_t))
95
@@ -XXX,XX +XXX,XX @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
96
97
return true;
98
}
99
+
100
+static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
101
+{
102
+ CHECK_SXE;
103
+
104
+ tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
105
+ vec_full_offset(a->vk), vec_full_offset(a->vj),
106
+ 16, ctx->vl/8);
107
+ return true;
108
+}
109
+
110
+static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
111
+{
112
+ tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
113
+}
114
+
115
+static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
116
+{
117
+ static const GVecGen2i op = {
118
+ .fniv = gen_vbitseli,
119
+ .fnoi = gen_helper_vbitseli_b,
120
+ .vece = MO_8,
121
+ .load_dest = true
122
+ };
123
+
124
+ CHECK_SXE;
125
+
126
+ tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
127
+ 16, ctx->vl/8, a->imm, &op);
128
+ return true;
129
+}
130
+
131
+#define VSET(NAME, COND) \
132
+static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
133
+{ \
134
+ TCGv_i64 t1, al, ah; \
135
+ \
136
+ al = tcg_temp_new_i64(); \
137
+ ah = tcg_temp_new_i64(); \
138
+ t1 = tcg_temp_new_i64(); \
139
+ \
140
+ get_vreg64(ah, a->vj, 1); \
141
+ get_vreg64(al, a->vj, 0); \
142
+ \
143
+ CHECK_SXE; \
144
+ tcg_gen_or_i64(t1, al, ah); \
145
+ tcg_gen_setcondi_i64(COND, t1, t1, 0); \
146
+ tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
147
+ \
148
+ return true; \
149
+}
150
+
151
+VSET(vseteqz_v, TCG_COND_EQ)
152
+VSET(vsetnez_v, TCG_COND_NE)
153
+
154
+TRANS(vsetanyeqz_b, gen_cv, gen_helper_vsetanyeqz_b)
155
+TRANS(vsetanyeqz_h, gen_cv, gen_helper_vsetanyeqz_h)
156
+TRANS(vsetanyeqz_w, gen_cv, gen_helper_vsetanyeqz_w)
157
+TRANS(vsetanyeqz_d, gen_cv, gen_helper_vsetanyeqz_d)
158
+TRANS(vsetallnez_b, gen_cv, gen_helper_vsetallnez_b)
159
+TRANS(vsetallnez_h, gen_cv, gen_helper_vsetallnez_h)
160
+TRANS(vsetallnez_w, gen_cv, gen_helper_vsetallnez_w)
161
+TRANS(vsetallnez_d, gen_cv, gen_helper_vsetallnez_d)
162
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
163
index XXXXXXX..XXXXXXX 100644
164
--- a/target/loongarch/insns.decode
165
+++ b/target/loongarch/insns.decode
166
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
167
#
168
169
&vv vd vj
170
+&cv cd vj
171
&vvv vd vj vk
172
&vv_i vd vj imm
173
&vvvv vd vj vk va
174
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
175
# LSX Formats
176
#
177
@vv .... ........ ..... ..... vj:5 vd:5 &vv
178
+@cv .... ........ ..... ..... vj:5 .. cd:3 &cv
179
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
180
@vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i
181
@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
182
@@ -XXX,XX +XXX,XX @@ vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5
183
184
vfcmp_cond_s 0000 11000101 ..... ..... ..... ..... @vvv_fcond
185
vfcmp_cond_d 0000 11000110 ..... ..... ..... ..... @vvv_fcond
186
+
187
+vbitsel_v 0000 11010001 ..... ..... ..... ..... @vvvv
188
+
189
+vbitseli_b 0111 00111100 01 ........ ..... ..... @vv_ui8
190
+
191
+vseteqz_v 0111 00101001 11001 00110 ..... 00 ... @cv
192
+vsetnez_v 0111 00101001 11001 00111 ..... 00 ... @cv
193
+vsetanyeqz_b 0111 00101001 11001 01000 ..... 00 ... @cv
194
+vsetanyeqz_h 0111 00101001 11001 01001 ..... 00 ... @cv
195
+vsetanyeqz_w 0111 00101001 11001 01010 ..... 00 ... @cv
196
+vsetanyeqz_d 0111 00101001 11001 01011 ..... 00 ... @cv
197
+vsetallnez_b 0111 00101001 11001 01100 ..... 00 ... @cv
198
+vsetallnez_h 0111 00101001 11001 01101 ..... 00 ... @cv
199
+vsetallnez_w 0111 00101001 11001 01110 ..... 00 ... @cv
200
+vsetallnez_d 0111 00101001 11001 01111 ..... 00 ... @cv
201
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
202
index XXXXXXX..XXXXXXX 100644
203
--- a/target/loongarch/lsx_helper.c
204
+++ b/target/loongarch/lsx_helper.c
205
@@ -XXX,XX +XXX,XX @@
206
#include "exec/helper-proto.h"
207
#include "fpu/softfloat.h"
208
#include "internals.h"
209
+#include "tcg/tcg.h"
210
211
#define DO_ADD(a, b) (a + b)
212
#define DO_SUB(a, b) (a - b)
213
@@ -XXX,XX +XXX,XX @@ VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
214
VFCMP(vfcmp_s_s, 32, UW, float32_compare)
215
VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
216
VFCMP(vfcmp_s_d, 64, UD, float64_compare)
217
+
218
+void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
219
+{
220
+ int i;
221
+ VReg *Vd = (VReg *)vd;
222
+ VReg *Vj = (VReg *)vj;
223
+
224
+ for (i = 0; i < 16; i++) {
225
+ Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
226
+ }
35
+ }
227
+}
36
+}
228
+
37
+
229
+/* Copy from target/arm/tcg/sve_helper.c */
38
static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
230
+static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
39
{
231
+{
40
LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
232
+ uint64_t bits = 8 << esz;
41
+ LoongarchIPIClass *lic = LOONGARCH_IPI_CLASS(klass);
233
+ uint64_t ones = dup_const(esz, 1);
42
+ DeviceClass *dc = DEVICE_CLASS(klass);
234
+ uint64_t signs = ones << (bits - 1);
43
235
+ uint64_t cmp0, cmp1;
44
+ device_class_set_parent_realize(dc, loongarch_ipi_realize,
236
+
45
+ &lic->parent_realize);
237
+ cmp1 = dup_const(esz, n);
46
licc->get_iocsr_as = get_iocsr_as;
238
+ cmp0 = cmp1 ^ m0;
47
licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
239
+ cmp1 = cmp1 ^ m1;
48
}
240
+ cmp0 = (cmp0 - ones) & ~cmp0;
49
@@ -XXX,XX +XXX,XX @@ static const TypeInfo loongarch_ipi_types[] = {
241
+ cmp1 = (cmp1 - ones) & ~cmp1;
50
{
242
+ return (cmp0 | cmp1) & signs;
51
.name = TYPE_LOONGARCH_IPI,
243
+}
52
.parent = TYPE_LOONGSON_IPI_COMMON,
244
+
53
+ .instance_size = sizeof(LoongarchIPIState),
245
+#define SETANYEQZ(NAME, MO) \
54
+ .class_size = sizeof(LoongarchIPIClass),
246
+void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
55
.class_init = loongarch_ipi_class_init,
247
+{ \
56
}
248
+ VReg *Vj = &(env->fpr[vj].vreg); \
57
};
249
+ \
58
diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h
250
+ env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
59
index XXXXXXX..XXXXXXX 100644
251
+}
60
--- a/include/hw/intc/loongarch_ipi.h
252
+SETANYEQZ(vsetanyeqz_b, MO_8)
61
+++ b/include/hw/intc/loongarch_ipi.h
253
+SETANYEQZ(vsetanyeqz_h, MO_16)
62
@@ -XXX,XX +XXX,XX @@ struct LoongarchIPIState {
254
+SETANYEQZ(vsetanyeqz_w, MO_32)
63
255
+SETANYEQZ(vsetanyeqz_d, MO_64)
64
struct LoongarchIPIClass {
256
+
65
LoongsonIPICommonClass parent_class;
257
+#define SETALLNEZ(NAME, MO) \
66
+ DeviceRealize parent_realize;
258
+void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
67
};
259
+{ \
68
260
+ VReg *Vj = &(env->fpr[vj].vreg); \
69
#endif
261
+ \
262
+ env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \
263
+}
264
+SETALLNEZ(vsetallnez_b, MO_8)
265
+SETALLNEZ(vsetallnez_h, MO_16)
266
+SETALLNEZ(vsetallnez_w, MO_32)
267
+SETALLNEZ(vsetallnez_d, MO_64)
268
--
70
--
269
2.31.1
71
2.43.5
diff view generated by jsdifflib
1
This patch includes:
1
With mips64 loongson ipi, num_cpu property is used. With loongarch
2
- VREPLVE[I].{B/H/W/D};
2
ipi, num_cpu can be acquired from possible_cpu_arch_ids.
3
- VBSLL.V, VBSRL.V;
4
- VPACK{EV/OD}.{B/H/W/D};
5
- VPICK{EV/OD}.{B/H/W/D}.
6
3
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
Here remove num_cpu setting from loongson_ipi_common, and this piece
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
5
of code is put into loongson and loongarch ipi separately.
9
Message-Id: <20230504122810.4094787-40-gaosong@loongson.cn>
6
7
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
8
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
10
---
9
---
11
target/loongarch/disas.c | 35 +++++
10
hw/intc/loongarch_ipi.c | 13 +++++++++++++
12
target/loongarch/helper.h | 18 +++
11
hw/intc/loongson_ipi.c | 14 +++++++++++++-
13
target/loongarch/insn_trans/trans_lsx.c.inc | 144 ++++++++++++++++++++
12
hw/intc/loongson_ipi_common.c | 14 --------------
14
target/loongarch/insns.decode | 34 +++++
13
3 files changed, 26 insertions(+), 15 deletions(-)
15
target/loongarch/lsx_helper.c | 88 ++++++++++++
16
5 files changed, 319 insertions(+)
17
14
18
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
15
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/target/loongarch/disas.c
17
--- a/hw/intc/loongarch_ipi.c
21
+++ b/target/loongarch/disas.c
18
+++ b/hw/intc/loongarch_ipi.c
22
@@ -XXX,XX +XXX,XX @@ static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic)
19
@@ -XXX,XX +XXX,XX @@ static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id)
23
output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj);
20
24
}
21
static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
25
22
{
26
+static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
23
+ LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev);
27
+{
24
LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev);
28
+ output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk);
25
Error *local_err = NULL;
29
+}
26
+ int i;
27
28
lic->parent_realize(dev, &local_err);
29
if (local_err) {
30
error_propagate(errp, local_err);
31
return;
32
}
30
+
33
+
31
INSN_LSX(vadd_b, vvv)
34
+ if (lics->num_cpu == 0) {
32
INSN_LSX(vadd_h, vvv)
35
+ error_setg(errp, "num-cpu must be at least 1");
33
INSN_LSX(vadd_w, vvv)
36
+ return;
34
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vreplgr2vr_b, vr)
35
INSN_LSX(vreplgr2vr_h, vr)
36
INSN_LSX(vreplgr2vr_w, vr)
37
INSN_LSX(vreplgr2vr_d, vr)
38
+
39
+INSN_LSX(vreplve_b, vvr)
40
+INSN_LSX(vreplve_h, vvr)
41
+INSN_LSX(vreplve_w, vvr)
42
+INSN_LSX(vreplve_d, vvr)
43
+INSN_LSX(vreplvei_b, vv_i)
44
+INSN_LSX(vreplvei_h, vv_i)
45
+INSN_LSX(vreplvei_w, vv_i)
46
+INSN_LSX(vreplvei_d, vv_i)
47
+
48
+INSN_LSX(vbsll_v, vv_i)
49
+INSN_LSX(vbsrl_v, vv_i)
50
+
51
+INSN_LSX(vpackev_b, vvv)
52
+INSN_LSX(vpackev_h, vvv)
53
+INSN_LSX(vpackev_w, vvv)
54
+INSN_LSX(vpackev_d, vvv)
55
+INSN_LSX(vpackod_b, vvv)
56
+INSN_LSX(vpackod_h, vvv)
57
+INSN_LSX(vpackod_w, vvv)
58
+INSN_LSX(vpackod_d, vvv)
59
+
60
+INSN_LSX(vpickev_b, vvv)
61
+INSN_LSX(vpickev_h, vvv)
62
+INSN_LSX(vpickev_w, vvv)
63
+INSN_LSX(vpickev_d, vvv)
64
+INSN_LSX(vpickod_b, vvv)
65
+INSN_LSX(vpickod_h, vvv)
66
+INSN_LSX(vpickod_w, vvv)
67
+INSN_LSX(vpickod_d, vvv)
68
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/loongarch/helper.h
71
+++ b/target/loongarch/helper.h
72
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
73
DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
74
DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
75
DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
76
+
77
+DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
78
+DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
79
+DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
80
+DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
81
+DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
82
+DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
83
+DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
84
+DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
85
+
86
+DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
87
+DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
88
+DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
89
+DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
90
+DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
91
+DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
92
+DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
93
+DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
94
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
95
index XXXXXXX..XXXXXXX 100644
96
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
97
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
98
@@ -XXX,XX +XXX,XX @@ TRANS(vreplgr2vr_b, gvec_dup, MO_8)
99
TRANS(vreplgr2vr_h, gvec_dup, MO_16)
100
TRANS(vreplgr2vr_w, gvec_dup, MO_32)
101
TRANS(vreplgr2vr_d, gvec_dup, MO_64)
102
+
103
+static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
104
+{
105
+ CHECK_SXE;
106
+ tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
107
+ offsetof(CPULoongArchState,
108
+ fpr[a->vj].vreg.B((a->imm))),
109
+ 16, ctx->vl/8);
110
+ return true;
111
+}
112
+
113
+static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
114
+{
115
+ CHECK_SXE;
116
+ tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
117
+ offsetof(CPULoongArchState,
118
+ fpr[a->vj].vreg.H((a->imm))),
119
+ 16, ctx->vl/8);
120
+ return true;
121
+}
122
+static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
123
+{
124
+ CHECK_SXE;
125
+ tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
126
+ offsetof(CPULoongArchState,
127
+ fpr[a->vj].vreg.W((a->imm))),
128
+ 16, ctx->vl/8);
129
+ return true;
130
+}
131
+static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
132
+{
133
+ CHECK_SXE;
134
+ tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
135
+ offsetof(CPULoongArchState,
136
+ fpr[a->vj].vreg.D((a->imm))),
137
+ 16, ctx->vl/8);
138
+ return true;
139
+}
140
+
141
+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
142
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
143
+{
144
+ TCGv_i64 t0 = tcg_temp_new_i64();
145
+ TCGv_ptr t1 = tcg_temp_new_ptr();
146
+ TCGv_i64 t2 = tcg_temp_new_i64();
147
+
148
+ CHECK_SXE;
149
+
150
+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
151
+ tcg_gen_shli_i64(t0, t0, vece);
152
+ if (HOST_BIG_ENDIAN) {
153
+ tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
154
+ }
37
+ }
155
+
38
+
156
+ tcg_gen_trunc_i64_ptr(t1, t0);
39
+ lics->cpu = g_new0(IPICore, lics->num_cpu);
157
+ tcg_gen_add_ptr(t1, t1, cpu_env);
40
+ for (i = 0; i < lics->num_cpu; i++) {
158
+ func(t2, t1, vec_full_offset(a->vj));
41
+ lics->cpu[i].ipi = lics;
159
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
42
+ qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1);
160
+
43
+ }
161
+ return true;
44
}
162
+}
45
163
+
46
static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
164
+TRANS(vreplve_b, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
47
diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c
165
+TRANS(vreplve_h, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
48
index XXXXXXX..XXXXXXX 100644
166
+TRANS(vreplve_w, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
49
--- a/hw/intc/loongson_ipi.c
167
+TRANS(vreplve_d, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
50
+++ b/hw/intc/loongson_ipi.c
168
+
51
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp)
169
+static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
52
LoongsonIPIClass *lic = LOONGSON_IPI_GET_CLASS(dev);
170
+{
53
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
171
+ int ofs;
54
Error *local_err = NULL;
172
+ TCGv_i64 desthigh, destlow, high, low;
55
+ int i;
173
+
56
174
+ CHECK_SXE;
57
lic->parent_realize(dev, &local_err);
175
+
58
if (local_err) {
176
+ desthigh = tcg_temp_new_i64();
59
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp)
177
+ destlow = tcg_temp_new_i64();
60
return;
178
+ high = tcg_temp_new_i64();
61
}
179
+ low = tcg_temp_new_i64();
62
180
+
63
+ if (sc->num_cpu == 0) {
181
+ get_vreg64(low, a->vj, 0);
64
+ error_setg(errp, "num-cpu must be at least 1");
182
+
65
+ return;
183
+ ofs = ((a->imm) & 0xf) * 8;
184
+ if (ofs < 64) {
185
+ get_vreg64(high, a->vj, 1);
186
+ tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
187
+ tcg_gen_shli_i64(destlow, low, ofs);
188
+ } else {
189
+ tcg_gen_shli_i64(desthigh, low, ofs - 64);
190
+ destlow = tcg_constant_i64(0);
191
+ }
66
+ }
192
+
67
+
193
+ set_vreg64(desthigh, a->vd, 1);
68
+ sc->cpu = g_new0(IPICore, sc->num_cpu);
194
+ set_vreg64(destlow, a->vd, 0);
69
+ for (i = 0; i < sc->num_cpu; i++) {
195
+
70
+ sc->cpu[i].ipi = sc;
196
+ return true;
71
+ qdev_init_gpio_out(dev, &sc->cpu[i].irq, 1);
197
+}
198
+
199
+static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
200
+{
201
+ TCGv_i64 desthigh, destlow, high, low;
202
+ int ofs;
203
+
204
+ CHECK_SXE;
205
+
206
+ desthigh = tcg_temp_new_i64();
207
+ destlow = tcg_temp_new_i64();
208
+ high = tcg_temp_new_i64();
209
+ low = tcg_temp_new_i64();
210
+
211
+ get_vreg64(high, a->vj, 1);
212
+
213
+ ofs = ((a->imm) & 0xf) * 8;
214
+ if (ofs < 64) {
215
+ get_vreg64(low, a->vj, 0);
216
+ tcg_gen_extract2_i64(destlow, low, high, ofs);
217
+ tcg_gen_shri_i64(desthigh, high, ofs);
218
+ } else {
219
+ tcg_gen_shri_i64(destlow, high, ofs - 64);
220
+ desthigh = tcg_constant_i64(0);
221
+ }
72
+ }
222
+
73
+
223
+ set_vreg64(desthigh, a->vd, 1);
74
s->ipi_mmio_mem = g_new0(MemoryRegion, sc->num_cpu);
224
+ set_vreg64(destlow, a->vd, 0);
75
- for (unsigned i = 0; i < sc->num_cpu; i++) {
225
+
76
+ for (i = 0; i < sc->num_cpu; i++) {
226
+ return true;
77
g_autofree char *name = g_strdup_printf("loongson_ipi_cpu%d_mmio", i);
227
+}
78
228
+
79
memory_region_init_io(&s->ipi_mmio_mem[i], OBJECT(dev),
229
+TRANS(vpackev_b, gen_vvv, gen_helper_vpackev_b)
80
diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c
230
+TRANS(vpackev_h, gen_vvv, gen_helper_vpackev_h)
231
+TRANS(vpackev_w, gen_vvv, gen_helper_vpackev_w)
232
+TRANS(vpackev_d, gen_vvv, gen_helper_vpackev_d)
233
+TRANS(vpackod_b, gen_vvv, gen_helper_vpackod_b)
234
+TRANS(vpackod_h, gen_vvv, gen_helper_vpackod_h)
235
+TRANS(vpackod_w, gen_vvv, gen_helper_vpackod_w)
236
+TRANS(vpackod_d, gen_vvv, gen_helper_vpackod_d)
237
+
238
+TRANS(vpickev_b, gen_vvv, gen_helper_vpickev_b)
239
+TRANS(vpickev_h, gen_vvv, gen_helper_vpickev_h)
240
+TRANS(vpickev_w, gen_vvv, gen_helper_vpickev_w)
241
+TRANS(vpickev_d, gen_vvv, gen_helper_vpickev_d)
242
+TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b)
243
+TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h)
244
+TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w)
245
+TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d)
246
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
247
index XXXXXXX..XXXXXXX 100644
81
index XXXXXXX..XXXXXXX 100644
248
--- a/target/loongarch/insns.decode
82
--- a/hw/intc/loongson_ipi_common.c
249
+++ b/target/loongarch/insns.decode
83
+++ b/hw/intc/loongson_ipi_common.c
250
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
84
@@ -XXX,XX +XXX,XX @@
251
&vr_i vd rj imm
85
#include "hw/intc/loongson_ipi_common.h"
252
&rv_i rd vj imm
86
#include "hw/irq.h"
253
&vr vd rj
87
#include "hw/qdev-properties.h"
254
+&vvr vd vj rk
88
-#include "qapi/error.h"
255
89
#include "qemu/log.h"
256
#
90
#include "migration/vmstate.h"
257
# LSX Formats
91
#include "trace.h"
258
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
92
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp)
259
@vv .... ........ ..... ..... vj:5 vd:5 &vv
93
{
260
@cv .... ........ ..... ..... vj:5 .. cd:3 &cv
94
LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev);
261
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
95
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
262
+@vv_ui1 .... ........ ..... .... imm:1 vj:5 vd:5 &vv_i
96
- int i;
263
+@vv_ui2 .... ........ ..... ... imm:2 vj:5 vd:5 &vv_i
97
-
264
@vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i
98
- if (s->num_cpu == 0) {
265
@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
99
- error_setg(errp, "num-cpu must be at least 1");
266
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
100
- return;
267
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
101
- }
268
@rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i
102
269
@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
103
memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev),
270
@vr .... ........ ..... ..... rj:5 vd:5 &vr
104
&loongson_ipi_iocsr_ops,
271
+@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr
105
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp)
272
106
&loongson_ipi64_ops,
273
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
107
s, "loongson_ipi64_iocsr", 0x118);
274
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
108
sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem);
275
@@ -XXX,XX +XXX,XX @@ vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr
109
-
276
vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr
110
- s->cpu = g_new0(IPICore, s->num_cpu);
277
vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr
111
- for (i = 0; i < s->num_cpu; i++) {
278
vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr
112
- s->cpu[i].ipi = s;
279
+
113
-
280
+vreplve_b 0111 00010010 00100 ..... ..... ..... @vvr
114
- qdev_init_gpio_out(dev, &s->cpu[i].irq, 1);
281
+vreplve_h 0111 00010010 00101 ..... ..... ..... @vvr
115
- }
282
+vreplve_w 0111 00010010 00110 ..... ..... ..... @vvr
116
}
283
+vreplve_d 0111 00010010 00111 ..... ..... ..... @vvr
117
284
+vreplvei_b 0111 00101111 01111 0 .... ..... ..... @vv_ui4
118
static void loongson_ipi_common_unrealize(DeviceState *dev)
285
+vreplvei_h 0111 00101111 01111 10 ... ..... ..... @vv_ui3
286
+vreplvei_w 0111 00101111 01111 110 .. ..... ..... @vv_ui2
287
+vreplvei_d 0111 00101111 01111 1110 . ..... ..... @vv_ui1
288
+
289
+vbsll_v 0111 00101000 11100 ..... ..... ..... @vv_ui5
290
+vbsrl_v 0111 00101000 11101 ..... ..... ..... @vv_ui5
291
+
292
+vpackev_b 0111 00010001 01100 ..... ..... ..... @vvv
293
+vpackev_h 0111 00010001 01101 ..... ..... ..... @vvv
294
+vpackev_w 0111 00010001 01110 ..... ..... ..... @vvv
295
+vpackev_d 0111 00010001 01111 ..... ..... ..... @vvv
296
+vpackod_b 0111 00010001 10000 ..... ..... ..... @vvv
297
+vpackod_h 0111 00010001 10001 ..... ..... ..... @vvv
298
+vpackod_w 0111 00010001 10010 ..... ..... ..... @vvv
299
+vpackod_d 0111 00010001 10011 ..... ..... ..... @vvv
300
+
301
+vpickev_b 0111 00010001 11100 ..... ..... ..... @vvv
302
+vpickev_h 0111 00010001 11101 ..... ..... ..... @vvv
303
+vpickev_w 0111 00010001 11110 ..... ..... ..... @vvv
304
+vpickev_d 0111 00010001 11111 ..... ..... ..... @vvv
305
+vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv
306
+vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv
307
+vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv
308
+vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv
309
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
310
index XXXXXXX..XXXXXXX 100644
311
--- a/target/loongarch/lsx_helper.c
312
+++ b/target/loongarch/lsx_helper.c
313
@@ -XXX,XX +XXX,XX @@ SETALLNEZ(vsetallnez_b, MO_8)
314
SETALLNEZ(vsetallnez_h, MO_16)
315
SETALLNEZ(vsetallnez_w, MO_32)
316
SETALLNEZ(vsetallnez_d, MO_64)
317
+
318
+#define VPACKEV(NAME, BIT, E) \
319
+void HELPER(NAME)(CPULoongArchState *env, \
320
+ uint32_t vd, uint32_t vj, uint32_t vk) \
321
+{ \
322
+ int i; \
323
+ VReg temp; \
324
+ VReg *Vd = &(env->fpr[vd].vreg); \
325
+ VReg *Vj = &(env->fpr[vj].vreg); \
326
+ VReg *Vk = &(env->fpr[vk].vreg); \
327
+ \
328
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
329
+ temp.E(2 * i + 1) = Vj->E(2 * i); \
330
+ temp.E(2 *i) = Vk->E(2 * i); \
331
+ } \
332
+ *Vd = temp; \
333
+}
334
+
335
+VPACKEV(vpackev_b, 16, B)
336
+VPACKEV(vpackev_h, 32, H)
337
+VPACKEV(vpackev_w, 64, W)
338
+VPACKEV(vpackev_d, 128, D)
339
+
340
+#define VPACKOD(NAME, BIT, E) \
341
+void HELPER(NAME)(CPULoongArchState *env, \
342
+ uint32_t vd, uint32_t vj, uint32_t vk) \
343
+{ \
344
+ int i; \
345
+ VReg temp; \
346
+ VReg *Vd = &(env->fpr[vd].vreg); \
347
+ VReg *Vj = &(env->fpr[vj].vreg); \
348
+ VReg *Vk = &(env->fpr[vk].vreg); \
349
+ \
350
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
351
+ temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
352
+ temp.E(2 * i) = Vk->E(2 * i + 1); \
353
+ } \
354
+ *Vd = temp; \
355
+}
356
+
357
+VPACKOD(vpackod_b, 16, B)
358
+VPACKOD(vpackod_h, 32, H)
359
+VPACKOD(vpackod_w, 64, W)
360
+VPACKOD(vpackod_d, 128, D)
361
+
362
+#define VPICKEV(NAME, BIT, E) \
363
+void HELPER(NAME)(CPULoongArchState *env, \
364
+ uint32_t vd, uint32_t vj, uint32_t vk) \
365
+{ \
366
+ int i; \
367
+ VReg temp; \
368
+ VReg *Vd = &(env->fpr[vd].vreg); \
369
+ VReg *Vj = &(env->fpr[vj].vreg); \
370
+ VReg *Vk = &(env->fpr[vk].vreg); \
371
+ \
372
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
373
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
374
+ temp.E(i) = Vk->E(2 * i); \
375
+ } \
376
+ *Vd = temp; \
377
+}
378
+
379
+VPICKEV(vpickev_b, 16, B)
380
+VPICKEV(vpickev_h, 32, H)
381
+VPICKEV(vpickev_w, 64, W)
382
+VPICKEV(vpickev_d, 128, D)
383
+
384
+#define VPICKOD(NAME, BIT, E) \
385
+void HELPER(NAME)(CPULoongArchState *env, \
386
+ uint32_t vd, uint32_t vj, uint32_t vk) \
387
+{ \
388
+ int i; \
389
+ VReg temp; \
390
+ VReg *Vd = &(env->fpr[vd].vreg); \
391
+ VReg *Vj = &(env->fpr[vj].vreg); \
392
+ VReg *Vk = &(env->fpr[vk].vreg); \
393
+ \
394
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
395
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
396
+ temp.E(i) = Vk->E(2 * i + 1); \
397
+ } \
398
+ *Vd = temp; \
399
+}
400
+
401
+VPICKOD(vpickod_b, 16, B)
402
+VPICKOD(vpickod_h, 32, H)
403
+VPICKOD(vpickod_w, 64, W)
404
+VPICKOD(vpickod_d, 128, D)
405
--
119
--
406
2.31.1
120
2.43.5
diff view generated by jsdifflib
1
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
1
With mips64 loongson ipi, num_cpu property is used. With loongarch
2
Signed-off-by: Song Gao <gaosong@loongson.cn>
2
ipi, num_cpu can be acquired from possible_cpu_arch_ids.
3
Message-Id: <20230504122810.4094787-2-gaosong@loongson.cn>
3
4
Here remove property num_cpu from loongson_ipi_common, and put it into
5
loongson and loongarch ipi separately.
6
7
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
8
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
4
---
9
---
5
linux-user/loongarch64/signal.c | 4 +-
10
hw/intc/loongarch_ipi.c | 6 ++++++
6
target/loongarch/cpu.c | 2 +-
11
hw/intc/loongson_ipi.c | 6 ++++++
7
target/loongarch/cpu.h | 21 ++++++++-
12
hw/intc/loongson_ipi_common.c | 6 ------
8
target/loongarch/gdbstub.c | 4 +-
13
3 files changed, 12 insertions(+), 6 deletions(-)
9
target/loongarch/internals.h | 22 +++++++++
10
target/loongarch/machine.c | 79 ++++++++++++++++++++++++++++++---
11
6 files changed, 119 insertions(+), 13 deletions(-)
12
14
13
diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
15
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/linux-user/loongarch64/signal.c
17
--- a/hw/intc/loongarch_ipi.c
16
+++ b/linux-user/loongarch64/signal.c
18
+++ b/hw/intc/loongarch_ipi.c
17
@@ -XXX,XX +XXX,XX @@ static void setup_sigframe(CPULoongArchState *env,
19
@@ -XXX,XX +XXX,XX @@
18
20
#include "hw/boards.h"
19
fpu_ctx = (struct target_fpu_context *)(info + 1);
21
#include "qapi/error.h"
20
for (i = 0; i < 32; ++i) {
22
#include "hw/intc/loongarch_ipi.h"
21
- __put_user(env->fpr[i], &fpu_ctx->regs[i]);
23
+#include "hw/qdev-properties.h"
22
+ __put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
24
#include "target/loongarch/cpu.h"
25
26
static AddressSpace *get_iocsr_as(CPUState *cpu)
27
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
23
}
28
}
24
__put_user(read_fcc(env), &fpu_ctx->fcc);
29
}
25
__put_user(env->fcsr0, &fpu_ctx->fcsr);
30
26
@@ -XXX,XX +XXX,XX @@ static void restore_sigframe(CPULoongArchState *env,
31
+static const Property loongarch_ipi_properties[] = {
27
uint64_t fcc;
32
+ DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1),
28
29
for (i = 0; i < 32; ++i) {
30
- __get_user(env->fpr[i], &fpu_ctx->regs[i]);
31
+ __get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
32
}
33
__get_user(fcc, &fpu_ctx->fcc);
34
write_fcc(env, fcc);
35
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/loongarch/cpu.c
38
+++ b/target/loongarch/cpu.c
39
@@ -XXX,XX +XXX,XX @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
40
/* fpr */
41
if (flags & CPU_DUMP_FPU) {
42
for (i = 0; i < 32; i++) {
43
- qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i]);
44
+ qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i].vreg.D(0));
45
if ((i & 3) == 3) {
46
qemu_fprintf(f, "\n");
47
}
48
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/loongarch/cpu.h
51
+++ b/target/loongarch/cpu.h
52
@@ -XXX,XX +XXX,XX @@
53
#ifndef LOONGARCH_CPU_H
54
#define LOONGARCH_CPU_H
55
56
+#include "qemu/int128.h"
57
#include "exec/cpu-defs.h"
58
#include "fpu/softfloat-types.h"
59
#include "hw/registerfields.h"
60
@@ -XXX,XX +XXX,XX @@ FIELD(TLB_MISC, ASID, 1, 10)
61
FIELD(TLB_MISC, VPPN, 13, 35)
62
FIELD(TLB_MISC, PS, 48, 6)
63
64
+#define LSX_LEN (128)
65
+typedef union VReg {
66
+ int8_t B[LSX_LEN / 8];
67
+ int16_t H[LSX_LEN / 16];
68
+ int32_t W[LSX_LEN / 32];
69
+ int64_t D[LSX_LEN / 64];
70
+ uint8_t UB[LSX_LEN / 8];
71
+ uint16_t UH[LSX_LEN / 16];
72
+ uint32_t UW[LSX_LEN / 32];
73
+ uint64_t UD[LSX_LEN / 64];
74
+ Int128 Q[LSX_LEN / 128];
75
+}VReg;
76
+
77
+typedef union fpr_t fpr_t;
78
+union fpr_t {
79
+ VReg vreg;
80
+};
33
+};
81
+
34
+
82
struct LoongArchTLB {
35
static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
83
uint64_t tlb_misc;
36
{
84
/* Fields corresponding to CSR_TLBELO0/1 */
37
LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
85
@@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState {
38
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
86
uint64_t gpr[32];
39
87
uint64_t pc;
40
device_class_set_parent_realize(dc, loongarch_ipi_realize,
88
41
&lic->parent_realize);
89
- uint64_t fpr[32];
42
+ device_class_set_props(dc, loongarch_ipi_properties);
90
+ fpr_t fpr[32];
43
licc->get_iocsr_as = get_iocsr_as;
91
float_status fp_status;
44
licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
92
bool cf[8];
45
}
93
46
diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c
94
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
95
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
96
--- a/target/loongarch/gdbstub.c
48
--- a/hw/intc/loongson_ipi.c
97
+++ b/target/loongarch/gdbstub.c
49
+++ b/hw/intc/loongson_ipi.c
98
@@ -XXX,XX +XXX,XX @@ static int loongarch_gdb_get_fpu(CPULoongArchState *env,
99
GByteArray *mem_buf, int n)
100
{
101
if (0 <= n && n < 32) {
102
- return gdb_get_reg64(mem_buf, env->fpr[n]);
103
+ return gdb_get_reg64(mem_buf, env->fpr[n].vreg.D(0));
104
} else if (n == 32) {
105
uint64_t val = read_fcc(env);
106
return gdb_get_reg64(mem_buf, val);
107
@@ -XXX,XX +XXX,XX @@ static int loongarch_gdb_set_fpu(CPULoongArchState *env,
108
int length = 0;
109
110
if (0 <= n && n < 32) {
111
- env->fpr[n] = ldq_p(mem_buf);
112
+ env->fpr[n].vreg.D(0) = ldq_p(mem_buf);
113
length = 8;
114
} else if (n == 32) {
115
uint64_t val = ldq_p(mem_buf);
116
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/loongarch/internals.h
119
+++ b/target/loongarch/internals.h
120
@@ -XXX,XX +XXX,XX @@
50
@@ -XXX,XX +XXX,XX @@
121
/* Global bit for huge page */
51
122
#define LOONGARCH_HGLOBAL_SHIFT 12
52
#include "qemu/osdep.h"
123
53
#include "hw/intc/loongson_ipi.h"
124
+#if HOST_BIG_ENDIAN
54
+#include "hw/qdev-properties.h"
125
+#define B(x) B[15 - (x)]
55
#include "qapi/error.h"
126
+#define H(x) H[7 - (x)]
56
#include "target/mips/cpu.h"
127
+#define W(x) W[3 - (x)]
57
128
+#define D(x) D[1 - (x)]
58
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_unrealize(DeviceState *dev)
129
+#define UB(x) UB[15 - (x)]
59
k->parent_unrealize(dev);
130
+#define UH(x) UH[7 - (x)]
60
}
131
+#define UW(x) UW[3 - (x)]
61
132
+#define UD(x) UD[1 -(x)]
62
+static const Property loongson_ipi_properties[] = {
133
+#define Q(x) Q[x]
63
+ DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1),
134
+#else
135
+#define B(x) B[x]
136
+#define H(x) H[x]
137
+#define W(x) W[x]
138
+#define D(x) D[x]
139
+#define UB(x) UB[x]
140
+#define UH(x) UH[x]
141
+#define UW(x) UW[x]
142
+#define UD(x) UD[x]
143
+#define Q(x) Q[x]
144
+#endif
145
+
146
void loongarch_translate_init(void);
147
148
void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
149
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
150
index XXXXXXX..XXXXXXX 100644
151
--- a/target/loongarch/machine.c
152
+++ b/target/loongarch/machine.c
153
@@ -XXX,XX +XXX,XX @@
154
#include "migration/cpu.h"
155
#include "internals.h"
156
157
+static const VMStateDescription vmstate_fpu_reg = {
158
+ .name = "fpu_reg",
159
+ .version_id = 1,
160
+ .minimum_version_id = 1,
161
+ .fields = (VMStateField[]) {
162
+ VMSTATE_UINT64(UD(0), VReg),
163
+ VMSTATE_END_OF_LIST()
164
+ }
165
+};
64
+};
166
+
65
+
167
+#define VMSTATE_FPU_REGS(_field, _state, _start) \
66
static void loongson_ipi_class_init(ObjectClass *klass, void *data)
168
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \
67
{
169
+ vmstate_fpu_reg, fpr_t)
68
DeviceClass *dc = DEVICE_CLASS(klass);
170
+
69
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data)
171
+static bool fpu_needed(void *opaque)
70
&lic->parent_realize);
172
+{
71
device_class_set_parent_unrealize(dc, loongson_ipi_unrealize,
173
+ LoongArchCPU *cpu = opaque;
72
&lic->parent_unrealize);
174
+
73
+ device_class_set_props(dc, loongson_ipi_properties);
175
+ return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, FP);
74
licc->get_iocsr_as = get_iocsr_as;
176
+}
75
licc->cpu_by_arch_id = cpu_by_arch_id;
177
+
76
}
178
+static const VMStateDescription vmstate_fpu = {
77
diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c
179
+ .name = "cpu/fpu",
78
index XXXXXXX..XXXXXXX 100644
180
+ .version_id = 1,
79
--- a/hw/intc/loongson_ipi_common.c
181
+ .minimum_version_id = 1,
80
+++ b/hw/intc/loongson_ipi_common.c
182
+ .needed = fpu_needed,
81
@@ -XXX,XX +XXX,XX @@
183
+ .fields = (VMStateField[]) {
82
#include "hw/sysbus.h"
184
+ VMSTATE_FPU_REGS(env.fpr, LoongArchCPU, 0),
83
#include "hw/intc/loongson_ipi_common.h"
185
+ VMSTATE_UINT32(env.fcsr0, LoongArchCPU),
84
#include "hw/irq.h"
186
+ VMSTATE_BOOL_ARRAY(env.cf, LoongArchCPU, 8),
85
-#include "hw/qdev-properties.h"
187
+ VMSTATE_END_OF_LIST()
86
#include "qemu/log.h"
188
+ },
87
#include "migration/vmstate.h"
189
+};
88
#include "trace.h"
190
+
89
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_loongson_ipi_common = {
191
+static const VMStateDescription vmstate_lsxh_reg = {
90
}
192
+ .name = "lsxh_reg",
193
+ .version_id = 1,
194
+ .minimum_version_id = 1,
195
+ .fields = (VMStateField[]) {
196
+ VMSTATE_UINT64(UD(1), VReg),
197
+ VMSTATE_END_OF_LIST()
198
+ }
199
+};
200
+
201
+#define VMSTATE_LSXH_REGS(_field, _state, _start) \
202
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \
203
+ vmstate_lsxh_reg, fpr_t)
204
+
205
+static bool lsx_needed(void *opaque)
206
+{
207
+ LoongArchCPU *cpu = opaque;
208
+
209
+ return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LSX);
210
+}
211
+
212
+static const VMStateDescription vmstate_lsx = {
213
+ .name = "cpu/lsx",
214
+ .version_id = 1,
215
+ .minimum_version_id = 1,
216
+ .needed = lsx_needed,
217
+ .fields = (VMStateField[]) {
218
+ VMSTATE_LSXH_REGS(env.fpr, LoongArchCPU, 0),
219
+ VMSTATE_END_OF_LIST()
220
+ },
221
+};
222
+
223
/* TLB state */
224
const VMStateDescription vmstate_tlb = {
225
.name = "cpu/tlb",
226
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_tlb = {
227
};
91
};
228
92
229
/* LoongArch CPU state */
93
-static const Property ipi_common_properties[] = {
94
- DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1),
95
-};
230
-
96
-
231
const VMStateDescription vmstate_loongarch_cpu = {
97
static void loongson_ipi_common_class_init(ObjectClass *klass, void *data)
232
.name = "cpu",
98
{
233
- .version_id = 0,
99
DeviceClass *dc = DEVICE_CLASS(klass);
234
- .minimum_version_id = 0,
100
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_common_class_init(ObjectClass *klass, void *data)
235
+ .version_id = 1,
101
&licc->parent_realize);
236
+ .minimum_version_id = 1,
102
device_class_set_parent_unrealize(dc, loongson_ipi_common_unrealize,
237
.fields = (VMStateField[]) {
103
&licc->parent_unrealize);
238
-
104
- device_class_set_props(dc, ipi_common_properties);
239
VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32),
105
dc->vmsd = &vmstate_loongson_ipi_common;
240
VMSTATE_UINTTL(env.pc, LoongArchCPU),
106
}
241
- VMSTATE_UINT64_ARRAY(env.fpr, LoongArchCPU, 32),
107
242
- VMSTATE_UINT32(env.fcsr0, LoongArchCPU),
243
- VMSTATE_BOOL_ARRAY(env.cf, LoongArchCPU, 8),
244
245
/* Remaining CSRs */
246
VMSTATE_UINT64(env.CSR_CRMD, LoongArchCPU),
247
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_loongarch_cpu = {
248
249
VMSTATE_END_OF_LIST()
250
},
251
+ .subsections = (const VMStateDescription*[]) {
252
+ &vmstate_fpu,
253
+ &vmstate_lsx,
254
+ }
255
};
256
--
108
--
257
2.31.1
109
2.43.5
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Song Gao <gaosong@loongson.cn>
3
Message-Id: <20230504122810.4094787-3-gaosong@loongson.cn>
4
---
5
target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++++
6
target/loongarch/lsx_helper.c | 6 ++++++
7
target/loongarch/meson.build | 1 +
8
target/loongarch/translate.c | 1 +
9
4 files changed, 13 insertions(+)
10
create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc
11
create mode 100644 target/loongarch/lsx_helper.c
12
1
13
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
17
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
18
@@ -XXX,XX +XXX,XX @@
19
+/* SPDX-License-Identifier: GPL-2.0-or-later */
20
+/*
21
+ * LSX translate functions
22
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
23
+ */
24
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
25
new file mode 100644
26
index XXXXXXX..XXXXXXX
27
--- /dev/null
28
+++ b/target/loongarch/lsx_helper.c
29
@@ -XXX,XX +XXX,XX @@
30
+/* SPDX-License-Identifier: GPL-2.0-or-later */
31
+/*
32
+ * QEMU LoongArch LSX helper functions.
33
+ *
34
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
35
+ */
36
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/loongarch/meson.build
39
+++ b/target/loongarch/meson.build
40
@@ -XXX,XX +XXX,XX @@ loongarch_tcg_ss.add(files(
41
'op_helper.c',
42
'translate.c',
43
'gdbstub.c',
44
+ 'lsx_helper.c',
45
))
46
loongarch_tcg_ss.add(zlib)
47
48
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/loongarch/translate.c
51
+++ b/target/loongarch/translate.c
52
@@ -XXX,XX +XXX,XX @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
53
#include "insn_trans/trans_fmemory.c.inc"
54
#include "insn_trans/trans_branch.c.inc"
55
#include "insn_trans/trans_privileged.c.inc"
56
+#include "insn_trans/trans_lsx.c.inc"
57
58
static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
59
{
60
--
61
2.31.1
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Song Gao <gaosong@loongson.cn>
3
Message-Id: <20230504122810.4094787-4-gaosong@loongson.cn>
4
---
5
target/loongarch/cpu.c | 2 ++
6
target/loongarch/cpu.h | 2 ++
7
target/loongarch/insn_trans/trans_lsx.c.inc | 11 +++++++++++
8
3 files changed, 15 insertions(+)
9
1
10
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/loongarch/cpu.c
13
+++ b/target/loongarch/cpu.c
14
@@ -XXX,XX +XXX,XX @@ static const char * const excp_names[] = {
15
[EXCCODE_FPE] = "Floating Point Exception",
16
[EXCCODE_DBP] = "Debug breakpoint",
17
[EXCCODE_BCE] = "Bound Check Exception",
18
+ [EXCCODE_SXD] = "128 bit vector instructions Disable exception",
19
};
20
21
const char *loongarch_exception_name(int32_t exception)
22
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
23
case EXCCODE_FPD:
24
case EXCCODE_FPE:
25
case EXCCODE_BCE:
26
+ case EXCCODE_SXD:
27
env->CSR_BADV = env->pc;
28
QEMU_FALLTHROUGH;
29
case EXCCODE_ADEM:
30
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/loongarch/cpu.h
33
+++ b/target/loongarch/cpu.h
34
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch)
35
#define HW_FLAGS_PLV_MASK R_CSR_CRMD_PLV_MASK /* 0x03 */
36
#define HW_FLAGS_CRMD_PG R_CSR_CRMD_PG_MASK /* 0x10 */
37
#define HW_FLAGS_EUEN_FPE 0x04
38
+#define HW_FLAGS_EUEN_SXE 0x08
39
40
static inline void cpu_get_tb_cpu_state(CPULoongArchState *env,
41
target_ulong *pc,
42
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env,
43
*cs_base = 0;
44
*flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK);
45
*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE;
46
+ *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE;
47
}
48
49
void loongarch_cpu_list(void);
50
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
53
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
54
@@ -XXX,XX +XXX,XX @@
55
* LSX translate functions
56
* Copyright (c) 2022-2023 Loongson Technology Corporation Limited
57
*/
58
+
59
+#ifndef CONFIG_USER_ONLY
60
+#define CHECK_SXE do { \
61
+ if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
62
+ generate_exception(ctx, EXCCODE_SXD); \
63
+ return true; \
64
+ } \
65
+} while (0)
66
+#else
67
+#define CHECK_SXE
68
+#endif
69
--
70
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VADD.{B/H/W/D/Q};
3
- VSUB.{B/H/W/D/Q}.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-5-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 23 +++++++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 69 +++++++++++++++++++++
11
target/loongarch/insns.decode | 22 +++++++
12
target/loongarch/translate.c | 24 +++++++
13
target/loongarch/translate.h | 1 +
14
5 files changed, 139 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ PCADD_INSN(pcaddi)
21
PCADD_INSN(pcalau12i)
22
PCADD_INSN(pcaddu12i)
23
PCADD_INSN(pcaddu18i)
24
+
25
+#define INSN_LSX(insn, type) \
26
+static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
27
+{ \
28
+ output_##type(ctx, a, #insn); \
29
+ return true; \
30
+}
31
+
32
+static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic)
33
+{
34
+ output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
35
+}
36
+
37
+INSN_LSX(vadd_b, vvv)
38
+INSN_LSX(vadd_h, vvv)
39
+INSN_LSX(vadd_w, vvv)
40
+INSN_LSX(vadd_d, vvv)
41
+INSN_LSX(vadd_q, vvv)
42
+INSN_LSX(vsub_b, vvv)
43
+INSN_LSX(vsub_h, vvv)
44
+INSN_LSX(vsub_w, vvv)
45
+INSN_LSX(vsub_d, vvv)
46
+INSN_LSX(vsub_q, vvv)
47
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
50
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
51
@@ -XXX,XX +XXX,XX @@
52
#else
53
#define CHECK_SXE
54
#endif
55
+
56
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
57
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
58
+{
59
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
60
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
61
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
62
+
63
+ CHECK_SXE;
64
+
65
+ func(cpu_env, vd, vj, vk);
66
+ return true;
67
+}
68
+
69
+static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
70
+ void (*func)(unsigned, uint32_t, uint32_t,
71
+ uint32_t, uint32_t, uint32_t))
72
+{
73
+ uint32_t vd_ofs, vj_ofs, vk_ofs;
74
+
75
+ CHECK_SXE;
76
+
77
+ vd_ofs = vec_full_offset(a->vd);
78
+ vj_ofs = vec_full_offset(a->vj);
79
+ vk_ofs = vec_full_offset(a->vk);
80
+
81
+ func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
82
+ return true;
83
+}
84
+
85
+TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add)
86
+TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add)
87
+TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add)
88
+TRANS(vadd_d, gvec_vvv, MO_64, tcg_gen_gvec_add)
89
+
90
+#define VADDSUB_Q(NAME) \
91
+static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
92
+{ \
93
+ TCGv_i64 rh, rl, ah, al, bh, bl; \
94
+ \
95
+ CHECK_SXE; \
96
+ \
97
+ rh = tcg_temp_new_i64(); \
98
+ rl = tcg_temp_new_i64(); \
99
+ ah = tcg_temp_new_i64(); \
100
+ al = tcg_temp_new_i64(); \
101
+ bh = tcg_temp_new_i64(); \
102
+ bl = tcg_temp_new_i64(); \
103
+ \
104
+ get_vreg64(ah, a->vj, 1); \
105
+ get_vreg64(al, a->vj, 0); \
106
+ get_vreg64(bh, a->vk, 1); \
107
+ get_vreg64(bl, a->vk, 0); \
108
+ \
109
+ tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \
110
+ \
111
+ set_vreg64(rh, a->vd, 1); \
112
+ set_vreg64(rl, a->vd, 0); \
113
+ \
114
+ return true; \
115
+}
116
+
117
+VADDSUB_Q(add)
118
+VADDSUB_Q(sub)
119
+
120
+TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub)
121
+TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub)
122
+TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub)
123
+TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub)
124
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
125
index XXXXXXX..XXXXXXX 100644
126
--- a/target/loongarch/insns.decode
127
+++ b/target/loongarch/insns.decode
128
@@ -XXX,XX +XXX,XX @@ ldpte 0000 01100100 01 ........ ..... 00000 @j_i
129
ertn 0000 01100100 10000 01110 00000 00000 @empty
130
idle 0000 01100100 10001 ............... @i15
131
dbcl 0000 00000010 10101 ............... @i15
132
+
133
+#
134
+# LSX Argument sets
135
+#
136
+
137
+&vvv vd vj vk
138
+
139
+#
140
+# LSX Formats
141
+#
142
+@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
143
+
144
+vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
145
+vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
146
+vadd_w 0111 00000000 10110 ..... ..... ..... @vvv
147
+vadd_d 0111 00000000 10111 ..... ..... ..... @vvv
148
+vadd_q 0111 00010010 11010 ..... ..... ..... @vvv
149
+vsub_b 0111 00000000 11000 ..... ..... ..... @vvv
150
+vsub_h 0111 00000000 11001 ..... ..... ..... @vvv
151
+vsub_w 0111 00000000 11010 ..... ..... ..... @vvv
152
+vsub_d 0111 00000000 11011 ..... ..... ..... @vvv
153
+vsub_q 0111 00010010 11011 ..... ..... ..... @vvv
154
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
155
index XXXXXXX..XXXXXXX 100644
156
--- a/target/loongarch/translate.c
157
+++ b/target/loongarch/translate.c
158
@@ -XXX,XX +XXX,XX @@
159
#include "qemu/osdep.h"
160
#include "cpu.h"
161
#include "tcg/tcg-op.h"
162
+#include "tcg/tcg-op-gvec.h"
163
+
164
#include "exec/translator.h"
165
#include "exec/helper-proto.h"
166
#include "exec/helper-gen.h"
167
@@ -XXX,XX +XXX,XX @@ TCGv_i64 cpu_fpr[32];
168
#define DISAS_EXIT DISAS_TARGET_1
169
#define DISAS_EXIT_UPDATE DISAS_TARGET_2
170
171
+static inline int vec_full_offset(int regno)
172
+{
173
+ return offsetof(CPULoongArchState, fpr[regno]);
174
+}
175
+
176
+static inline void get_vreg64(TCGv_i64 dest, int regno, int index)
177
+{
178
+ tcg_gen_ld_i64(dest, cpu_env,
179
+ offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
180
+}
181
+
182
+static inline void set_vreg64(TCGv_i64 src, int regno, int index)
183
+{
184
+ tcg_gen_st_i64(src, cpu_env,
185
+ offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
186
+}
187
+
188
static inline int plus_1(DisasContext *ctx, int x)
189
{
190
return x + 1;
191
@@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
192
CPUState *cs)
193
{
194
int64_t bound;
195
+ CPULoongArchState *env = cs->env_ptr;
196
DisasContext *ctx = container_of(dcbase, DisasContext, base);
197
198
ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
199
@@ -XXX,XX +XXX,XX @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
200
bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
201
ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
202
203
+ if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) {
204
+ ctx->vl = LSX_LEN;
205
+ }
206
+
207
ctx->zero = tcg_constant_tl(0);
208
}
209
210
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
211
index XXXXXXX..XXXXXXX 100644
212
--- a/target/loongarch/translate.h
213
+++ b/target/loongarch/translate.h
214
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
215
uint32_t opcode;
216
uint16_t mem_idx;
217
uint16_t plv;
218
+ int vl; /* Vector length */
219
TCGv zero;
220
} DisasContext;
221
222
--
223
2.31.1
diff view generated by jsdifflib
1
Introduce set_fpr() and get_fpr() and remove cpu_fpr.
1
Supported CPU number can be acquired from function
2
possible_cpu_arch_ids(), so the num-cpu property is not necessary and can
3
be removed.
2
4
3
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
4
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
5
Message-Id: <20230504122810.4094787-44-gaosong@loongson.cn>
6
---
7
---
7
.../loongarch/insn_trans/trans_farith.c.inc | 72 +++++++++++++++----
8
hw/intc/loongarch_ipi.c | 13 ++++++++-----
8
target/loongarch/insn_trans/trans_fcmp.c.inc | 12 ++--
9
include/hw/intc/loongson_ipi_common.h | 2 ++
9
.../loongarch/insn_trans/trans_fmemory.c.inc | 37 ++++++----
10
2 files changed, 10 insertions(+), 5 deletions(-)
10
target/loongarch/insn_trans/trans_fmov.c.inc | 31 +++++---
11
target/loongarch/translate.c | 20 ++++--
12
5 files changed, 129 insertions(+), 43 deletions(-)
13
11
14
diff --git a/target/loongarch/insn_trans/trans_farith.c.inc b/target/loongarch/insn_trans/trans_farith.c.inc
12
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/target/loongarch/insn_trans/trans_farith.c.inc
14
--- a/hw/intc/loongarch_ipi.c
17
+++ b/target/loongarch/insn_trans/trans_farith.c.inc
15
+++ b/hw/intc/loongarch_ipi.c
18
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
19
static bool gen_fff(DisasContext *ctx, arg_fff *a,
20
void (*func)(TCGv, TCGv_env, TCGv, TCGv))
21
{
17
{
22
+ TCGv dest = get_fpr(ctx, a->fd);
18
LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev);
23
+ TCGv src1 = get_fpr(ctx, a->fj);
19
LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev);
24
+ TCGv src2 = get_fpr(ctx, a->fk);
20
+ MachineState *machine = MACHINE(qdev_get_machine());
25
+
21
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
26
CHECK_FPE;
22
+ const CPUArchIdList *id_list;
27
23
Error *local_err = NULL;
28
- func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk]);
24
int i;
29
+ func(dest, cpu_env, src1, src2);
25
30
+ set_fpr(a->fd, dest);
26
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
31
+
27
return;
32
return true;
33
}
34
35
static bool gen_ff(DisasContext *ctx, arg_ff *a,
36
void (*func)(TCGv, TCGv_env, TCGv))
37
{
38
+ TCGv dest = get_fpr(ctx, a->fd);
39
+ TCGv src = get_fpr(ctx, a->fj);
40
+
41
CHECK_FPE;
42
43
- func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj]);
44
+ func(dest, cpu_env, src);
45
+ set_fpr(a->fd, dest);
46
+
47
return true;
48
}
49
50
@@ -XXX,XX +XXX,XX @@ static bool gen_muladd(DisasContext *ctx, arg_ffff *a,
51
int flag)
52
{
53
TCGv_i32 tflag = tcg_constant_i32(flag);
54
+ TCGv dest = get_fpr(ctx, a->fd);
55
+ TCGv src1 = get_fpr(ctx, a->fj);
56
+ TCGv src2 = get_fpr(ctx, a->fk);
57
+ TCGv src3 = get_fpr(ctx, a->fa);
58
59
CHECK_FPE;
60
61
- func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj],
62
- cpu_fpr[a->fk], cpu_fpr[a->fa], tflag);
63
+ func(dest, cpu_env, src1, src2, src3, tflag);
64
+ set_fpr(a->fd, dest);
65
+
66
return true;
67
}
68
69
static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a)
70
{
71
+ TCGv dest = get_fpr(ctx, a->fd);
72
+ TCGv src1 = get_fpr(ctx, a->fk);
73
+ TCGv src2 = get_fpr(ctx, a->fj);
74
+
75
CHECK_FPE;
76
77
- tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 31);
78
+ tcg_gen_deposit_i64(dest, src1, src2, 0, 31);
79
+ set_fpr(a->fd, dest);
80
+
81
return true;
82
}
83
84
static bool trans_fcopysign_d(DisasContext *ctx, arg_fcopysign_d *a)
85
{
86
+ TCGv dest = get_fpr(ctx, a->fd);
87
+ TCGv src1 = get_fpr(ctx, a->fk);
88
+ TCGv src2 = get_fpr(ctx, a->fj);
89
+
90
CHECK_FPE;
91
92
- tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 63);
93
+ tcg_gen_deposit_i64(dest, src1, src2, 0, 63);
94
+ set_fpr(a->fd, dest);
95
+
96
return true;
97
}
98
99
static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a)
100
{
101
+ TCGv dest = get_fpr(ctx, a->fd);
102
+ TCGv src = get_fpr(ctx, a->fj);
103
+
104
CHECK_FPE;
105
106
- tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 31));
107
- gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]);
108
+ tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31));
109
+ gen_nanbox_s(dest, dest);
110
+ set_fpr(a->fd, dest);
111
+
112
return true;
113
}
114
115
static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a)
116
{
117
+ TCGv dest = get_fpr(ctx, a->fd);
118
+ TCGv src = get_fpr(ctx, a->fj);
119
+
120
CHECK_FPE;
121
122
- tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 63));
123
+ tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63));
124
+ set_fpr(a->fd, dest);
125
+
126
return true;
127
}
128
129
static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a)
130
{
131
+ TCGv dest = get_fpr(ctx, a->fd);
132
+ TCGv src = get_fpr(ctx, a->fj);
133
+
134
CHECK_FPE;
135
136
- tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x80000000);
137
- gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]);
138
+ tcg_gen_xori_i64(dest, src, 0x80000000);
139
+ gen_nanbox_s(dest, dest);
140
+ set_fpr(a->fd, dest);
141
+
142
return true;
143
}
144
145
static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a)
146
{
147
+ TCGv dest = get_fpr(ctx, a->fd);
148
+ TCGv src = get_fpr(ctx, a->fj);
149
+
150
CHECK_FPE;
151
152
- tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x8000000000000000LL);
153
+ tcg_gen_xori_i64(dest, src, 0x8000000000000000LL);
154
+ set_fpr(a->fd, dest);
155
+
156
return true;
157
}
158
159
diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/insn_trans/trans_fcmp.c.inc
160
index XXXXXXX..XXXXXXX 100644
161
--- a/target/loongarch/insn_trans/trans_fcmp.c.inc
162
+++ b/target/loongarch/insn_trans/trans_fcmp.c.inc
163
@@ -XXX,XX +XXX,XX @@ static uint32_t get_fcmp_flags(int cond)
164
165
static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
166
{
167
- TCGv var;
168
+ TCGv var, src1, src2;
169
uint32_t flags;
170
void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
171
172
CHECK_FPE;
173
174
var = tcg_temp_new();
175
+ src1 = get_fpr(ctx, a->fj);
176
+ src2 = get_fpr(ctx, a->fk);
177
fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s);
178
flags = get_fcmp_flags(a->fcond >> 1);
179
180
- fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags));
181
+ fn(var, cpu_env, src1, src2, tcg_constant_i32(flags));
182
183
tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd]));
184
return true;
185
@@ -XXX,XX +XXX,XX @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
186
187
static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a)
188
{
189
- TCGv var;
190
+ TCGv var, src1, src2;
191
uint32_t flags;
192
void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
193
194
CHECK_FPE;
195
196
var = tcg_temp_new();
197
+ src1 = get_fpr(ctx, a->fj);
198
+ src2 = get_fpr(ctx, a->fk);
199
fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d);
200
flags = get_fcmp_flags(a->fcond >> 1);
201
202
- fn(var, cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk], tcg_constant_i32(flags));
203
+ fn(var, cpu_env, src1, src2, tcg_constant_i32(flags));
204
205
tcg_gen_st8_tl(var, cpu_env, offsetof(CPULoongArchState, cf[a->cd]));
206
return true;
207
diff --git a/target/loongarch/insn_trans/trans_fmemory.c.inc b/target/loongarch/insn_trans/trans_fmemory.c.inc
208
index XXXXXXX..XXXXXXX 100644
209
--- a/target/loongarch/insn_trans/trans_fmemory.c.inc
210
+++ b/target/loongarch/insn_trans/trans_fmemory.c.inc
211
@@ -XXX,XX +XXX,XX @@ static void maybe_nanbox_load(TCGv freg, MemOp mop)
212
static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
213
{
214
TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
215
+ TCGv dest = get_fpr(ctx, a->fd);
216
217
CHECK_FPE;
218
219
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
220
addr = temp;
221
}
28
}
222
29
223
- tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
30
- if (lics->num_cpu == 0) {
224
- maybe_nanbox_load(cpu_fpr[a->fd], mop);
31
- error_setg(errp, "num-cpu must be at least 1");
225
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
32
- return;
226
+ maybe_nanbox_load(dest, mop);
227
+ set_fpr(a->fd, dest);
228
229
return true;
230
}
231
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
232
static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
233
{
234
TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
235
+ TCGv src = get_fpr(ctx, a->fd);
236
237
CHECK_FPE;
238
239
@@ -XXX,XX +XXX,XX @@ static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
240
addr = temp;
241
}
242
243
- tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
244
+ tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop);
245
+
246
return true;
247
}
248
249
@@ -XXX,XX +XXX,XX @@ static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop)
250
{
251
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
252
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
253
+ TCGv dest = get_fpr(ctx, a->fd);
254
TCGv addr;
255
256
CHECK_FPE;
257
258
addr = tcg_temp_new();
259
tcg_gen_add_tl(addr, src1, src2);
260
- tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
261
- maybe_nanbox_load(cpu_fpr[a->fd], mop);
262
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
263
+ maybe_nanbox_load(dest, mop);
264
+ set_fpr(a->fd, dest);
265
266
return true;
267
}
268
@@ -XXX,XX +XXX,XX @@ static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop)
269
{
270
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
271
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
272
+ TCGv src3 = get_fpr(ctx, a->fd);
273
TCGv addr;
274
275
CHECK_FPE;
276
277
addr = tcg_temp_new();
278
tcg_gen_add_tl(addr, src1, src2);
279
- tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
280
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
281
282
return true;
283
}
284
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
285
{
286
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
287
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
288
+ TCGv dest = get_fpr(ctx, a->fd);
289
TCGv addr;
290
291
CHECK_FPE;
292
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
293
addr = tcg_temp_new();
294
gen_helper_asrtgt_d(cpu_env, src1, src2);
295
tcg_gen_add_tl(addr, src1, src2);
296
- tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
297
- maybe_nanbox_load(cpu_fpr[a->fd], mop);
298
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
299
+ maybe_nanbox_load(dest, mop);
300
+ set_fpr(a->fd, dest);
301
302
return true;
303
}
304
@@ -XXX,XX +XXX,XX @@ static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
305
{
306
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
307
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
308
+ TCGv src3 = get_fpr(ctx, a->fd);
309
TCGv addr;
310
311
CHECK_FPE;
312
@@ -XXX,XX +XXX,XX @@ static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
313
addr = tcg_temp_new();
314
gen_helper_asrtgt_d(cpu_env, src1, src2);
315
tcg_gen_add_tl(addr, src1, src2);
316
- tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
317
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
318
319
return true;
320
}
321
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop)
322
{
323
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
324
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
325
+ TCGv dest = get_fpr(ctx, a->fd);
326
TCGv addr;
327
328
CHECK_FPE;
329
@@ -XXX,XX +XXX,XX @@ static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop)
330
addr = tcg_temp_new();
331
gen_helper_asrtle_d(cpu_env, src1, src2);
332
tcg_gen_add_tl(addr, src1, src2);
333
- tcg_gen_qemu_ld_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
334
- maybe_nanbox_load(cpu_fpr[a->fd], mop);
335
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
336
+ maybe_nanbox_load(dest, mop);
337
+ set_fpr(a->fd, dest);
338
339
return true;
340
}
341
@@ -XXX,XX +XXX,XX @@ static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop)
342
{
343
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
344
TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
345
+ TCGv src3 = get_fpr(ctx, a->fd);
346
TCGv addr;
347
348
CHECK_FPE;
349
@@ -XXX,XX +XXX,XX @@ static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop)
350
addr = tcg_temp_new();
351
gen_helper_asrtle_d(cpu_env, src1, src2);
352
tcg_gen_add_tl(addr, src1, src2);
353
- tcg_gen_qemu_st_tl(cpu_fpr[a->fd], addr, ctx->mem_idx, mop);
354
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
355
356
return true;
357
}
358
diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc
359
index XXXXXXX..XXXXXXX 100644
360
--- a/target/loongarch/insn_trans/trans_fmov.c.inc
361
+++ b/target/loongarch/insn_trans/trans_fmov.c.inc
362
@@ -XXX,XX +XXX,XX @@ static const uint32_t fcsr_mask[4] = {
363
static bool trans_fsel(DisasContext *ctx, arg_fsel *a)
364
{
365
TCGv zero = tcg_constant_tl(0);
366
+ TCGv dest = get_fpr(ctx, a->fd);
367
+ TCGv src1 = get_fpr(ctx, a->fj);
368
+ TCGv src2 = get_fpr(ctx, a->fk);
369
TCGv cond;
370
371
CHECK_FPE;
372
373
cond = tcg_temp_new();
374
tcg_gen_ld8u_tl(cond, cpu_env, offsetof(CPULoongArchState, cf[a->ca]));
375
- tcg_gen_movcond_tl(TCG_COND_EQ, cpu_fpr[a->fd], cond, zero,
376
- cpu_fpr[a->fj], cpu_fpr[a->fk]);
377
+ tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2);
378
+ set_fpr(a->fd, dest);
379
380
return true;
381
}
382
@@ -XXX,XX +XXX,XX @@ static bool trans_fsel(DisasContext *ctx, arg_fsel *a)
383
static bool gen_f2f(DisasContext *ctx, arg_ff *a,
384
void (*func)(TCGv, TCGv), bool nanbox)
385
{
386
- TCGv dest = cpu_fpr[a->fd];
387
- TCGv src = cpu_fpr[a->fj];
388
+ TCGv dest = get_fpr(ctx, a->fd);
389
+ TCGv src = get_fpr(ctx, a->fj);
390
391
CHECK_FPE;
392
393
func(dest, src);
394
if (nanbox) {
395
- gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]);
396
+ gen_nanbox_s(dest, dest);
397
}
398
+ set_fpr(a->fd, dest);
399
400
return true;
401
}
402
@@ -XXX,XX +XXX,XX @@ static bool gen_r2f(DisasContext *ctx, arg_fr *a,
403
void (*func)(TCGv, TCGv))
404
{
405
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
406
+ TCGv dest = get_fpr(ctx, a->fd);
407
408
CHECK_FPE;
409
410
- func(cpu_fpr[a->fd], src);
411
+ func(dest, src);
412
+ set_fpr(a->fd, dest);
413
+
414
return true;
415
}
416
417
@@ -XXX,XX +XXX,XX @@ static bool gen_f2r(DisasContext *ctx, arg_rf *a,
418
void (*func)(TCGv, TCGv))
419
{
420
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
421
+ TCGv src = get_fpr(ctx, a->fj);
422
423
CHECK_FPE;
424
425
- func(dest, cpu_fpr[a->fj]);
426
+ func(dest, src);
427
gen_set_gpr(a->rd, dest, EXT_NONE);
428
429
return true;
430
@@ -XXX,XX +XXX,XX @@ static void gen_movfrh2gr_s(TCGv dest, TCGv src)
431
static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a)
432
{
433
TCGv t0;
434
+ TCGv src = get_fpr(ctx, a->fj);
435
436
CHECK_FPE;
437
438
t0 = tcg_temp_new();
439
- tcg_gen_andi_tl(t0, cpu_fpr[a->fj], 0x1);
440
+ tcg_gen_andi_tl(t0, src, 0x1);
441
tcg_gen_st8_tl(t0, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
442
443
return true;
444
@@ -XXX,XX +XXX,XX @@ static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a)
445
446
static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a)
447
{
448
+ TCGv dest = get_fpr(ctx, a->fd);
449
+
450
CHECK_FPE;
451
452
- tcg_gen_ld8u_tl(cpu_fpr[a->fd], cpu_env,
453
+ tcg_gen_ld8u_tl(dest, cpu_env,
454
offsetof(CPULoongArchState, cf[a->cj & 0x7]));
455
+ set_fpr(a->fd, dest);
456
+
457
return true;
458
}
459
460
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
461
index XXXXXXX..XXXXXXX 100644
462
--- a/target/loongarch/translate.c
463
+++ b/target/loongarch/translate.c
464
@@ -XXX,XX +XXX,XX @@
465
/* Global register indices */
466
TCGv cpu_gpr[32], cpu_pc;
467
static TCGv cpu_lladdr, cpu_llval;
468
-TCGv_i64 cpu_fpr[32];
469
470
#include "exec/gen-icount.h"
471
472
@@ -XXX,XX +XXX,XX @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
473
}
474
}
475
476
+static TCGv get_fpr(DisasContext *ctx, int reg_num)
477
+{
478
+ TCGv t = tcg_temp_new();
479
+ tcg_gen_ld_i64(t, cpu_env,
480
+ offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
481
+ return t;
482
+}
483
+
484
+static void set_fpr(int reg_num, TCGv val)
485
+{
486
+ tcg_gen_st_i64(val, cpu_env,
487
+ offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
488
+}
489
+
490
#include "decode-insns.c.inc"
491
#include "insn_trans/trans_arith.c.inc"
492
#include "insn_trans/trans_shift.c.inc"
493
@@ -XXX,XX +XXX,XX @@ void loongarch_translate_init(void)
494
regnames[i]);
495
}
496
497
- for (i = 0; i < 32; i++) {
498
- int off = offsetof(CPULoongArchState, fpr[i]);
499
- cpu_fpr[i] = tcg_global_mem_new_i64(cpu_env, off, fregnames[i]);
500
- }
33
- }
501
-
34
-
502
cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc");
35
+ assert(mc->possible_cpu_arch_ids);
503
cpu_lladdr = tcg_global_mem_new(cpu_env,
36
+ id_list = mc->possible_cpu_arch_ids(machine);
504
offsetof(CPULoongArchState, lladdr), "lladdr");
37
+ lics->num_cpu = id_list->len;
38
lics->cpu = g_new0(IPICore, lics->num_cpu);
39
for (i = 0; i < lics->num_cpu; i++) {
40
+ lics->cpu[i].arch_id = id_list->cpus[i].arch_id;
41
+ lics->cpu[i].cpu = CPU(id_list->cpus[i].cpu);
42
lics->cpu[i].ipi = lics;
43
qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1);
44
}
45
diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/include/hw/intc/loongson_ipi_common.h
48
+++ b/include/hw/intc/loongson_ipi_common.h
49
@@ -XXX,XX +XXX,XX @@ typedef struct IPICore {
50
/* 64bit buf divide into 2 32-bit buf */
51
uint32_t buf[IPI_MBX_NUM * 2];
52
qemu_irq irq;
53
+ uint64_t arch_id;
54
+ CPUState *cpu;
55
} IPICore;
56
57
struct LoongsonIPICommonState {
505
--
58
--
506
2.31.1
59
2.43.5
diff view generated by jsdifflib
1
From: Alex Bennée <alex.bennee@linaro.org>
1
Since cpu number can be acquired from possible_cpu_arch_ids(),
2
num-cpu property is not necessary. Here remove num-cpu property
3
for the TYPE_LOONGARCH_IPI object.
2
4
3
The calling function is already working with hwaddr and uint64_t so
5
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
4
let's avoid bringing target_ulong in if we don't need to.
6
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
5
6
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230404132711.2563638-1-alex.bennee@linaro.org>
10
Signed-off-by: Song Gao <gaosong@loongson.cn>
11
---
7
---
12
hw/intc/loongarch_ipi.c | 2 +-
8
hw/intc/loongarch_ipi.c | 5 -----
13
1 file changed, 1 insertion(+), 1 deletion(-)
9
hw/loongarch/virt.c | 1 -
10
2 files changed, 6 deletions(-)
14
11
15
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
12
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/intc/loongarch_ipi.c
14
--- a/hw/intc/loongarch_ipi.c
18
+++ b/hw/intc/loongarch_ipi.c
15
+++ b/hw/intc/loongarch_ipi.c
19
@@ -XXX,XX +XXX,XX @@ static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size)
16
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
20
return ret;
17
}
21
}
18
}
22
19
23
-static void send_ipi_data(CPULoongArchState *env, target_ulong val, target_ulong addr)
20
-static const Property loongarch_ipi_properties[] = {
24
+static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr)
21
- DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1),
22
-};
23
-
24
static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
25
{
25
{
26
int i, mask = 0, data = 0;
26
LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
27
27
@@ -XXX,XX +XXX,XX @@ static void loongarch_ipi_class_init(ObjectClass *klass, void *data)
28
29
device_class_set_parent_realize(dc, loongarch_ipi_realize,
30
&lic->parent_realize);
31
- device_class_set_props(dc, loongarch_ipi_properties);
32
licc->get_iocsr_as = get_iocsr_as;
33
licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
34
}
35
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/loongarch/virt.c
38
+++ b/hw/loongarch/virt.c
39
@@ -XXX,XX +XXX,XX @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
40
41
/* Create IPI device */
42
ipi = qdev_new(TYPE_LOONGARCH_IPI);
43
- qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus);
44
sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal);
45
46
/* IPI iocsr memory region */
28
--
47
--
29
2.31.1
48
2.43.5
30
31
diff view generated by jsdifflib
1
This patch includes:
1
Add a logical cpu index input parameter for function cpu_by_arch_id,
2
- VLD[X], VST[X];
2
CPUState::cpu_index is the logical cpu slot index for possible_cpus.
3
- VLDREPL.{B/H/W/D};
4
- VSTELM.{B/H/W/D}.
5
3
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
At the same time it is the logical index into LoongsonIPICommonState::IPICore,
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
5
here hide direct access to CPUState::cpu_index; it comes from
8
Message-Id: <20230504122810.4094787-42-gaosong@loongson.cn>
6
function cpu_by_arch_id().
7
8
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
9
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
9
---
10
---
10
target/loongarch/disas.c | 34 +++++
11
hw/intc/loongarch_ipi.c | 19 +++++++++++++++----
11
target/loongarch/insn_trans/trans_lsx.c.inc | 159 ++++++++++++++++++++
12
hw/intc/loongson_ipi.c | 23 ++++++++++++++++++++++-
12
target/loongarch/insns.decode | 36 +++++
13
hw/intc/loongson_ipi_common.c | 21 ++++++++++++---------
13
target/loongarch/translate.c | 10 ++
14
include/hw/intc/loongson_ipi_common.h | 3 ++-
14
4 files changed, 239 insertions(+)
15
4 files changed, 51 insertions(+), 15 deletions(-)
15
16
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
17
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
--- a/hw/intc/loongarch_ipi.c
19
+++ b/target/loongarch/disas.c
20
+++ b/hw/intc/loongarch_ipi.c
20
@@ -XXX,XX +XXX,XX @@ static inline int plus_1(DisasContext *ctx, int x)
21
@@ -XXX,XX +XXX,XX @@ static CPUArchId *find_cpu_by_archid(MachineState *ms, uint32_t id)
21
return x + 1;
22
return found_cpu;
22
}
23
}
23
24
24
+static inline int shl_1(DisasContext *ctx, int x)
25
-static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id)
26
+static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics,
27
+ int64_t arch_id, int *index, CPUState **pcs)
28
{
29
MachineState *machine = MACHINE(qdev_get_machine());
30
CPUArchId *archid;
31
+ CPUState *cs;
32
33
archid = find_cpu_by_archid(machine, arch_id);
34
- if (archid) {
35
- return CPU(archid->cpu);
36
+ if (archid && archid->cpu) {
37
+ cs = archid->cpu;
38
+ if (index) {
39
+ *index = cs->cpu_index;
40
+ }
41
+
42
+ if (pcs) {
43
+ *pcs = cs;
44
+ }
45
+
46
+ return MEMTX_OK;
47
}
48
49
- return NULL;
50
+ return MEMTX_ERROR;
51
}
52
53
static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
54
diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/hw/intc/loongson_ipi.c
57
+++ b/hw/intc/loongson_ipi.c
58
@@ -XXX,XX +XXX,XX @@ static AddressSpace *get_iocsr_as(CPUState *cpu)
59
return NULL;
60
}
61
62
+static int loongson_cpu_by_arch_id(LoongsonIPICommonState *lics,
63
+ int64_t arch_id, int *index, CPUState **pcs)
25
+{
64
+{
26
+ return x << 1;
65
+ CPUState *cs;
66
+
67
+ cs = cpu_by_arch_id(arch_id);
68
+ if (cs == NULL) {
69
+ return MEMTX_ERROR;
70
+ }
71
+
72
+ if (index) {
73
+ *index = cs->cpu_index;
74
+ }
75
+
76
+ if (pcs) {
77
+ *pcs = cs;
78
+ }
79
+
80
+ return MEMTX_OK;
27
+}
81
+}
28
+
82
+
29
static inline int shl_2(DisasContext *ctx, int x)
83
static const MemoryRegionOps loongson_ipi_core_ops = {
30
{
84
.read_with_attrs = loongson_ipi_core_readl,
31
return x << 2;
85
.write_with_attrs = loongson_ipi_core_writel,
86
@@ -XXX,XX +XXX,XX @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data)
87
&lic->parent_unrealize);
88
device_class_set_props(dc, loongson_ipi_properties);
89
licc->get_iocsr_as = get_iocsr_as;
90
- licc->cpu_by_arch_id = cpu_by_arch_id;
91
+ licc->cpu_by_arch_id = loongson_cpu_by_arch_id;
32
}
92
}
33
93
34
+static inline int shl_3(DisasContext *ctx, int x)
94
static const TypeInfo loongson_ipi_types[] = {
35
+{
95
diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c
36
+ return x << 3;
96
index XXXXXXX..XXXXXXX 100644
37
+}
97
--- a/hw/intc/loongson_ipi_common.c
38
+
98
+++ b/hw/intc/loongson_ipi_common.c
39
#define CSR_NAME(REG) \
99
@@ -XXX,XX +XXX,XX @@ static MemTxResult mail_send(LoongsonIPICommonState *ipi,
40
[LOONGARCH_CSR_##REG] = (#REG)
100
uint32_t cpuid;
41
101
hwaddr addr;
42
@@ -XXX,XX +XXX,XX @@ static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
102
CPUState *cs;
43
output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm);
103
+ int cpu, ret;
104
105
cpuid = extract32(val, 16, 10);
106
- cs = licc->cpu_by_arch_id(cpuid);
107
- if (cs == NULL) {
108
+ ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs);
109
+ if (ret != MEMTX_OK) {
110
return MEMTX_DECODE_ERROR;
111
}
112
113
/* override requester_id */
114
addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c);
115
- attrs.requester_id = cs->cpu_index;
116
+ attrs.requester_id = cpu;
117
return send_ipi_data(ipi, cs, val, addr, attrs);
44
}
118
}
45
119
46
+static void output_vr_ii(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
120
@@ -XXX,XX +XXX,XX @@ static MemTxResult any_send(LoongsonIPICommonState *ipi,
47
+{
121
uint32_t cpuid;
48
+ output(ctx, mnemonic, "v%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2);
122
hwaddr addr;
49
+}
123
CPUState *cs;
50
+
124
+ int cpu, ret;
51
static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
125
52
{
126
cpuid = extract32(val, 16, 10);
53
output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj, a->imm);
127
- cs = licc->cpu_by_arch_id(cpuid);
54
@@ -XXX,XX +XXX,XX @@ static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
128
- if (cs == NULL) {
55
output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk);
129
+ ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs);
130
+ if (ret != MEMTX_OK) {
131
return MEMTX_DECODE_ERROR;
132
}
133
134
/* override requester_id */
135
addr = val & 0xffff;
136
- attrs.requester_id = cs->cpu_index;
137
+ attrs.requester_id = cpu;
138
return send_ipi_data(ipi, cs, val, addr, attrs);
56
}
139
}
57
140
58
+static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
141
@@ -XXX,XX +XXX,XX @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val,
59
+{
142
uint32_t cpuid;
60
+ output(ctx, mnemonic, "v%d, r%d, r%d", a->vd, a->rj, a->rk);
143
uint8_t vector;
61
+}
144
CPUState *cs;
62
+
145
+ int cpu, ret;
63
INSN_LSX(vadd_b, vvv)
146
64
INSN_LSX(vadd_h, vvv)
147
addr &= 0xff;
65
INSN_LSX(vadd_w, vvv)
148
trace_loongson_ipi_write(size, (uint64_t)addr, val);
66
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vextrins_d, vv_i)
149
@@ -XXX,XX +XXX,XX @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val,
67
INSN_LSX(vextrins_w, vv_i)
150
cpuid = extract32(val, 16, 10);
68
INSN_LSX(vextrins_h, vv_i)
151
/* IPI status vector */
69
INSN_LSX(vextrins_b, vv_i)
152
vector = extract8(val, 0, 5);
70
+
153
- cs = licc->cpu_by_arch_id(cpuid);
71
+INSN_LSX(vld, vr_i)
154
- if (cs == NULL || cs->cpu_index >= ipi->num_cpu) {
72
+INSN_LSX(vst, vr_i)
155
+ ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs);
73
+INSN_LSX(vldx, vrr)
156
+ if (ret != MEMTX_OK || cpu >= ipi->num_cpu) {
74
+INSN_LSX(vstx, vrr)
157
return MEMTX_DECODE_ERROR;
75
+
158
}
76
+INSN_LSX(vldrepl_d, vr_i)
159
- loongson_ipi_core_writel(&ipi->cpu[cs->cpu_index], CORE_SET_OFF,
77
+INSN_LSX(vldrepl_w, vr_i)
160
+ loongson_ipi_core_writel(&ipi->cpu[cpu], CORE_SET_OFF,
78
+INSN_LSX(vldrepl_h, vr_i)
161
BIT(vector), 4, attrs);
79
+INSN_LSX(vldrepl_b, vr_i)
162
break;
80
+INSN_LSX(vstelm_d, vr_ii)
163
default:
81
+INSN_LSX(vstelm_w, vr_ii)
164
diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h
82
+INSN_LSX(vstelm_h, vr_ii)
83
+INSN_LSX(vstelm_b, vr_ii)
84
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
85
index XXXXXXX..XXXXXXX 100644
165
index XXXXXXX..XXXXXXX 100644
86
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
166
--- a/include/hw/intc/loongson_ipi_common.h
87
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
167
+++ b/include/hw/intc/loongson_ipi_common.h
88
@@ -XXX,XX +XXX,XX @@ TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b)
168
@@ -XXX,XX +XXX,XX @@ struct LoongsonIPICommonClass {
89
TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h)
169
DeviceRealize parent_realize;
90
TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w)
170
DeviceUnrealize parent_unrealize;
91
TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d)
171
AddressSpace *(*get_iocsr_as)(CPUState *cpu);
92
+
172
- CPUState *(*cpu_by_arch_id)(int64_t id);
93
+static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
173
+ int (*cpu_by_arch_id)(LoongsonIPICommonState *lics, int64_t id,
94
+{
174
+ int *index, CPUState **pcs);
95
+ TCGv addr, temp;
175
};
96
+ TCGv_i64 rl, rh;
176
97
+ TCGv_i128 val;
177
MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data,
98
+
99
+ CHECK_SXE;
100
+
101
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
102
+ val = tcg_temp_new_i128();
103
+ rl = tcg_temp_new_i64();
104
+ rh = tcg_temp_new_i64();
105
+
106
+ if (a->imm) {
107
+ temp = tcg_temp_new();
108
+ tcg_gen_addi_tl(temp, addr, a->imm);
109
+ addr = temp;
110
+ }
111
+
112
+ tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
113
+ tcg_gen_extr_i128_i64(rl, rh, val);
114
+ set_vreg64(rh, a->vd, 1);
115
+ set_vreg64(rl, a->vd, 0);
116
+
117
+ return true;
118
+}
119
+
120
+static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
121
+{
122
+ TCGv addr, temp;
123
+ TCGv_i128 val;
124
+ TCGv_i64 ah, al;
125
+
126
+ CHECK_SXE;
127
+
128
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
129
+ val = tcg_temp_new_i128();
130
+ ah = tcg_temp_new_i64();
131
+ al = tcg_temp_new_i64();
132
+
133
+ if (a->imm) {
134
+ temp = tcg_temp_new();
135
+ tcg_gen_addi_tl(temp, addr, a->imm);
136
+ addr = temp;
137
+ }
138
+
139
+ get_vreg64(ah, a->vd, 1);
140
+ get_vreg64(al, a->vd, 0);
141
+ tcg_gen_concat_i64_i128(val, al, ah);
142
+ tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
143
+
144
+ return true;
145
+}
146
+
147
+static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
148
+{
149
+ TCGv addr, src1, src2;
150
+ TCGv_i64 rl, rh;
151
+ TCGv_i128 val;
152
+
153
+ CHECK_SXE;
154
+
155
+ addr = tcg_temp_new();
156
+ src1 = gpr_src(ctx, a->rj, EXT_NONE);
157
+ src2 = gpr_src(ctx, a->rk, EXT_NONE);
158
+ val = tcg_temp_new_i128();
159
+ rl = tcg_temp_new_i64();
160
+ rh = tcg_temp_new_i64();
161
+
162
+ tcg_gen_add_tl(addr, src1, src2);
163
+ tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
164
+ tcg_gen_extr_i128_i64(rl, rh, val);
165
+ set_vreg64(rh, a->vd, 1);
166
+ set_vreg64(rl, a->vd, 0);
167
+
168
+ return true;
169
+}
170
+
171
+static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
172
+{
173
+ TCGv addr, src1, src2;
174
+ TCGv_i64 ah, al;
175
+ TCGv_i128 val;
176
+
177
+ CHECK_SXE;
178
+
179
+ addr = tcg_temp_new();
180
+ src1 = gpr_src(ctx, a->rj, EXT_NONE);
181
+ src2 = gpr_src(ctx, a->rk, EXT_NONE);
182
+ val = tcg_temp_new_i128();
183
+ ah = tcg_temp_new_i64();
184
+ al = tcg_temp_new_i64();
185
+
186
+ tcg_gen_add_tl(addr, src1, src2);
187
+ get_vreg64(ah, a->vd, 1);
188
+ get_vreg64(al, a->vd, 0);
189
+ tcg_gen_concat_i64_i128(val, al, ah);
190
+ tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
191
+
192
+ return true;
193
+}
194
+
195
+#define VLDREPL(NAME, MO) \
196
+static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
197
+{ \
198
+ TCGv addr, temp; \
199
+ TCGv_i64 val; \
200
+ \
201
+ CHECK_SXE; \
202
+ \
203
+ addr = gpr_src(ctx, a->rj, EXT_NONE); \
204
+ val = tcg_temp_new_i64(); \
205
+ \
206
+ if (a->imm) { \
207
+ temp = tcg_temp_new(); \
208
+ tcg_gen_addi_tl(temp, addr, a->imm); \
209
+ addr = temp; \
210
+ } \
211
+ \
212
+ tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO); \
213
+ tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
214
+ \
215
+ return true; \
216
+}
217
+
218
+VLDREPL(vldrepl_b, MO_8)
219
+VLDREPL(vldrepl_h, MO_16)
220
+VLDREPL(vldrepl_w, MO_32)
221
+VLDREPL(vldrepl_d, MO_64)
222
+
223
+#define VSTELM(NAME, MO, E) \
224
+static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
225
+{ \
226
+ TCGv addr, temp; \
227
+ TCGv_i64 val; \
228
+ \
229
+ CHECK_SXE; \
230
+ \
231
+ addr = gpr_src(ctx, a->rj, EXT_NONE); \
232
+ val = tcg_temp_new_i64(); \
233
+ \
234
+ if (a->imm) { \
235
+ temp = tcg_temp_new(); \
236
+ tcg_gen_addi_tl(temp, addr, a->imm); \
237
+ addr = temp; \
238
+ } \
239
+ \
240
+ tcg_gen_ld_i64(val, cpu_env, \
241
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \
242
+ tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO); \
243
+ \
244
+ return true; \
245
+}
246
+
247
+VSTELM(vstelm_b, MO_8, B)
248
+VSTELM(vstelm_h, MO_16, H)
249
+VSTELM(vstelm_w, MO_32, W)
250
+VSTELM(vstelm_d, MO_64, D)
251
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
252
index XXXXXXX..XXXXXXX 100644
253
--- a/target/loongarch/insns.decode
254
+++ b/target/loongarch/insns.decode
255
@@ -XXX,XX +XXX,XX @@ ertn 0000 01100100 10000 01110 00000 00000 @empty
256
idle 0000 01100100 10001 ............... @i15
257
dbcl 0000 00000010 10101 ............... @i15
258
259
+#
260
+# LSX Fields
261
+#
262
+
263
+%i9s3 10:s9 !function=shl_3
264
+%i10s2 10:s10 !function=shl_2
265
+%i11s1 10:s11 !function=shl_1
266
+%i8s3 10:s8 !function=shl_3
267
+%i8s2 10:s8 !function=shl_2
268
+%i8s1 10:s8 !function=shl_1
269
+
270
#
271
# LSX Argument sets
272
#
273
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
274
&rv_i rd vj imm
275
&vr vd rj
276
&vvr vd vj rk
277
+&vrr vd rj rk
278
+&vr_ii vd rj imm imm2
279
280
#
281
# LSX Formats
282
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
283
@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
284
@vr .... ........ ..... ..... rj:5 vd:5 &vr
285
@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr
286
+@vr_i9 .... ........ . ......... rj:5 vd:5 &vr_i imm=%i9s3
287
+@vr_i10 .... ........ .......... rj:5 vd:5 &vr_i imm=%i10s2
288
+@vr_i11 .... ....... ........... rj:5 vd:5 &vr_i imm=%i11s1
289
+@vr_i12 .... ...... imm:s12 rj:5 vd:5 &vr_i
290
+@vr_i8i1 .... ........ . imm2:1 ........ rj:5 vd:5 &vr_ii imm=%i8s3
291
+@vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2
292
+@vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1
293
+@vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii
294
+@vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr
295
296
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
297
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
298
@@ -XXX,XX +XXX,XX @@ vextrins_d 0111 00111000 00 ........ ..... ..... @vv_ui8
299
vextrins_w 0111 00111000 01 ........ ..... ..... @vv_ui8
300
vextrins_h 0111 00111000 10 ........ ..... ..... @vv_ui8
301
vextrins_b 0111 00111000 11 ........ ..... ..... @vv_ui8
302
+
303
+vld 0010 110000 ............ ..... ..... @vr_i12
304
+vst 0010 110001 ............ ..... ..... @vr_i12
305
+vldx 0011 10000100 00000 ..... ..... ..... @vrr
306
+vstx 0011 10000100 01000 ..... ..... ..... @vrr
307
+
308
+vldrepl_d 0011 00000001 0 ......... ..... ..... @vr_i9
309
+vldrepl_w 0011 00000010 .......... ..... ..... @vr_i10
310
+vldrepl_h 0011 0000010 ........... ..... ..... @vr_i11
311
+vldrepl_b 0011 000010 ............ ..... ..... @vr_i12
312
+vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1
313
+vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2
314
+vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3
315
+vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
316
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
317
index XXXXXXX..XXXXXXX 100644
318
--- a/target/loongarch/translate.c
319
+++ b/target/loongarch/translate.c
320
@@ -XXX,XX +XXX,XX @@ static inline int plus_1(DisasContext *ctx, int x)
321
return x + 1;
322
}
323
324
+static inline int shl_1(DisasContext *ctx, int x)
325
+{
326
+ return x << 1;
327
+}
328
+
329
static inline int shl_2(DisasContext *ctx, int x)
330
{
331
return x << 2;
332
}
333
334
+static inline int shl_3(DisasContext *ctx, int x)
335
+{
336
+ return x << 3;
337
+}
338
+
339
/*
340
* LoongArch the upper 32 bits are undefined ("can be any value").
341
* QEMU chooses to nanbox, because it is most likely to show guest bugs early.
342
--
178
--
343
2.31.1
179
2.43.5
diff view generated by jsdifflib
1
This patch includes:
1
There are arch_id and CPUState pointer fields in the IPICore object. With function
2
- VADDI.{B/H/W/D}U;
2
cpu_by_arch_id() it can be implemented by parsing IPICore array inside,
3
- VSUBI.{B/H/W/D}U.
3
rather than possible_cpus array.
4
4
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
7
Message-Id: <20230504122810.4094787-6-gaosong@loongson.cn>
8
---
7
---
9
target/loongarch/disas.c | 14 ++++++++
8
hw/intc/loongarch_ipi.c | 36 +++++++++++-------------------------
10
target/loongarch/insn_trans/trans_lsx.c.inc | 37 +++++++++++++++++++++
9
1 file changed, 11 insertions(+), 25 deletions(-)
11
target/loongarch/insns.decode | 11 ++++++
12
3 files changed, 62 insertions(+)
13
10
14
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
11
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/target/loongarch/disas.c
13
--- a/hw/intc/loongarch_ipi.c
17
+++ b/target/loongarch/disas.c
14
+++ b/hw/intc/loongarch_ipi.c
18
@@ -XXX,XX +XXX,XX @@ static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic)
15
@@ -XXX,XX +XXX,XX @@ static AddressSpace *get_iocsr_as(CPUState *cpu)
19
output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
16
return LOONGARCH_CPU(cpu)->env.address_space_iocsr;
20
}
17
}
21
18
22
+static void output_vv_i(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
19
-static int archid_cmp(const void *a, const void *b)
23
+{
20
+static int loongarch_ipi_cmp(const void *a, const void *b)
24
+ output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm);
21
{
25
+}
22
- CPUArchId *archid_a = (CPUArchId *)a;
26
+
23
- CPUArchId *archid_b = (CPUArchId *)b;
27
INSN_LSX(vadd_b, vvv)
24
+ IPICore *ipi_a = (IPICore *)a;
28
INSN_LSX(vadd_h, vvv)
25
+ IPICore *ipi_b = (IPICore *)b;
29
INSN_LSX(vadd_w, vvv)
26
30
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsub_h, vvv)
27
- return archid_a->arch_id - archid_b->arch_id;
31
INSN_LSX(vsub_w, vvv)
28
-}
32
INSN_LSX(vsub_d, vvv)
29
-
33
INSN_LSX(vsub_q, vvv)
30
-static CPUArchId *find_cpu_by_archid(MachineState *ms, uint32_t id)
34
+
31
-{
35
+INSN_LSX(vaddi_bu, vv_i)
32
- CPUArchId apic_id, *found_cpu;
36
+INSN_LSX(vaddi_hu, vv_i)
33
-
37
+INSN_LSX(vaddi_wu, vv_i)
34
- apic_id.arch_id = id;
38
+INSN_LSX(vaddi_du, vv_i)
35
- found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus,
39
+INSN_LSX(vsubi_bu, vv_i)
36
- ms->possible_cpus->len,
40
+INSN_LSX(vsubi_hu, vv_i)
37
- sizeof(*ms->possible_cpus->cpus),
41
+INSN_LSX(vsubi_wu, vv_i)
38
- archid_cmp);
42
+INSN_LSX(vsubi_du, vv_i)
39
-
43
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
40
- return found_cpu;
44
index XXXXXXX..XXXXXXX 100644
41
+ return ipi_a->arch_id - ipi_b->arch_id;
45
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
46
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
47
@@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
48
return true;
49
}
42
}
50
43
51
+static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
44
static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics,
52
+ void (*func)(unsigned, uint32_t, uint32_t,
45
int64_t arch_id, int *index, CPUState **pcs)
53
+ int64_t, uint32_t, uint32_t))
46
{
54
+{
47
- MachineState *machine = MACHINE(qdev_get_machine());
55
+ uint32_t vd_ofs, vj_ofs;
48
- CPUArchId *archid;
56
+
49
- CPUState *cs;
57
+ CHECK_SXE;
50
+ IPICore ipi, *found;
58
+
51
59
+ vd_ofs = vec_full_offset(a->vd);
52
- archid = find_cpu_by_archid(machine, arch_id);
60
+ vj_ofs = vec_full_offset(a->vj);
53
- if (archid && archid->cpu) {
61
+
54
- cs = archid->cpu;
62
+ func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
55
+ ipi.arch_id = arch_id;
63
+ return true;
56
+ found = bsearch(&ipi, lics->cpu, lics->num_cpu, sizeof(IPICore),
64
+}
57
+ loongarch_ipi_cmp);
65
+
58
+ if (found && found->cpu) {
66
+static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
59
if (index) {
67
+{
60
- *index = cs->cpu_index;
68
+ uint32_t vd_ofs, vj_ofs;
61
+ *index = found - lics->cpu;
69
+
62
}
70
+ CHECK_SXE;
63
71
+
64
if (pcs) {
72
+ vd_ofs = vec_full_offset(a->vd);
65
- *pcs = cs;
73
+ vj_ofs = vec_full_offset(a->vj);
66
+ *pcs = found->cpu;
74
+
67
}
75
+ tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
68
76
+ return true;
69
return MEMTX_OK;
77
+}
78
+
79
TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add)
80
TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add)
81
TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add)
82
@@ -XXX,XX +XXX,XX @@ TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub)
83
TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub)
84
TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub)
85
TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub)
86
+
87
+TRANS(vaddi_bu, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
88
+TRANS(vaddi_hu, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
89
+TRANS(vaddi_wu, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
90
+TRANS(vaddi_du, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
91
+TRANS(vsubi_bu, gvec_subi, MO_8)
92
+TRANS(vsubi_hu, gvec_subi, MO_16)
93
+TRANS(vsubi_wu, gvec_subi, MO_32)
94
+TRANS(vsubi_du, gvec_subi, MO_64)
95
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
96
index XXXXXXX..XXXXXXX 100644
97
--- a/target/loongarch/insns.decode
98
+++ b/target/loongarch/insns.decode
99
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
100
#
101
102
&vvv vd vj vk
103
+&vv_i vd vj imm
104
105
#
106
# LSX Formats
107
#
108
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
109
+@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
110
111
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
112
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
113
@@ -XXX,XX +XXX,XX @@ vsub_h 0111 00000000 11001 ..... ..... ..... @vvv
114
vsub_w 0111 00000000 11010 ..... ..... ..... @vvv
115
vsub_d 0111 00000000 11011 ..... ..... ..... @vvv
116
vsub_q 0111 00010010 11011 ..... ..... ..... @vvv
117
+
118
+vaddi_bu 0111 00101000 10100 ..... ..... ..... @vv_ui5
119
+vaddi_hu 0111 00101000 10101 ..... ..... ..... @vv_ui5
120
+vaddi_wu 0111 00101000 10110 ..... ..... ..... @vv_ui5
121
+vaddi_du 0111 00101000 10111 ..... ..... ..... @vv_ui5
122
+vsubi_bu 0111 00101000 11000 ..... ..... ..... @vv_ui5
123
+vsubi_hu 0111 00101000 11001 ..... ..... ..... @vv_ui5
124
+vsubi_wu 0111 00101000 11010 ..... ..... ..... @vv_ui5
125
+vsubi_du 0111 00101000 11011 ..... ..... ..... @vv_ui5
126
--
70
--
127
2.31.1
71
2.43.5
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VNEG.{B/H/W/D}.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-7-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 10 ++++++++++
9
target/loongarch/insn_trans/trans_lsx.c.inc | 20 ++++++++++++++++++++
10
target/loongarch/insns.decode | 7 +++++++
11
3 files changed, 37 insertions(+)
12
13
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/loongarch/disas.c
16
+++ b/target/loongarch/disas.c
17
@@ -XXX,XX +XXX,XX @@ static void output_vv_i(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
18
output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm);
19
}
20
21
+static void output_vv(DisasContext *ctx, arg_vv *a, const char *mnemonic)
22
+{
23
+ output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj);
24
+}
25
+
26
INSN_LSX(vadd_b, vvv)
27
INSN_LSX(vadd_h, vvv)
28
INSN_LSX(vadd_w, vvv)
29
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsubi_bu, vv_i)
30
INSN_LSX(vsubi_hu, vv_i)
31
INSN_LSX(vsubi_wu, vv_i)
32
INSN_LSX(vsubi_du, vv_i)
33
+
34
+INSN_LSX(vneg_b, vv)
35
+INSN_LSX(vneg_h, vv)
36
+INSN_LSX(vneg_w, vv)
37
+INSN_LSX(vneg_d, vv)
38
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
41
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
42
@@ -XXX,XX +XXX,XX @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
43
return true;
44
}
45
46
+static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
47
+ void (*func)(unsigned, uint32_t, uint32_t,
48
+ uint32_t, uint32_t))
49
+{
50
+ uint32_t vd_ofs, vj_ofs;
51
+
52
+ CHECK_SXE;
53
+
54
+ vd_ofs = vec_full_offset(a->vd);
55
+ vj_ofs = vec_full_offset(a->vj);
56
+
57
+ func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
58
+ return true;
59
+}
60
+
61
static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
62
void (*func)(unsigned, uint32_t, uint32_t,
63
int64_t, uint32_t, uint32_t))
64
@@ -XXX,XX +XXX,XX @@ TRANS(vsubi_bu, gvec_subi, MO_8)
65
TRANS(vsubi_hu, gvec_subi, MO_16)
66
TRANS(vsubi_wu, gvec_subi, MO_32)
67
TRANS(vsubi_du, gvec_subi, MO_64)
68
+
69
+TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg)
70
+TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg)
71
+TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg)
72
+TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg)
73
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
74
index XXXXXXX..XXXXXXX 100644
75
--- a/target/loongarch/insns.decode
76
+++ b/target/loongarch/insns.decode
77
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
78
# LSX Argument sets
79
#
80
81
+&vv vd vj
82
&vvv vd vj vk
83
&vv_i vd vj imm
84
85
#
86
# LSX Formats
87
#
88
+@vv .... ........ ..... ..... vj:5 vd:5 &vv
89
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
90
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
91
92
@@ -XXX,XX +XXX,XX @@ vsubi_bu 0111 00101000 11000 ..... ..... ..... @vv_ui5
93
vsubi_hu 0111 00101000 11001 ..... ..... ..... @vv_ui5
94
vsubi_wu 0111 00101000 11010 ..... ..... ..... @vv_ui5
95
vsubi_du 0111 00101000 11011 ..... ..... ..... @vv_ui5
96
+
97
+vneg_b 0111 00101001 11000 01100 ..... ..... @vv
98
+vneg_h 0111 00101001 11000 01101 ..... ..... @vv
99
+vneg_w 0111 00101001 11000 01110 ..... ..... @vv
100
+vneg_d 0111 00101001 11000 01111 ..... ..... @vv
101
--
102
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSADD.{B/H/W/D}[U];
3
- VSSUB.{B/H/W/D}[U].
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-8-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 17 +++++++++++++++++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 17 +++++++++++++++++
11
target/loongarch/insns.decode | 17 +++++++++++++++++
12
3 files changed, 51 insertions(+)
13
14
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/loongarch/disas.c
17
+++ b/target/loongarch/disas.c
18
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vneg_b, vv)
19
INSN_LSX(vneg_h, vv)
20
INSN_LSX(vneg_w, vv)
21
INSN_LSX(vneg_d, vv)
22
+
23
+INSN_LSX(vsadd_b, vvv)
24
+INSN_LSX(vsadd_h, vvv)
25
+INSN_LSX(vsadd_w, vvv)
26
+INSN_LSX(vsadd_d, vvv)
27
+INSN_LSX(vsadd_bu, vvv)
28
+INSN_LSX(vsadd_hu, vvv)
29
+INSN_LSX(vsadd_wu, vvv)
30
+INSN_LSX(vsadd_du, vvv)
31
+INSN_LSX(vssub_b, vvv)
32
+INSN_LSX(vssub_h, vvv)
33
+INSN_LSX(vssub_w, vvv)
34
+INSN_LSX(vssub_d, vvv)
35
+INSN_LSX(vssub_bu, vvv)
36
+INSN_LSX(vssub_hu, vvv)
37
+INSN_LSX(vssub_wu, vvv)
38
+INSN_LSX(vssub_du, vvv)
39
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
42
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
43
@@ -XXX,XX +XXX,XX @@ TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg)
44
TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg)
45
TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg)
46
TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg)
47
+
48
+TRANS(vsadd_b, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
49
+TRANS(vsadd_h, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
50
+TRANS(vsadd_w, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
51
+TRANS(vsadd_d, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
52
+TRANS(vsadd_bu, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
53
+TRANS(vsadd_hu, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
54
+TRANS(vsadd_wu, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
55
+TRANS(vsadd_du, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
56
+TRANS(vssub_b, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
57
+TRANS(vssub_h, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
58
+TRANS(vssub_w, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
59
+TRANS(vssub_d, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
60
+TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
61
+TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
62
+TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
63
+TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
64
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/loongarch/insns.decode
67
+++ b/target/loongarch/insns.decode
68
@@ -XXX,XX +XXX,XX @@ vneg_b 0111 00101001 11000 01100 ..... ..... @vv
69
vneg_h 0111 00101001 11000 01101 ..... ..... @vv
70
vneg_w 0111 00101001 11000 01110 ..... ..... @vv
71
vneg_d 0111 00101001 11000 01111 ..... ..... @vv
72
+
73
+vsadd_b 0111 00000100 01100 ..... ..... ..... @vvv
74
+vsadd_h 0111 00000100 01101 ..... ..... ..... @vvv
75
+vsadd_w 0111 00000100 01110 ..... ..... ..... @vvv
76
+vsadd_d 0111 00000100 01111 ..... ..... ..... @vvv
77
+vsadd_bu 0111 00000100 10100 ..... ..... ..... @vvv
78
+vsadd_hu 0111 00000100 10101 ..... ..... ..... @vvv
79
+vsadd_wu 0111 00000100 10110 ..... ..... ..... @vvv
80
+vsadd_du 0111 00000100 10111 ..... ..... ..... @vvv
81
+vssub_b 0111 00000100 10000 ..... ..... ..... @vvv
82
+vssub_h 0111 00000100 10001 ..... ..... ..... @vvv
83
+vssub_w 0111 00000100 10010 ..... ..... ..... @vvv
84
+vssub_d 0111 00000100 10011 ..... ..... ..... @vvv
85
+vssub_bu 0111 00000100 11000 ..... ..... ..... @vvv
86
+vssub_hu 0111 00000100 11001 ..... ..... ..... @vvv
87
+vssub_wu 0111 00000100 11010 ..... ..... ..... @vvv
88
+vssub_du 0111 00000100 11011 ..... ..... ..... @vvv
89
--
90
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU};
3
- VHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-9-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 17 +++++
10
target/loongarch/helper.h | 18 +++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 17 +++++
12
target/loongarch/insns.decode | 17 +++++
13
target/loongarch/lsx_helper.c | 81 +++++++++++++++++++++
14
5 files changed, 150 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vssub_bu, vvv)
21
INSN_LSX(vssub_hu, vvv)
22
INSN_LSX(vssub_wu, vvv)
23
INSN_LSX(vssub_du, vvv)
24
+
25
+INSN_LSX(vhaddw_h_b, vvv)
26
+INSN_LSX(vhaddw_w_h, vvv)
27
+INSN_LSX(vhaddw_d_w, vvv)
28
+INSN_LSX(vhaddw_q_d, vvv)
29
+INSN_LSX(vhaddw_hu_bu, vvv)
30
+INSN_LSX(vhaddw_wu_hu, vvv)
31
+INSN_LSX(vhaddw_du_wu, vvv)
32
+INSN_LSX(vhaddw_qu_du, vvv)
33
+INSN_LSX(vhsubw_h_b, vvv)
34
+INSN_LSX(vhsubw_w_h, vvv)
35
+INSN_LSX(vhsubw_d_w, vvv)
36
+INSN_LSX(vhsubw_q_d, vvv)
37
+INSN_LSX(vhsubw_hu_bu, vvv)
38
+INSN_LSX(vhsubw_wu_hu, vvv)
39
+INSN_LSX(vhsubw_du_wu, vvv)
40
+INSN_LSX(vhsubw_qu_du, vvv)
41
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/helper.h
44
+++ b/target/loongarch/helper.h
45
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(ldpte, void, env, tl, tl, i32)
46
DEF_HELPER_1(ertn, void, env)
47
DEF_HELPER_1(idle, void, env)
48
#endif
49
+
50
+/* LoongArch LSX */
51
+DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
52
+DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
53
+DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
54
+DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
55
+DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
56
+DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
57
+DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
58
+DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
59
+DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
60
+DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
61
+DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
66
+DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
67
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
70
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
71
@@ -XXX,XX +XXX,XX @@ TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
72
TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
73
TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
74
TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
75
+
76
+TRANS(vhaddw_h_b, gen_vvv, gen_helper_vhaddw_h_b)
77
+TRANS(vhaddw_w_h, gen_vvv, gen_helper_vhaddw_w_h)
78
+TRANS(vhaddw_d_w, gen_vvv, gen_helper_vhaddw_d_w)
79
+TRANS(vhaddw_q_d, gen_vvv, gen_helper_vhaddw_q_d)
80
+TRANS(vhaddw_hu_bu, gen_vvv, gen_helper_vhaddw_hu_bu)
81
+TRANS(vhaddw_wu_hu, gen_vvv, gen_helper_vhaddw_wu_hu)
82
+TRANS(vhaddw_du_wu, gen_vvv, gen_helper_vhaddw_du_wu)
83
+TRANS(vhaddw_qu_du, gen_vvv, gen_helper_vhaddw_qu_du)
84
+TRANS(vhsubw_h_b, gen_vvv, gen_helper_vhsubw_h_b)
85
+TRANS(vhsubw_w_h, gen_vvv, gen_helper_vhsubw_w_h)
86
+TRANS(vhsubw_d_w, gen_vvv, gen_helper_vhsubw_d_w)
87
+TRANS(vhsubw_q_d, gen_vvv, gen_helper_vhsubw_q_d)
88
+TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu)
89
+TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu)
90
+TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu)
91
+TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du)
92
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/loongarch/insns.decode
95
+++ b/target/loongarch/insns.decode
96
@@ -XXX,XX +XXX,XX @@ vssub_bu 0111 00000100 11000 ..... ..... ..... @vvv
97
vssub_hu 0111 00000100 11001 ..... ..... ..... @vvv
98
vssub_wu 0111 00000100 11010 ..... ..... ..... @vvv
99
vssub_du 0111 00000100 11011 ..... ..... ..... @vvv
100
+
101
+vhaddw_h_b 0111 00000101 01000 ..... ..... ..... @vvv
102
+vhaddw_w_h 0111 00000101 01001 ..... ..... ..... @vvv
103
+vhaddw_d_w 0111 00000101 01010 ..... ..... ..... @vvv
104
+vhaddw_q_d 0111 00000101 01011 ..... ..... ..... @vvv
105
+vhaddw_hu_bu 0111 00000101 10000 ..... ..... ..... @vvv
106
+vhaddw_wu_hu 0111 00000101 10001 ..... ..... ..... @vvv
107
+vhaddw_du_wu 0111 00000101 10010 ..... ..... ..... @vvv
108
+vhaddw_qu_du 0111 00000101 10011 ..... ..... ..... @vvv
109
+vhsubw_h_b 0111 00000101 01100 ..... ..... ..... @vvv
110
+vhsubw_w_h 0111 00000101 01101 ..... ..... ..... @vvv
111
+vhsubw_d_w 0111 00000101 01110 ..... ..... ..... @vvv
112
+vhsubw_q_d 0111 00000101 01111 ..... ..... ..... @vvv
113
+vhsubw_hu_bu 0111 00000101 10100 ..... ..... ..... @vvv
114
+vhsubw_wu_hu 0111 00000101 10101 ..... ..... ..... @vvv
115
+vhsubw_du_wu 0111 00000101 10110 ..... ..... ..... @vvv
116
+vhsubw_qu_du 0111 00000101 10111 ..... ..... ..... @vvv
117
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/target/loongarch/lsx_helper.c
120
+++ b/target/loongarch/lsx_helper.c
121
@@ -XXX,XX +XXX,XX @@
122
*
123
* Copyright (c) 2022-2023 Loongson Technology Corporation Limited
124
*/
125
+
126
+#include "qemu/osdep.h"
127
+#include "cpu.h"
128
+#include "exec/exec-all.h"
129
+#include "exec/helper-proto.h"
130
+
131
+#define DO_ADD(a, b) (a + b)
132
+#define DO_SUB(a, b) (a - b)
133
+
134
+#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
135
+void HELPER(NAME)(CPULoongArchState *env, \
136
+ uint32_t vd, uint32_t vj, uint32_t vk) \
137
+{ \
138
+ int i; \
139
+ VReg *Vd = &(env->fpr[vd].vreg); \
140
+ VReg *Vj = &(env->fpr[vj].vreg); \
141
+ VReg *Vk = &(env->fpr[vk].vreg); \
142
+ typedef __typeof(Vd->E1(0)) TD; \
143
+ \
144
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
145
+ Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
146
+ } \
147
+}
148
+
149
+DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
150
+DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
151
+DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
152
+
153
+void HELPER(vhaddw_q_d)(CPULoongArchState *env,
154
+ uint32_t vd, uint32_t vj, uint32_t vk)
155
+{
156
+ VReg *Vd = &(env->fpr[vd].vreg);
157
+ VReg *Vj = &(env->fpr[vj].vreg);
158
+ VReg *Vk = &(env->fpr[vk].vreg);
159
+
160
+ Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
161
+}
162
+
163
+DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
164
+DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
165
+DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
166
+
167
+void HELPER(vhsubw_q_d)(CPULoongArchState *env,
168
+ uint32_t vd, uint32_t vj, uint32_t vk)
169
+{
170
+ VReg *Vd = &(env->fpr[vd].vreg);
171
+ VReg *Vj = &(env->fpr[vj].vreg);
172
+ VReg *Vk = &(env->fpr[vk].vreg);
173
+
174
+ Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
175
+}
176
+
177
+DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
178
+DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
179
+DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
180
+
181
+void HELPER(vhaddw_qu_du)(CPULoongArchState *env,
182
+ uint32_t vd, uint32_t vj, uint32_t vk)
183
+{
184
+ VReg *Vd = &(env->fpr[vd].vreg);
185
+ VReg *Vj = &(env->fpr[vj].vreg);
186
+ VReg *Vk = &(env->fpr[vk].vreg);
187
+
188
+ Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
189
+ int128_make64((uint64_t)Vk->D(0)));
190
+}
191
+
192
+DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
193
+DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
194
+DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
195
+
196
+void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
197
+ uint32_t vd, uint32_t vj, uint32_t vk)
198
+{
199
+ VReg *Vd = &(env->fpr[vd].vreg);
200
+ VReg *Vj = &(env->fpr[vj].vreg);
201
+ VReg *Vk = &(env->fpr[vk].vreg);
202
+
203
+ Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
204
+ int128_make64((uint64_t)Vk->D(0)));
205
+}
206
--
207
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
3
- VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
4
- VADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-10-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 43 ++
11
target/loongarch/helper.h | 45 ++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 795 ++++++++++++++++++++
13
target/loongarch/insns.decode | 43 ++
14
target/loongarch/lsx_helper.c | 190 +++++
15
5 files changed, 1116 insertions(+)
16
17
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/loongarch/disas.c
20
+++ b/target/loongarch/disas.c
21
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vhsubw_hu_bu, vvv)
22
INSN_LSX(vhsubw_wu_hu, vvv)
23
INSN_LSX(vhsubw_du_wu, vvv)
24
INSN_LSX(vhsubw_qu_du, vvv)
25
+
26
+INSN_LSX(vaddwev_h_b, vvv)
27
+INSN_LSX(vaddwev_w_h, vvv)
28
+INSN_LSX(vaddwev_d_w, vvv)
29
+INSN_LSX(vaddwev_q_d, vvv)
30
+INSN_LSX(vaddwod_h_b, vvv)
31
+INSN_LSX(vaddwod_w_h, vvv)
32
+INSN_LSX(vaddwod_d_w, vvv)
33
+INSN_LSX(vaddwod_q_d, vvv)
34
+INSN_LSX(vsubwev_h_b, vvv)
35
+INSN_LSX(vsubwev_w_h, vvv)
36
+INSN_LSX(vsubwev_d_w, vvv)
37
+INSN_LSX(vsubwev_q_d, vvv)
38
+INSN_LSX(vsubwod_h_b, vvv)
39
+INSN_LSX(vsubwod_w_h, vvv)
40
+INSN_LSX(vsubwod_d_w, vvv)
41
+INSN_LSX(vsubwod_q_d, vvv)
42
+
43
+INSN_LSX(vaddwev_h_bu, vvv)
44
+INSN_LSX(vaddwev_w_hu, vvv)
45
+INSN_LSX(vaddwev_d_wu, vvv)
46
+INSN_LSX(vaddwev_q_du, vvv)
47
+INSN_LSX(vaddwod_h_bu, vvv)
48
+INSN_LSX(vaddwod_w_hu, vvv)
49
+INSN_LSX(vaddwod_d_wu, vvv)
50
+INSN_LSX(vaddwod_q_du, vvv)
51
+INSN_LSX(vsubwev_h_bu, vvv)
52
+INSN_LSX(vsubwev_w_hu, vvv)
53
+INSN_LSX(vsubwev_d_wu, vvv)
54
+INSN_LSX(vsubwev_q_du, vvv)
55
+INSN_LSX(vsubwod_h_bu, vvv)
56
+INSN_LSX(vsubwod_w_hu, vvv)
57
+INSN_LSX(vsubwod_d_wu, vvv)
58
+INSN_LSX(vsubwod_q_du, vvv)
59
+
60
+INSN_LSX(vaddwev_h_bu_b, vvv)
61
+INSN_LSX(vaddwev_w_hu_h, vvv)
62
+INSN_LSX(vaddwev_d_wu_w, vvv)
63
+INSN_LSX(vaddwev_q_du_d, vvv)
64
+INSN_LSX(vaddwod_h_bu_b, vvv)
65
+INSN_LSX(vaddwod_w_hu_h, vvv)
66
+INSN_LSX(vaddwod_d_wu_w, vvv)
67
+INSN_LSX(vaddwod_q_du_d, vvv)
68
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/loongarch/helper.h
71
+++ b/target/loongarch/helper.h
72
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
73
DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
74
DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
75
DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
76
+
77
+DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
78
+DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
79
+DEF_HELPER_FLAGS_4(vaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
80
+DEF_HELPER_FLAGS_4(vaddwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
81
+DEF_HELPER_FLAGS_4(vaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
82
+DEF_HELPER_FLAGS_4(vaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
83
+DEF_HELPER_FLAGS_4(vaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
84
+DEF_HELPER_FLAGS_4(vaddwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
85
+
86
+DEF_HELPER_FLAGS_4(vsubwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
87
+DEF_HELPER_FLAGS_4(vsubwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
88
+DEF_HELPER_FLAGS_4(vsubwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
89
+DEF_HELPER_FLAGS_4(vsubwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
90
+DEF_HELPER_FLAGS_4(vsubwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
91
+DEF_HELPER_FLAGS_4(vsubwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
92
+DEF_HELPER_FLAGS_4(vsubwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
93
+DEF_HELPER_FLAGS_4(vsubwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
94
+
95
+DEF_HELPER_FLAGS_4(vaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
96
+DEF_HELPER_FLAGS_4(vaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
97
+DEF_HELPER_FLAGS_4(vaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
98
+DEF_HELPER_FLAGS_4(vaddwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
99
+DEF_HELPER_FLAGS_4(vaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
100
+DEF_HELPER_FLAGS_4(vaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
101
+DEF_HELPER_FLAGS_4(vaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
102
+DEF_HELPER_FLAGS_4(vaddwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
103
+
104
+DEF_HELPER_FLAGS_4(vsubwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
105
+DEF_HELPER_FLAGS_4(vsubwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
106
+DEF_HELPER_FLAGS_4(vsubwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
107
+DEF_HELPER_FLAGS_4(vsubwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
108
+DEF_HELPER_FLAGS_4(vsubwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
109
+DEF_HELPER_FLAGS_4(vsubwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
110
+DEF_HELPER_FLAGS_4(vsubwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
111
+DEF_HELPER_FLAGS_4(vsubwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
112
+
113
+DEF_HELPER_FLAGS_4(vaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
114
+DEF_HELPER_FLAGS_4(vaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
115
+DEF_HELPER_FLAGS_4(vaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
116
+DEF_HELPER_FLAGS_4(vaddwev_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
117
+DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
118
+DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
119
+DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
120
+DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
121
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
124
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
125
@@ -XXX,XX +XXX,XX @@ TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu)
126
TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu)
127
TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu)
128
TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du)
129
+
130
+static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
131
+{
132
+ TCGv_vec t1, t2;
133
+
134
+ int halfbits = 4 << vece;
135
+
136
+ t1 = tcg_temp_new_vec_matching(a);
137
+ t2 = tcg_temp_new_vec_matching(b);
138
+
139
+ /* Sign-extend the even elements from a */
140
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
141
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
142
+
143
+ /* Sign-extend the even elements from b */
144
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
145
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
146
+
147
+ tcg_gen_add_vec(vece, t, t1, t2);
148
+}
149
+
150
+static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
151
+{
152
+ TCGv_i32 t1, t2;
153
+
154
+ t1 = tcg_temp_new_i32();
155
+ t2 = tcg_temp_new_i32();
156
+ tcg_gen_ext16s_i32(t1, a);
157
+ tcg_gen_ext16s_i32(t2, b);
158
+ tcg_gen_add_i32(t, t1, t2);
159
+}
160
+
161
+static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
162
+{
163
+ TCGv_i64 t1, t2;
164
+
165
+ t1 = tcg_temp_new_i64();
166
+ t2 = tcg_temp_new_i64();
167
+ tcg_gen_ext32s_i64(t1, a);
168
+ tcg_gen_ext32s_i64(t2, b);
169
+ tcg_gen_add_i64(t, t1, t2);
170
+}
171
+
172
+static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
173
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
174
+{
175
+ static const TCGOpcode vecop_list[] = {
176
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
177
+ };
178
+ static const GVecGen3 op[4] = {
179
+ {
180
+ .fniv = gen_vaddwev_s,
181
+ .fno = gen_helper_vaddwev_h_b,
182
+ .opt_opc = vecop_list,
183
+ .vece = MO_16
184
+ },
185
+ {
186
+ .fni4 = gen_vaddwev_w_h,
187
+ .fniv = gen_vaddwev_s,
188
+ .fno = gen_helper_vaddwev_w_h,
189
+ .opt_opc = vecop_list,
190
+ .vece = MO_32
191
+ },
192
+ {
193
+ .fni8 = gen_vaddwev_d_w,
194
+ .fniv = gen_vaddwev_s,
195
+ .fno = gen_helper_vaddwev_d_w,
196
+ .opt_opc = vecop_list,
197
+ .vece = MO_64
198
+ },
199
+ {
200
+ .fno = gen_helper_vaddwev_q_d,
201
+ .vece = MO_128
202
+ },
203
+ };
204
+
205
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
206
+}
207
+
208
+TRANS(vaddwev_h_b, gvec_vvv, MO_8, do_vaddwev_s)
209
+TRANS(vaddwev_w_h, gvec_vvv, MO_16, do_vaddwev_s)
210
+TRANS(vaddwev_d_w, gvec_vvv, MO_32, do_vaddwev_s)
211
+TRANS(vaddwev_q_d, gvec_vvv, MO_64, do_vaddwev_s)
212
+
213
+static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
214
+{
215
+ TCGv_i32 t1, t2;
216
+
217
+ t1 = tcg_temp_new_i32();
218
+ t2 = tcg_temp_new_i32();
219
+ tcg_gen_sari_i32(t1, a, 16);
220
+ tcg_gen_sari_i32(t2, b, 16);
221
+ tcg_gen_add_i32(t, t1, t2);
222
+}
223
+
224
+static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
225
+{
226
+ TCGv_i64 t1, t2;
227
+
228
+ t1 = tcg_temp_new_i64();
229
+ t2 = tcg_temp_new_i64();
230
+ tcg_gen_sari_i64(t1, a, 32);
231
+ tcg_gen_sari_i64(t2, b, 32);
232
+ tcg_gen_add_i64(t, t1, t2);
233
+}
234
+
235
+static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
236
+{
237
+ TCGv_vec t1, t2;
238
+
239
+ int halfbits = 4 << vece;
240
+
241
+ t1 = tcg_temp_new_vec_matching(a);
242
+ t2 = tcg_temp_new_vec_matching(b);
243
+
244
+ /* Sign-extend the odd elements for vector */
245
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
246
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
247
+
248
+ tcg_gen_add_vec(vece, t, t1, t2);
249
+}
250
+
251
+static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
252
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
253
+{
254
+ static const TCGOpcode vecop_list[] = {
255
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
256
+ };
257
+ static const GVecGen3 op[4] = {
258
+ {
259
+ .fniv = gen_vaddwod_s,
260
+ .fno = gen_helper_vaddwod_h_b,
261
+ .opt_opc = vecop_list,
262
+ .vece = MO_16
263
+ },
264
+ {
265
+ .fni4 = gen_vaddwod_w_h,
266
+ .fniv = gen_vaddwod_s,
267
+ .fno = gen_helper_vaddwod_w_h,
268
+ .opt_opc = vecop_list,
269
+ .vece = MO_32
270
+ },
271
+ {
272
+ .fni8 = gen_vaddwod_d_w,
273
+ .fniv = gen_vaddwod_s,
274
+ .fno = gen_helper_vaddwod_d_w,
275
+ .opt_opc = vecop_list,
276
+ .vece = MO_64
277
+ },
278
+ {
279
+ .fno = gen_helper_vaddwod_q_d,
280
+ .vece = MO_128
281
+ },
282
+ };
283
+
284
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
285
+}
286
+
287
+TRANS(vaddwod_h_b, gvec_vvv, MO_8, do_vaddwod_s)
288
+TRANS(vaddwod_w_h, gvec_vvv, MO_16, do_vaddwod_s)
289
+TRANS(vaddwod_d_w, gvec_vvv, MO_32, do_vaddwod_s)
290
+TRANS(vaddwod_q_d, gvec_vvv, MO_64, do_vaddwod_s)
291
+
292
+static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
293
+{
294
+ TCGv_vec t1, t2;
295
+
296
+ int halfbits = 4 << vece;
297
+
298
+ t1 = tcg_temp_new_vec_matching(a);
299
+ t2 = tcg_temp_new_vec_matching(b);
300
+
301
+ /* Sign-extend the even elements from a */
302
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
303
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
304
+
305
+ /* Sign-extend the even elements from b */
306
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
307
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
308
+
309
+ tcg_gen_sub_vec(vece, t, t1, t2);
310
+}
311
+
312
+static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
313
+{
314
+ TCGv_i32 t1, t2;
315
+
316
+ t1 = tcg_temp_new_i32();
317
+ t2 = tcg_temp_new_i32();
318
+ tcg_gen_ext16s_i32(t1, a);
319
+ tcg_gen_ext16s_i32(t2, b);
320
+ tcg_gen_sub_i32(t, t1, t2);
321
+}
322
+
323
+static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
324
+{
325
+ TCGv_i64 t1, t2;
326
+
327
+ t1 = tcg_temp_new_i64();
328
+ t2 = tcg_temp_new_i64();
329
+ tcg_gen_ext32s_i64(t1, a);
330
+ tcg_gen_ext32s_i64(t2, b);
331
+ tcg_gen_sub_i64(t, t1, t2);
332
+}
333
+
334
+static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
335
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
336
+{
337
+ static const TCGOpcode vecop_list[] = {
338
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
339
+ };
340
+ static const GVecGen3 op[4] = {
341
+ {
342
+ .fniv = gen_vsubwev_s,
343
+ .fno = gen_helper_vsubwev_h_b,
344
+ .opt_opc = vecop_list,
345
+ .vece = MO_16
346
+ },
347
+ {
348
+ .fni4 = gen_vsubwev_w_h,
349
+ .fniv = gen_vsubwev_s,
350
+ .fno = gen_helper_vsubwev_w_h,
351
+ .opt_opc = vecop_list,
352
+ .vece = MO_32
353
+ },
354
+ {
355
+ .fni8 = gen_vsubwev_d_w,
356
+ .fniv = gen_vsubwev_s,
357
+ .fno = gen_helper_vsubwev_d_w,
358
+ .opt_opc = vecop_list,
359
+ .vece = MO_64
360
+ },
361
+ {
362
+ .fno = gen_helper_vsubwev_q_d,
363
+ .vece = MO_128
364
+ },
365
+ };
366
+
367
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
368
+}
369
+
370
+TRANS(vsubwev_h_b, gvec_vvv, MO_8, do_vsubwev_s)
371
+TRANS(vsubwev_w_h, gvec_vvv, MO_16, do_vsubwev_s)
372
+TRANS(vsubwev_d_w, gvec_vvv, MO_32, do_vsubwev_s)
373
+TRANS(vsubwev_q_d, gvec_vvv, MO_64, do_vsubwev_s)
374
+
375
+static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
376
+{
377
+ TCGv_vec t1, t2;
378
+
379
+ int halfbits = 4 << vece;
380
+
381
+ t1 = tcg_temp_new_vec_matching(a);
382
+ t2 = tcg_temp_new_vec_matching(b);
383
+
384
+ /* Sign-extend the odd elements for vector */
385
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
386
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
387
+
388
+ tcg_gen_sub_vec(vece, t, t1, t2);
389
+}
390
+
391
+static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
392
+{
393
+ TCGv_i32 t1, t2;
394
+
395
+ t1 = tcg_temp_new_i32();
396
+ t2 = tcg_temp_new_i32();
397
+ tcg_gen_sari_i32(t1, a, 16);
398
+ tcg_gen_sari_i32(t2, b, 16);
399
+ tcg_gen_sub_i32(t, t1, t2);
400
+}
401
+
402
+static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
403
+{
404
+ TCGv_i64 t1, t2;
405
+
406
+ t1 = tcg_temp_new_i64();
407
+ t2 = tcg_temp_new_i64();
408
+ tcg_gen_sari_i64(t1, a, 32);
409
+ tcg_gen_sari_i64(t2, b, 32);
410
+ tcg_gen_sub_i64(t, t1, t2);
411
+}
412
+
413
+static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
414
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
415
+{
416
+ static const TCGOpcode vecop_list[] = {
417
+ INDEX_op_sari_vec, INDEX_op_sub_vec, 0
418
+ };
419
+ static const GVecGen3 op[4] = {
420
+ {
421
+ .fniv = gen_vsubwod_s,
422
+ .fno = gen_helper_vsubwod_h_b,
423
+ .opt_opc = vecop_list,
424
+ .vece = MO_16
425
+ },
426
+ {
427
+ .fni4 = gen_vsubwod_w_h,
428
+ .fniv = gen_vsubwod_s,
429
+ .fno = gen_helper_vsubwod_w_h,
430
+ .opt_opc = vecop_list,
431
+ .vece = MO_32
432
+ },
433
+ {
434
+ .fni8 = gen_vsubwod_d_w,
435
+ .fniv = gen_vsubwod_s,
436
+ .fno = gen_helper_vsubwod_d_w,
437
+ .opt_opc = vecop_list,
438
+ .vece = MO_64
439
+ },
440
+ {
441
+ .fno = gen_helper_vsubwod_q_d,
442
+ .vece = MO_128
443
+ },
444
+ };
445
+
446
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
447
+}
448
+
449
+TRANS(vsubwod_h_b, gvec_vvv, MO_8, do_vsubwod_s)
450
+TRANS(vsubwod_w_h, gvec_vvv, MO_16, do_vsubwod_s)
451
+TRANS(vsubwod_d_w, gvec_vvv, MO_32, do_vsubwod_s)
452
+TRANS(vsubwod_q_d, gvec_vvv, MO_64, do_vsubwod_s)
453
+
454
+static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
455
+{
456
+ TCGv_vec t1, t2, t3;
457
+
458
+ t1 = tcg_temp_new_vec_matching(a);
459
+ t2 = tcg_temp_new_vec_matching(b);
460
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
461
+ tcg_gen_and_vec(vece, t1, a, t3);
462
+ tcg_gen_and_vec(vece, t2, b, t3);
463
+ tcg_gen_add_vec(vece, t, t1, t2);
464
+}
465
+
466
+static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
467
+{
468
+ TCGv_i32 t1, t2;
469
+
470
+ t1 = tcg_temp_new_i32();
471
+ t2 = tcg_temp_new_i32();
472
+ tcg_gen_ext16u_i32(t1, a);
473
+ tcg_gen_ext16u_i32(t2, b);
474
+ tcg_gen_add_i32(t, t1, t2);
475
+}
476
+
477
+static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
478
+{
479
+ TCGv_i64 t1, t2;
480
+
481
+ t1 = tcg_temp_new_i64();
482
+ t2 = tcg_temp_new_i64();
483
+ tcg_gen_ext32u_i64(t1, a);
484
+ tcg_gen_ext32u_i64(t2, b);
485
+ tcg_gen_add_i64(t, t1, t2);
486
+}
487
+
488
+static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
489
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
490
+{
491
+ static const TCGOpcode vecop_list[] = {
492
+ INDEX_op_add_vec, 0
493
+ };
494
+ static const GVecGen3 op[4] = {
495
+ {
496
+ .fniv = gen_vaddwev_u,
497
+ .fno = gen_helper_vaddwev_h_bu,
498
+ .opt_opc = vecop_list,
499
+ .vece = MO_16
500
+ },
501
+ {
502
+ .fni4 = gen_vaddwev_w_hu,
503
+ .fniv = gen_vaddwev_u,
504
+ .fno = gen_helper_vaddwev_w_hu,
505
+ .opt_opc = vecop_list,
506
+ .vece = MO_32
507
+ },
508
+ {
509
+ .fni8 = gen_vaddwev_d_wu,
510
+ .fniv = gen_vaddwev_u,
511
+ .fno = gen_helper_vaddwev_d_wu,
512
+ .opt_opc = vecop_list,
513
+ .vece = MO_64
514
+ },
515
+ {
516
+ .fno = gen_helper_vaddwev_q_du,
517
+ .vece = MO_128
518
+ },
519
+ };
520
+
521
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
522
+}
523
+
524
+TRANS(vaddwev_h_bu, gvec_vvv, MO_8, do_vaddwev_u)
525
+TRANS(vaddwev_w_hu, gvec_vvv, MO_16, do_vaddwev_u)
526
+TRANS(vaddwev_d_wu, gvec_vvv, MO_32, do_vaddwev_u)
527
+TRANS(vaddwev_q_du, gvec_vvv, MO_64, do_vaddwev_u)
528
+
529
+static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
530
+{
531
+ TCGv_vec t1, t2;
532
+
533
+ int halfbits = 4 << vece;
534
+
535
+ t1 = tcg_temp_new_vec_matching(a);
536
+ t2 = tcg_temp_new_vec_matching(b);
537
+
538
+ /* Zero-extend the odd (upper-half) elements to the full element width */
539
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
540
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
541
+
542
+ tcg_gen_add_vec(vece, t, t1, t2);
543
+}
544
+
545
+static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
546
+{
547
+ TCGv_i32 t1, t2;
548
+
549
+ t1 = tcg_temp_new_i32();
550
+ t2 = tcg_temp_new_i32();
551
+ tcg_gen_shri_i32(t1, a, 16);
552
+ tcg_gen_shri_i32(t2, b, 16);
553
+ tcg_gen_add_i32(t, t1, t2);
554
+}
555
+
556
+static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
557
+{
558
+ TCGv_i64 t1, t2;
559
+
560
+ t1 = tcg_temp_new_i64();
561
+ t2 = tcg_temp_new_i64();
562
+ tcg_gen_shri_i64(t1, a, 32);
563
+ tcg_gen_shri_i64(t2, b, 32);
564
+ tcg_gen_add_i64(t, t1, t2);
565
+}
566
+
567
+static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
568
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
569
+{
570
+ static const TCGOpcode vecop_list[] = {
571
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
572
+ };
573
+ static const GVecGen3 op[4] = {
574
+ {
575
+ .fniv = gen_vaddwod_u,
576
+ .fno = gen_helper_vaddwod_h_bu,
577
+ .opt_opc = vecop_list,
578
+ .vece = MO_16
579
+ },
580
+ {
581
+ .fni4 = gen_vaddwod_w_hu,
582
+ .fniv = gen_vaddwod_u,
583
+ .fno = gen_helper_vaddwod_w_hu,
584
+ .opt_opc = vecop_list,
585
+ .vece = MO_32
586
+ },
587
+ {
588
+ .fni8 = gen_vaddwod_d_wu,
589
+ .fniv = gen_vaddwod_u,
590
+ .fno = gen_helper_vaddwod_d_wu,
591
+ .opt_opc = vecop_list,
592
+ .vece = MO_64
593
+ },
594
+ {
595
+ .fno = gen_helper_vaddwod_q_du,
596
+ .vece = MO_128
597
+ },
598
+ };
599
+
600
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
601
+}
602
+
603
+TRANS(vaddwod_h_bu, gvec_vvv, MO_8, do_vaddwod_u)
604
+TRANS(vaddwod_w_hu, gvec_vvv, MO_16, do_vaddwod_u)
605
+TRANS(vaddwod_d_wu, gvec_vvv, MO_32, do_vaddwod_u)
606
+TRANS(vaddwod_q_du, gvec_vvv, MO_64, do_vaddwod_u)
607
+
608
+static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
609
+{
610
+ TCGv_vec t1, t2, t3;
611
+
612
+ t1 = tcg_temp_new_vec_matching(a);
613
+ t2 = tcg_temp_new_vec_matching(b);
614
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
615
+ tcg_gen_and_vec(vece, t1, a, t3);
616
+ tcg_gen_and_vec(vece, t2, b, t3);
617
+ tcg_gen_sub_vec(vece, t, t1, t2);
618
+}
619
+
620
+static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
621
+{
622
+ TCGv_i32 t1, t2;
623
+
624
+ t1 = tcg_temp_new_i32();
625
+ t2 = tcg_temp_new_i32();
626
+ tcg_gen_ext16u_i32(t1, a);
627
+ tcg_gen_ext16u_i32(t2, b);
628
+ tcg_gen_sub_i32(t, t1, t2);
629
+}
630
+
631
+static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
632
+{
633
+ TCGv_i64 t1, t2;
634
+
635
+ t1 = tcg_temp_new_i64();
636
+ t2 = tcg_temp_new_i64();
637
+ tcg_gen_ext32u_i64(t1, a);
638
+ tcg_gen_ext32u_i64(t2, b);
639
+ tcg_gen_sub_i64(t, t1, t2);
640
+}
641
+
642
+static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
643
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
644
+{
645
+ static const TCGOpcode vecop_list[] = {
646
+ INDEX_op_sub_vec, 0
647
+ };
648
+ static const GVecGen3 op[4] = {
649
+ {
650
+ .fniv = gen_vsubwev_u,
651
+ .fno = gen_helper_vsubwev_h_bu,
652
+ .opt_opc = vecop_list,
653
+ .vece = MO_16
654
+ },
655
+ {
656
+ .fni4 = gen_vsubwev_w_hu,
657
+ .fniv = gen_vsubwev_u,
658
+ .fno = gen_helper_vsubwev_w_hu,
659
+ .opt_opc = vecop_list,
660
+ .vece = MO_32
661
+ },
662
+ {
663
+ .fni8 = gen_vsubwev_d_wu,
664
+ .fniv = gen_vsubwev_u,
665
+ .fno = gen_helper_vsubwev_d_wu,
666
+ .opt_opc = vecop_list,
667
+ .vece = MO_64
668
+ },
669
+ {
670
+ .fno = gen_helper_vsubwev_q_du,
671
+ .vece = MO_128
672
+ },
673
+ };
674
+
675
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
676
+}
677
+
678
+TRANS(vsubwev_h_bu, gvec_vvv, MO_8, do_vsubwev_u)
679
+TRANS(vsubwev_w_hu, gvec_vvv, MO_16, do_vsubwev_u)
680
+TRANS(vsubwev_d_wu, gvec_vvv, MO_32, do_vsubwev_u)
681
+TRANS(vsubwev_q_du, gvec_vvv, MO_64, do_vsubwev_u)
682
+
683
+static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
684
+{
685
+ TCGv_vec t1, t2;
686
+
687
+ int halfbits = 4 << vece;
688
+
689
+ t1 = tcg_temp_new_vec_matching(a);
690
+ t2 = tcg_temp_new_vec_matching(b);
691
+
692
+ /* Zero-extend the odd (upper-half) elements to the full element width */
693
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
694
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
695
+
696
+ tcg_gen_sub_vec(vece, t, t1, t2);
697
+}
698
+
699
+static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
700
+{
701
+ TCGv_i32 t1, t2;
702
+
703
+ t1 = tcg_temp_new_i32();
704
+ t2 = tcg_temp_new_i32();
705
+ tcg_gen_shri_i32(t1, a, 16);
706
+ tcg_gen_shri_i32(t2, b, 16);
707
+ tcg_gen_sub_i32(t, t1, t2);
708
+}
709
+
710
+static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
711
+{
712
+ TCGv_i64 t1, t2;
713
+
714
+ t1 = tcg_temp_new_i64();
715
+ t2 = tcg_temp_new_i64();
716
+ tcg_gen_shri_i64(t1, a, 32);
717
+ tcg_gen_shri_i64(t2, b, 32);
718
+ tcg_gen_sub_i64(t, t1, t2);
719
+}
720
+
721
+static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
722
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
723
+{
724
+ static const TCGOpcode vecop_list[] = {
725
+ INDEX_op_shri_vec, INDEX_op_sub_vec, 0
726
+ };
727
+ static const GVecGen3 op[4] = {
728
+ {
729
+ .fniv = gen_vsubwod_u,
730
+ .fno = gen_helper_vsubwod_h_bu,
731
+ .opt_opc = vecop_list,
732
+ .vece = MO_16
733
+ },
734
+ {
735
+ .fni4 = gen_vsubwod_w_hu,
736
+ .fniv = gen_vsubwod_u,
737
+ .fno = gen_helper_vsubwod_w_hu,
738
+ .opt_opc = vecop_list,
739
+ .vece = MO_32
740
+ },
741
+ {
742
+ .fni8 = gen_vsubwod_d_wu,
743
+ .fniv = gen_vsubwod_u,
744
+ .fno = gen_helper_vsubwod_d_wu,
745
+ .opt_opc = vecop_list,
746
+ .vece = MO_64
747
+ },
748
+ {
749
+ .fno = gen_helper_vsubwod_q_du,
750
+ .vece = MO_128
751
+ },
752
+ };
753
+
754
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
755
+}
756
+
757
+TRANS(vsubwod_h_bu, gvec_vvv, MO_8, do_vsubwod_u)
758
+TRANS(vsubwod_w_hu, gvec_vvv, MO_16, do_vsubwod_u)
759
+TRANS(vsubwod_d_wu, gvec_vvv, MO_32, do_vsubwod_u)
760
+TRANS(vsubwod_q_du, gvec_vvv, MO_64, do_vsubwod_u)
761
+
762
+static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
763
+{
764
+ TCGv_vec t1, t2, t3;
765
+
766
+ int halfbits = 4 << vece;
767
+
768
+ t1 = tcg_temp_new_vec_matching(a);
769
+ t2 = tcg_temp_new_vec_matching(b);
770
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
771
+
772
+ /* Zero-extend the even elements from a */
773
+ tcg_gen_and_vec(vece, t1, a, t3);
774
+
775
+ /* Sign-extend the even elements from b */
776
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
777
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
778
+
779
+ tcg_gen_add_vec(vece, t, t1, t2);
780
+}
781
+
782
+static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
783
+{
784
+ TCGv_i32 t1, t2;
785
+
786
+ t1 = tcg_temp_new_i32();
787
+ t2 = tcg_temp_new_i32();
788
+ tcg_gen_ext16u_i32(t1, a);
789
+ tcg_gen_ext16s_i32(t2, b);
790
+ tcg_gen_add_i32(t, t1, t2);
791
+}
792
+
793
+static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
794
+{
795
+ TCGv_i64 t1, t2;
796
+
797
+ t1 = tcg_temp_new_i64();
798
+ t2 = tcg_temp_new_i64();
799
+ tcg_gen_ext32u_i64(t1, a);
800
+ tcg_gen_ext32s_i64(t2, b);
801
+ tcg_gen_add_i64(t, t1, t2);
802
+}
803
+
804
+static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
805
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
806
+{
807
+ static const TCGOpcode vecop_list[] = {
808
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
809
+ };
810
+ static const GVecGen3 op[4] = {
811
+ {
812
+ .fniv = gen_vaddwev_u_s,
813
+ .fno = gen_helper_vaddwev_h_bu_b,
814
+ .opt_opc = vecop_list,
815
+ .vece = MO_16
816
+ },
817
+ {
818
+ .fni4 = gen_vaddwev_w_hu_h,
819
+ .fniv = gen_vaddwev_u_s,
820
+ .fno = gen_helper_vaddwev_w_hu_h,
821
+ .opt_opc = vecop_list,
822
+ .vece = MO_32
823
+ },
824
+ {
825
+ .fni8 = gen_vaddwev_d_wu_w,
826
+ .fniv = gen_vaddwev_u_s,
827
+ .fno = gen_helper_vaddwev_d_wu_w,
828
+ .opt_opc = vecop_list,
829
+ .vece = MO_64
830
+ },
831
+ {
832
+ .fno = gen_helper_vaddwev_q_du_d,
833
+ .vece = MO_128
834
+ },
835
+ };
836
+
837
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
838
+}
839
+
840
+TRANS(vaddwev_h_bu_b, gvec_vvv, MO_8, do_vaddwev_u_s)
841
+TRANS(vaddwev_w_hu_h, gvec_vvv, MO_16, do_vaddwev_u_s)
842
+TRANS(vaddwev_d_wu_w, gvec_vvv, MO_32, do_vaddwev_u_s)
843
+TRANS(vaddwev_q_du_d, gvec_vvv, MO_64, do_vaddwev_u_s)
844
+
845
+static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
846
+{
847
+ TCGv_vec t1, t2;
848
+
849
+ int halfbits = 4 << vece;
850
+
851
+ t1 = tcg_temp_new_vec_matching(a);
852
+ t2 = tcg_temp_new_vec_matching(b);
853
+
854
+ /* Zero-extend the odd elements from a */
855
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
856
+ /* Sign-extend the odd elements from b */
857
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
858
+
859
+ tcg_gen_add_vec(vece, t, t1, t2);
860
+}
861
+
862
+static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
863
+{
864
+ TCGv_i32 t1, t2;
865
+
866
+ t1 = tcg_temp_new_i32();
867
+ t2 = tcg_temp_new_i32();
868
+ tcg_gen_shri_i32(t1, a, 16);
869
+ tcg_gen_sari_i32(t2, b, 16);
870
+ tcg_gen_add_i32(t, t1, t2);
871
+}
872
+
873
+static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
874
+{
875
+ TCGv_i64 t1, t2;
876
+
877
+ t1 = tcg_temp_new_i64();
878
+ t2 = tcg_temp_new_i64();
879
+ tcg_gen_shri_i64(t1, a, 32);
880
+ tcg_gen_sari_i64(t2, b, 32);
881
+ tcg_gen_add_i64(t, t1, t2);
882
+}
883
+
884
+static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
885
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
886
+{
887
+ static const TCGOpcode vecop_list[] = {
888
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
889
+ };
890
+ static const GVecGen3 op[4] = {
891
+ {
892
+ .fniv = gen_vaddwod_u_s,
893
+ .fno = gen_helper_vaddwod_h_bu_b,
894
+ .opt_opc = vecop_list,
895
+ .vece = MO_16
896
+ },
897
+ {
898
+ .fni4 = gen_vaddwod_w_hu_h,
899
+ .fniv = gen_vaddwod_u_s,
900
+ .fno = gen_helper_vaddwod_w_hu_h,
901
+ .opt_opc = vecop_list,
902
+ .vece = MO_32
903
+ },
904
+ {
905
+ .fni8 = gen_vaddwod_d_wu_w,
906
+ .fniv = gen_vaddwod_u_s,
907
+ .fno = gen_helper_vaddwod_d_wu_w,
908
+ .opt_opc = vecop_list,
909
+ .vece = MO_64
910
+ },
911
+ {
912
+ .fno = gen_helper_vaddwod_q_du_d,
913
+ .vece = MO_128
914
+ },
915
+ };
916
+
917
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
918
+}
919
+
920
+TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s)
921
+TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s)
922
+TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s)
923
+TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s)
924
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
925
index XXXXXXX..XXXXXXX 100644
926
--- a/target/loongarch/insns.decode
927
+++ b/target/loongarch/insns.decode
928
@@ -XXX,XX +XXX,XX @@ vhsubw_hu_bu 0111 00000101 10100 ..... ..... ..... @vvv
929
vhsubw_wu_hu 0111 00000101 10101 ..... ..... ..... @vvv
930
vhsubw_du_wu 0111 00000101 10110 ..... ..... ..... @vvv
931
vhsubw_qu_du 0111 00000101 10111 ..... ..... ..... @vvv
932
+
933
+vaddwev_h_b 0111 00000001 11100 ..... ..... ..... @vvv
934
+vaddwev_w_h 0111 00000001 11101 ..... ..... ..... @vvv
935
+vaddwev_d_w 0111 00000001 11110 ..... ..... ..... @vvv
936
+vaddwev_q_d 0111 00000001 11111 ..... ..... ..... @vvv
937
+vaddwod_h_b 0111 00000010 00100 ..... ..... ..... @vvv
938
+vaddwod_w_h 0111 00000010 00101 ..... ..... ..... @vvv
939
+vaddwod_d_w 0111 00000010 00110 ..... ..... ..... @vvv
940
+vaddwod_q_d 0111 00000010 00111 ..... ..... ..... @vvv
941
+vsubwev_h_b 0111 00000010 00000 ..... ..... ..... @vvv
942
+vsubwev_w_h 0111 00000010 00001 ..... ..... ..... @vvv
943
+vsubwev_d_w 0111 00000010 00010 ..... ..... ..... @vvv
944
+vsubwev_q_d 0111 00000010 00011 ..... ..... ..... @vvv
945
+vsubwod_h_b 0111 00000010 01000 ..... ..... ..... @vvv
946
+vsubwod_w_h 0111 00000010 01001 ..... ..... ..... @vvv
947
+vsubwod_d_w 0111 00000010 01010 ..... ..... ..... @vvv
948
+vsubwod_q_d 0111 00000010 01011 ..... ..... ..... @vvv
949
+
950
+vaddwev_h_bu 0111 00000010 11100 ..... ..... ..... @vvv
951
+vaddwev_w_hu 0111 00000010 11101 ..... ..... ..... @vvv
952
+vaddwev_d_wu 0111 00000010 11110 ..... ..... ..... @vvv
953
+vaddwev_q_du 0111 00000010 11111 ..... ..... ..... @vvv
954
+vaddwod_h_bu 0111 00000011 00100 ..... ..... ..... @vvv
955
+vaddwod_w_hu 0111 00000011 00101 ..... ..... ..... @vvv
956
+vaddwod_d_wu 0111 00000011 00110 ..... ..... ..... @vvv
957
+vaddwod_q_du 0111 00000011 00111 ..... ..... ..... @vvv
958
+vsubwev_h_bu 0111 00000011 00000 ..... ..... ..... @vvv
959
+vsubwev_w_hu 0111 00000011 00001 ..... ..... ..... @vvv
960
+vsubwev_d_wu 0111 00000011 00010 ..... ..... ..... @vvv
961
+vsubwev_q_du 0111 00000011 00011 ..... ..... ..... @vvv
962
+vsubwod_h_bu 0111 00000011 01000 ..... ..... ..... @vvv
963
+vsubwod_w_hu 0111 00000011 01001 ..... ..... ..... @vvv
964
+vsubwod_d_wu 0111 00000011 01010 ..... ..... ..... @vvv
965
+vsubwod_q_du 0111 00000011 01011 ..... ..... ..... @vvv
966
+
967
+vaddwev_h_bu_b 0111 00000011 11100 ..... ..... ..... @vvv
968
+vaddwev_w_hu_h 0111 00000011 11101 ..... ..... ..... @vvv
969
+vaddwev_d_wu_w 0111 00000011 11110 ..... ..... ..... @vvv
970
+vaddwev_q_du_d 0111 00000011 11111 ..... ..... ..... @vvv
971
+vaddwod_h_bu_b 0111 00000100 00000 ..... ..... ..... @vvv
972
+vaddwod_w_hu_h 0111 00000100 00001 ..... ..... ..... @vvv
973
+vaddwod_d_wu_w 0111 00000100 00010 ..... ..... ..... @vvv
974
+vaddwod_q_du_d 0111 00000100 00011 ..... ..... ..... @vvv
975
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
976
index XXXXXXX..XXXXXXX 100644
977
--- a/target/loongarch/lsx_helper.c
978
+++ b/target/loongarch/lsx_helper.c
979
@@ -XXX,XX +XXX,XX @@ void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
980
Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
981
int128_make64((uint64_t)Vk->D(0)));
982
}
983
+
984
+#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
985
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
986
+{ \
987
+ int i; \
988
+ VReg *Vd = (VReg *)vd; \
989
+ VReg *Vj = (VReg *)vj; \
990
+ VReg *Vk = (VReg *)vk; \
991
+ typedef __typeof(Vd->E1(0)) TD; \
992
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
993
+ Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
994
+ } \
995
+}
996
+
997
+#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
998
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
999
+{ \
1000
+ int i; \
1001
+ VReg *Vd = (VReg *)vd; \
1002
+ VReg *Vj = (VReg *)vj; \
1003
+ VReg *Vk = (VReg *)vk; \
1004
+ typedef __typeof(Vd->E1(0)) TD; \
1005
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
1006
+ Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
1007
+ } \
1008
+}
1009
+
1010
+void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1011
+{
1012
+ VReg *Vd = (VReg *)vd;
1013
+ VReg *Vj = (VReg *)vj;
1014
+ VReg *Vk = (VReg *)vk;
1015
+
1016
+ Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
1017
+}
1018
+
1019
+DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
1020
+DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
1021
+DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
1022
+
1023
+void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1024
+{
1025
+ VReg *Vd = (VReg *)vd;
1026
+ VReg *Vj = (VReg *)vj;
1027
+ VReg *Vk = (VReg *)vk;
1028
+
1029
+ Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
1030
+}
1031
+
1032
+DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
1033
+DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
1034
+DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
1035
+
1036
+void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1037
+{
1038
+ VReg *Vd = (VReg *)vd;
1039
+ VReg *Vj = (VReg *)vj;
1040
+ VReg *Vk = (VReg *)vk;
1041
+
1042
+ Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
1043
+}
1044
+
1045
+DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
1046
+DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
1047
+DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
1048
+
1049
+void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1050
+{
1051
+ VReg *Vd = (VReg *)vd;
1052
+ VReg *Vj = (VReg *)vj;
1053
+ VReg *Vk = (VReg *)vk;
1054
+
1055
+ Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
1056
+}
1057
+
1058
+DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
1059
+DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
1060
+DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
1061
+
1062
+void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1063
+{
1064
+ VReg *Vd = (VReg *)vd;
1065
+ VReg *Vj = (VReg *)vj;
1066
+ VReg *Vk = (VReg *)vk;
1067
+
1068
+ Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
1069
+ int128_make64((uint64_t)Vk->D(0)));
1070
+}
1071
+
1072
+DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
1073
+DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
1074
+DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
1075
+
1076
+void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1077
+{
1078
+ VReg *Vd = (VReg *)vd;
1079
+ VReg *Vj = (VReg *)vj;
1080
+ VReg *Vk = (VReg *)vk;
1081
+
1082
+ Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
1083
+ int128_make64((uint64_t)Vk->D(1)));
1084
+}
1085
+
1086
+DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
1087
+DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
1088
+DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
1089
+
1090
+void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1091
+{
1092
+ VReg *Vd = (VReg *)vd;
1093
+ VReg *Vj = (VReg *)vj;
1094
+ VReg *Vk = (VReg *)vk;
1095
+
1096
+ Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)),
1097
+ int128_make64((uint64_t)Vk->D(0)));
1098
+}
1099
+
1100
+DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
1101
+DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
1102
+DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
1103
+
1104
+void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1105
+{
1106
+ VReg *Vd = (VReg *)vd;
1107
+ VReg *Vj = (VReg *)vj;
1108
+ VReg *Vk = (VReg *)vk;
1109
+
1110
+ Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
1111
+ int128_make64((uint64_t)Vk->D(1)));
1112
+}
1113
+
1114
+DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
1115
+DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
1116
+DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
1117
+
1118
+#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
1119
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
1120
+{ \
1121
+ int i; \
1122
+ VReg *Vd = (VReg *)vd; \
1123
+ VReg *Vj = (VReg *)vj; \
1124
+ VReg *Vk = (VReg *)vk; \
1125
+ typedef __typeof(Vd->ES1(0)) TDS; \
1126
+ typedef __typeof(Vd->EU1(0)) TDU; \
1127
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
1128
+ Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
1129
+ } \
1130
+}
1131
+
1132
+#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
1133
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
1134
+{ \
1135
+ int i; \
1136
+ VReg *Vd = (VReg *)vd; \
1137
+ VReg *Vj = (VReg *)vj; \
1138
+ VReg *Vk = (VReg *)vk; \
1139
+ typedef __typeof(Vd->ES1(0)) TDS; \
1140
+ typedef __typeof(Vd->EU1(0)) TDU; \
1141
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
1142
+ Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
1143
+ } \
1144
+}
1145
+
1146
+void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
1147
+{
1148
+ VReg *Vd = (VReg *)vd;
1149
+ VReg *Vj = (VReg *)vj;
1150
+ VReg *Vk = (VReg *)vk;
1151
+
1152
+ Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
1153
+ int128_makes64(Vk->D(0)));
1154
+}
1155
+
1156
+DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
1157
+DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
1158
+DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
1159
+
1160
+void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
1161
+{
1162
+ VReg *Vd = (VReg *)vd;
1163
+ VReg *Vj = (VReg *)vj;
1164
+ VReg *Vk = (VReg *)vk;
1165
+
1166
+ Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
1167
+ int128_makes64(Vk->D(1)));
1168
+}
1169
+
1170
+DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
1171
+DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
1172
+DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
1173
--
1174
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VAVG.{B/H/W/D}[U];
3
- VAVGR.{B/H/W/D}[U].
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-11-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 17 ++
10
target/loongarch/helper.h | 18 ++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 197 ++++++++++++++++++++
12
target/loongarch/insns.decode | 17 ++
13
target/loongarch/lsx_helper.c | 32 ++++
14
5 files changed, 281 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vaddwod_h_bu_b, vvv)
21
INSN_LSX(vaddwod_w_hu_h, vvv)
22
INSN_LSX(vaddwod_d_wu_w, vvv)
23
INSN_LSX(vaddwod_q_du_d, vvv)
24
+
25
+INSN_LSX(vavg_b, vvv)
26
+INSN_LSX(vavg_h, vvv)
27
+INSN_LSX(vavg_w, vvv)
28
+INSN_LSX(vavg_d, vvv)
29
+INSN_LSX(vavg_bu, vvv)
30
+INSN_LSX(vavg_hu, vvv)
31
+INSN_LSX(vavg_wu, vvv)
32
+INSN_LSX(vavg_du, vvv)
33
+INSN_LSX(vavgr_b, vvv)
34
+INSN_LSX(vavgr_h, vvv)
35
+INSN_LSX(vavgr_w, vvv)
36
+INSN_LSX(vavgr_d, vvv)
37
+INSN_LSX(vavgr_bu, vvv)
38
+INSN_LSX(vavgr_hu, vvv)
39
+INSN_LSX(vavgr_wu, vvv)
40
+INSN_LSX(vavgr_du, vvv)
41
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/helper.h
44
+++ b/target/loongarch/helper.h
45
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
47
DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
+
50
+DEF_HELPER_FLAGS_4(vavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
51
+DEF_HELPER_FLAGS_4(vavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
52
+DEF_HELPER_FLAGS_4(vavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
53
+DEF_HELPER_FLAGS_4(vavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
54
+DEF_HELPER_FLAGS_4(vavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
55
+DEF_HELPER_FLAGS_4(vavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
56
+DEF_HELPER_FLAGS_4(vavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
57
+DEF_HELPER_FLAGS_4(vavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
58
+
59
+DEF_HELPER_FLAGS_4(vavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
60
+DEF_HELPER_FLAGS_4(vavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
61
+DEF_HELPER_FLAGS_4(vavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
62
+DEF_HELPER_FLAGS_4(vavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
63
+DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
64
+DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
65
+DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
66
+DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
67
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
70
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
71
@@ -XXX,XX +XXX,XX @@ TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s)
72
TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s)
73
TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s)
74
TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s)
75
+
76
+static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
77
+ void (*gen_shr_vec)(unsigned, TCGv_vec,
78
+ TCGv_vec, int64_t),
79
+ void (*gen_round_vec)(unsigned, TCGv_vec,
80
+ TCGv_vec, TCGv_vec))
81
+{
82
+ TCGv_vec tmp = tcg_temp_new_vec_matching(t);
83
+ gen_round_vec(vece, tmp, a, b);
84
+ tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
85
+ gen_shr_vec(vece, a, a, 1);
86
+ gen_shr_vec(vece, b, b, 1);
87
+ tcg_gen_add_vec(vece, t, a, b);
88
+ tcg_gen_add_vec(vece, t, t, tmp);
89
+}
90
+
91
+static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
92
+{
93
+ do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
94
+}
95
+
96
+static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
97
+{
98
+ do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
99
+}
100
+
101
+static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
102
+{
103
+ do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
104
+}
105
+
106
+static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
107
+{
108
+ do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
109
+}
110
+
111
+static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
112
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
113
+{
114
+ static const TCGOpcode vecop_list[] = {
115
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
116
+ };
117
+ static const GVecGen3 op[4] = {
118
+ {
119
+ .fniv = gen_vavg_s,
120
+ .fno = gen_helper_vavg_b,
121
+ .opt_opc = vecop_list,
122
+ .vece = MO_8
123
+ },
124
+ {
125
+ .fniv = gen_vavg_s,
126
+ .fno = gen_helper_vavg_h,
127
+ .opt_opc = vecop_list,
128
+ .vece = MO_16
129
+ },
130
+ {
131
+ .fniv = gen_vavg_s,
132
+ .fno = gen_helper_vavg_w,
133
+ .opt_opc = vecop_list,
134
+ .vece = MO_32
135
+ },
136
+ {
137
+ .fniv = gen_vavg_s,
138
+ .fno = gen_helper_vavg_d,
139
+ .opt_opc = vecop_list,
140
+ .vece = MO_64
141
+ },
142
+ };
143
+
144
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
145
+}
146
+
147
+static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
148
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
149
+{
150
+ static const TCGOpcode vecop_list[] = {
151
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
152
+ };
153
+ static const GVecGen3 op[4] = {
154
+ {
155
+ .fniv = gen_vavg_u,
156
+ .fno = gen_helper_vavg_bu,
157
+ .opt_opc = vecop_list,
158
+ .vece = MO_8
159
+ },
160
+ {
161
+ .fniv = gen_vavg_u,
162
+ .fno = gen_helper_vavg_hu,
163
+ .opt_opc = vecop_list,
164
+ .vece = MO_16
165
+ },
166
+ {
167
+ .fniv = gen_vavg_u,
168
+ .fno = gen_helper_vavg_wu,
169
+ .opt_opc = vecop_list,
170
+ .vece = MO_32
171
+ },
172
+ {
173
+ .fniv = gen_vavg_u,
174
+ .fno = gen_helper_vavg_du,
175
+ .opt_opc = vecop_list,
176
+ .vece = MO_64
177
+ },
178
+ };
179
+
180
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
181
+}
182
+
183
+TRANS(vavg_b, gvec_vvv, MO_8, do_vavg_s)
184
+TRANS(vavg_h, gvec_vvv, MO_16, do_vavg_s)
185
+TRANS(vavg_w, gvec_vvv, MO_32, do_vavg_s)
186
+TRANS(vavg_d, gvec_vvv, MO_64, do_vavg_s)
187
+TRANS(vavg_bu, gvec_vvv, MO_8, do_vavg_u)
188
+TRANS(vavg_hu, gvec_vvv, MO_16, do_vavg_u)
189
+TRANS(vavg_wu, gvec_vvv, MO_32, do_vavg_u)
190
+TRANS(vavg_du, gvec_vvv, MO_64, do_vavg_u)
191
+
192
+static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
193
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
194
+{
195
+ static const TCGOpcode vecop_list[] = {
196
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
197
+ };
198
+ static const GVecGen3 op[4] = {
199
+ {
200
+ .fniv = gen_vavgr_s,
201
+ .fno = gen_helper_vavgr_b,
202
+ .opt_opc = vecop_list,
203
+ .vece = MO_8
204
+ },
205
+ {
206
+ .fniv = gen_vavgr_s,
207
+ .fno = gen_helper_vavgr_h,
208
+ .opt_opc = vecop_list,
209
+ .vece = MO_16
210
+ },
211
+ {
212
+ .fniv = gen_vavgr_s,
213
+ .fno = gen_helper_vavgr_w,
214
+ .opt_opc = vecop_list,
215
+ .vece = MO_32
216
+ },
217
+ {
218
+ .fniv = gen_vavgr_s,
219
+ .fno = gen_helper_vavgr_d,
220
+ .opt_opc = vecop_list,
221
+ .vece = MO_64
222
+ },
223
+ };
224
+
225
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
226
+}
227
+
228
+static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
229
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
230
+{
231
+ static const TCGOpcode vecop_list[] = {
232
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
233
+ };
234
+ static const GVecGen3 op[4] = {
235
+ {
236
+ .fniv = gen_vavgr_u,
237
+ .fno = gen_helper_vavgr_bu,
238
+ .opt_opc = vecop_list,
239
+ .vece = MO_8
240
+ },
241
+ {
242
+ .fniv = gen_vavgr_u,
243
+ .fno = gen_helper_vavgr_hu,
244
+ .opt_opc = vecop_list,
245
+ .vece = MO_16
246
+ },
247
+ {
248
+ .fniv = gen_vavgr_u,
249
+ .fno = gen_helper_vavgr_wu,
250
+ .opt_opc = vecop_list,
251
+ .vece = MO_32
252
+ },
253
+ {
254
+ .fniv = gen_vavgr_u,
255
+ .fno = gen_helper_vavgr_du,
256
+ .opt_opc = vecop_list,
257
+ .vece = MO_64
258
+ },
259
+ };
260
+
261
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
262
+}
263
+
264
+TRANS(vavgr_b, gvec_vvv, MO_8, do_vavgr_s)
265
+TRANS(vavgr_h, gvec_vvv, MO_16, do_vavgr_s)
266
+TRANS(vavgr_w, gvec_vvv, MO_32, do_vavgr_s)
267
+TRANS(vavgr_d, gvec_vvv, MO_64, do_vavgr_s)
268
+TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u)
269
+TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u)
270
+TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u)
271
+TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u)
272
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
273
index XXXXXXX..XXXXXXX 100644
274
--- a/target/loongarch/insns.decode
275
+++ b/target/loongarch/insns.decode
276
@@ -XXX,XX +XXX,XX @@ vaddwod_h_bu_b 0111 00000100 00000 ..... ..... ..... @vvv
277
vaddwod_w_hu_h 0111 00000100 00001 ..... ..... ..... @vvv
278
vaddwod_d_wu_w 0111 00000100 00010 ..... ..... ..... @vvv
279
vaddwod_q_du_d 0111 00000100 00011 ..... ..... ..... @vvv
280
+
281
+vavg_b 0111 00000110 01000 ..... ..... ..... @vvv
282
+vavg_h 0111 00000110 01001 ..... ..... ..... @vvv
283
+vavg_w 0111 00000110 01010 ..... ..... ..... @vvv
284
+vavg_d 0111 00000110 01011 ..... ..... ..... @vvv
285
+vavg_bu 0111 00000110 01100 ..... ..... ..... @vvv
286
+vavg_hu 0111 00000110 01101 ..... ..... ..... @vvv
287
+vavg_wu 0111 00000110 01110 ..... ..... ..... @vvv
288
+vavg_du 0111 00000110 01111 ..... ..... ..... @vvv
289
+vavgr_b 0111 00000110 10000 ..... ..... ..... @vvv
290
+vavgr_h 0111 00000110 10001 ..... ..... ..... @vvv
291
+vavgr_w 0111 00000110 10010 ..... ..... ..... @vvv
292
+vavgr_d 0111 00000110 10011 ..... ..... ..... @vvv
293
+vavgr_bu 0111 00000110 10100 ..... ..... ..... @vvv
294
+vavgr_hu 0111 00000110 10101 ..... ..... ..... @vvv
295
+vavgr_wu 0111 00000110 10110 ..... ..... ..... @vvv
296
+vavgr_du 0111 00000110 10111 ..... ..... ..... @vvv
297
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
298
index XXXXXXX..XXXXXXX 100644
299
--- a/target/loongarch/lsx_helper.c
300
+++ b/target/loongarch/lsx_helper.c
301
@@ -XXX,XX +XXX,XX @@ void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
302
DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
303
DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
304
DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
305
+
306
/*
 * Overflow-free averages: each operand is halved first and the bit lost
 * by the two shifts is added back afterwards.
 *   DO_VAVG  adds 1 only when both low bits are set (round down).
 *   DO_VAVGR adds 1 when either low bit is set (round up).
 * Every argument is parenthesized so that compound expressions expand
 * correctly.  Arguments are evaluated more than once, so they must be
 * free of side effects.  For negative signed operands the result relies
 * on >> being an arithmetic shift.
 */
#define DO_VAVG(a, b)  (((a) >> 1) + ((b) >> 1) + ((a) & (b) & 1))
#define DO_VAVGR(a, b) (((a) >> 1) + ((b) >> 1) + (((a) | (b)) & 1))
308
+
309
/*
 * Define HELPER(NAME): an element-wise three-operand helper.
 *
 * vd, vj and vk are used as VReg pointers.  For each of the
 * LSX_LEN / BIT elements, the E() element of vd is set to DO_OP applied
 * to the corresponding E() elements of vj and vk.  The trailing uint32_t
 * argument is not used by the body.
 */
#define DO_3OP(NAME, BIT, E, DO_OP)                              \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)      \
{                                                                \
    VReg *dst = vd;                                              \
    VReg *src_j = vj;                                            \
    VReg *src_k = vk;                                            \
    int idx;                                                     \
                                                                 \
    for (idx = 0; idx < LSX_LEN/BIT; idx++) {                    \
        dst->E(idx) = DO_OP(src_j->E(idx), src_k->E(idx));       \
    }                                                            \
}
320
+
321
+DO_3OP(vavg_b, 8, B, DO_VAVG)
322
+DO_3OP(vavg_h, 16, H, DO_VAVG)
323
+DO_3OP(vavg_w, 32, W, DO_VAVG)
324
+DO_3OP(vavg_d, 64, D, DO_VAVG)
325
+DO_3OP(vavgr_b, 8, B, DO_VAVGR)
326
+DO_3OP(vavgr_h, 16, H, DO_VAVGR)
327
+DO_3OP(vavgr_w, 32, W, DO_VAVGR)
328
+DO_3OP(vavgr_d, 64, D, DO_VAVGR)
329
+DO_3OP(vavg_bu, 8, UB, DO_VAVG)
330
+DO_3OP(vavg_hu, 16, UH, DO_VAVG)
331
+DO_3OP(vavg_wu, 32, UW, DO_VAVG)
332
+DO_3OP(vavg_du, 64, UD, DO_VAVG)
333
+DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
334
+DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
335
+DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
336
+DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
337
--
338
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VABSD.{B/H/W/D}[U].
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-12-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 9 ++
9
target/loongarch/helper.h | 9 ++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 95 +++++++++++++++++++++
11
target/loongarch/insns.decode | 9 ++
12
target/loongarch/lsx_helper.c | 11 +++
13
5 files changed, 133 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vavgr_bu, vvv)
20
INSN_LSX(vavgr_hu, vvv)
21
INSN_LSX(vavgr_wu, vvv)
22
INSN_LSX(vavgr_du, vvv)
23
+
24
+INSN_LSX(vabsd_b, vvv)
25
+INSN_LSX(vabsd_h, vvv)
26
+INSN_LSX(vabsd_w, vvv)
27
+INSN_LSX(vabsd_d, vvv)
28
+INSN_LSX(vabsd_bu, vvv)
29
+INSN_LSX(vabsd_hu, vvv)
30
+INSN_LSX(vabsd_wu, vvv)
31
+INSN_LSX(vabsd_du, vvv)
32
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/loongarch/helper.h
35
+++ b/target/loongarch/helper.h
36
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+
41
+DEF_HELPER_FLAGS_4(vabsd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
+DEF_HELPER_FLAGS_4(vabsd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
43
+DEF_HELPER_FLAGS_4(vabsd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
44
+DEF_HELPER_FLAGS_4(vabsd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
45
+DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
+DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
47
+DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
48
+DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
52
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
53
@@ -XXX,XX +XXX,XX @@ TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u)
54
TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u)
55
TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u)
56
TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u)
57
+
58
+static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
59
+{
60
+ tcg_gen_smax_vec(vece, t, a, b);
61
+ tcg_gen_smin_vec(vece, a, a, b);
62
+ tcg_gen_sub_vec(vece, t, t, a);
63
+}
64
+
65
+static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
66
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
67
+{
68
+ static const TCGOpcode vecop_list[] = {
69
+ INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
70
+ };
71
+ static const GVecGen3 op[4] = {
72
+ {
73
+ .fniv = gen_vabsd_s,
74
+ .fno = gen_helper_vabsd_b,
75
+ .opt_opc = vecop_list,
76
+ .vece = MO_8
77
+ },
78
+ {
79
+ .fniv = gen_vabsd_s,
80
+ .fno = gen_helper_vabsd_h,
81
+ .opt_opc = vecop_list,
82
+ .vece = MO_16
83
+ },
84
+ {
85
+ .fniv = gen_vabsd_s,
86
+ .fno = gen_helper_vabsd_w,
87
+ .opt_opc = vecop_list,
88
+ .vece = MO_32
89
+ },
90
+ {
91
+ .fniv = gen_vabsd_s,
92
+ .fno = gen_helper_vabsd_d,
93
+ .opt_opc = vecop_list,
94
+ .vece = MO_64
95
+ },
96
+ };
97
+
98
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
99
+}
100
+
101
+static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
102
+{
103
+ tcg_gen_umax_vec(vece, t, a, b);
104
+ tcg_gen_umin_vec(vece, a, a, b);
105
+ tcg_gen_sub_vec(vece, t, t, a);
106
+}
107
+
108
+static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
109
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
110
+{
111
+ static const TCGOpcode vecop_list[] = {
112
+ INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
113
+ };
114
+ static const GVecGen3 op[4] = {
115
+ {
116
+ .fniv = gen_vabsd_u,
117
+ .fno = gen_helper_vabsd_bu,
118
+ .opt_opc = vecop_list,
119
+ .vece = MO_8
120
+ },
121
+ {
122
+ .fniv = gen_vabsd_u,
123
+ .fno = gen_helper_vabsd_hu,
124
+ .opt_opc = vecop_list,
125
+ .vece = MO_16
126
+ },
127
+ {
128
+ .fniv = gen_vabsd_u,
129
+ .fno = gen_helper_vabsd_wu,
130
+ .opt_opc = vecop_list,
131
+ .vece = MO_32
132
+ },
133
+ {
134
+ .fniv = gen_vabsd_u,
135
+ .fno = gen_helper_vabsd_du,
136
+ .opt_opc = vecop_list,
137
+ .vece = MO_64
138
+ },
139
+ };
140
+
141
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
142
+}
143
+
144
+TRANS(vabsd_b, gvec_vvv, MO_8, do_vabsd_s)
145
+TRANS(vabsd_h, gvec_vvv, MO_16, do_vabsd_s)
146
+TRANS(vabsd_w, gvec_vvv, MO_32, do_vabsd_s)
147
+TRANS(vabsd_d, gvec_vvv, MO_64, do_vabsd_s)
148
+TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u)
149
+TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u)
150
+TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u)
151
+TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u)
152
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
153
index XXXXXXX..XXXXXXX 100644
154
--- a/target/loongarch/insns.decode
155
+++ b/target/loongarch/insns.decode
156
@@ -XXX,XX +XXX,XX @@ vavgr_bu 0111 00000110 10100 ..... ..... ..... @vvv
157
vavgr_hu 0111 00000110 10101 ..... ..... ..... @vvv
158
vavgr_wu 0111 00000110 10110 ..... ..... ..... @vvv
159
vavgr_du 0111 00000110 10111 ..... ..... ..... @vvv
160
+
161
+vabsd_b 0111 00000110 00000 ..... ..... ..... @vvv
162
+vabsd_h 0111 00000110 00001 ..... ..... ..... @vvv
163
+vabsd_w 0111 00000110 00010 ..... ..... ..... @vvv
164
+vabsd_d 0111 00000110 00011 ..... ..... ..... @vvv
165
+vabsd_bu 0111 00000110 00100 ..... ..... ..... @vvv
166
+vabsd_hu 0111 00000110 00101 ..... ..... ..... @vvv
167
+vabsd_wu 0111 00000110 00110 ..... ..... ..... @vvv
168
+vabsd_du 0111 00000110 00111 ..... ..... ..... @vvv
169
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/loongarch/lsx_helper.c
172
+++ b/target/loongarch/lsx_helper.c
173
@@ -XXX,XX +XXX,XX @@ DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
174
DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
175
DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
176
DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
177
+
178
/*
 * Absolute difference of a and b: the larger operand minus the smaller.
 * Arguments are parenthesized so that compound expressions expand
 * correctly; they are evaluated more than once and must be free of side
 * effects.
 */
#define DO_VABSD(a, b)  (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
179
+
180
+DO_3OP(vabsd_b, 8, B, DO_VABSD)
181
+DO_3OP(vabsd_h, 16, H, DO_VABSD)
182
+DO_3OP(vabsd_w, 32, W, DO_VABSD)
183
+DO_3OP(vabsd_d, 64, D, DO_VABSD)
184
+DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
185
+DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
186
+DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
187
+DO_3OP(vabsd_du, 64, UD, DO_VABSD)
188
--
189
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VADDA.{B/H/W/D}.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-13-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 5 ++
9
target/loongarch/helper.h | 5 ++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 53 +++++++++++++++++++++
11
target/loongarch/insns.decode | 5 ++
12
target/loongarch/lsx_helper.c | 19 ++++++++
13
5 files changed, 87 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vabsd_bu, vvv)
20
INSN_LSX(vabsd_hu, vvv)
21
INSN_LSX(vabsd_wu, vvv)
22
INSN_LSX(vabsd_du, vvv)
23
+
24
+INSN_LSX(vadda_b, vvv)
25
+INSN_LSX(vadda_h, vvv)
26
+INSN_LSX(vadda_w, vvv)
27
+INSN_LSX(vadda_d, vvv)
28
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/loongarch/helper.h
31
+++ b/target/loongarch/helper.h
32
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+
37
+DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
44
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
45
@@ -XXX,XX +XXX,XX @@ TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u)
46
TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u)
47
TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u)
48
TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u)
49
+
50
+static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
51
+{
52
+ TCGv_vec t1, t2;
53
+
54
+ t1 = tcg_temp_new_vec_matching(a);
55
+ t2 = tcg_temp_new_vec_matching(b);
56
+
57
+ tcg_gen_abs_vec(vece, t1, a);
58
+ tcg_gen_abs_vec(vece, t2, b);
59
+ tcg_gen_add_vec(vece, t, t1, t2);
60
+}
61
+
62
+static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
63
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
64
+{
65
+ static const TCGOpcode vecop_list[] = {
66
+ INDEX_op_abs_vec, INDEX_op_add_vec, 0
67
+ };
68
+ static const GVecGen3 op[4] = {
69
+ {
70
+ .fniv = gen_vadda,
71
+ .fno = gen_helper_vadda_b,
72
+ .opt_opc = vecop_list,
73
+ .vece = MO_8
74
+ },
75
+ {
76
+ .fniv = gen_vadda,
77
+ .fno = gen_helper_vadda_h,
78
+ .opt_opc = vecop_list,
79
+ .vece = MO_16
80
+ },
81
+ {
82
+ .fniv = gen_vadda,
83
+ .fno = gen_helper_vadda_w,
84
+ .opt_opc = vecop_list,
85
+ .vece = MO_32
86
+ },
87
+ {
88
+ .fniv = gen_vadda,
89
+ .fno = gen_helper_vadda_d,
90
+ .opt_opc = vecop_list,
91
+ .vece = MO_64
92
+ },
93
+ };
94
+
95
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
96
+}
97
+
98
+TRANS(vadda_b, gvec_vvv, MO_8, do_vadda)
99
+TRANS(vadda_h, gvec_vvv, MO_16, do_vadda)
100
+TRANS(vadda_w, gvec_vvv, MO_32, do_vadda)
101
+TRANS(vadda_d, gvec_vvv, MO_64, do_vadda)
102
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/loongarch/insns.decode
105
+++ b/target/loongarch/insns.decode
106
@@ -XXX,XX +XXX,XX @@ vabsd_bu 0111 00000110 00100 ..... ..... ..... @vvv
107
vabsd_hu 0111 00000110 00101 ..... ..... ..... @vvv
108
vabsd_wu 0111 00000110 00110 ..... ..... ..... @vvv
109
vabsd_du 0111 00000110 00111 ..... ..... ..... @vvv
110
+
111
+vadda_b 0111 00000101 11000 ..... ..... ..... @vvv
112
+vadda_h 0111 00000101 11001 ..... ..... ..... @vvv
113
+vadda_w 0111 00000101 11010 ..... ..... ..... @vvv
114
+vadda_d 0111 00000101 11011 ..... ..... ..... @vvv
115
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/target/loongarch/lsx_helper.c
118
+++ b/target/loongarch/lsx_helper.c
119
@@ -XXX,XX +XXX,XX @@ DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
120
DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
121
DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
122
DO_3OP(vabsd_du, 64, UD, DO_VABSD)
123
+
124
/*
 * Absolute value of a.  The argument is parenthesized so that compound
 * expressions such as DO_VABS(x - y) negate the whole expression; it is
 * evaluated more than once and must be free of side effects.
 */
#define DO_VABS(a)  (((a) < 0) ? (-(a)) : (a))
125
+
126
/*
 * Define HELPER(NAME): element-wise sum of two transformed operands.
 *
 * vd, vj and vk are used as VReg pointers.  For each of the
 * LSX_LEN / BIT elements, the E() element of vd is set to
 * DO_OP(vj element) + DO_OP(vk element).  The trailing uint32_t
 * argument is not used by the body.
 */
#define DO_VADDA(NAME, BIT, E, DO_OP)                            \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)      \
{                                                                \
    VReg *dst = vd;                                              \
    VReg *src_j = vj;                                            \
    VReg *src_k = vk;                                            \
    int idx;                                                     \
                                                                 \
    for (idx = 0; idx < LSX_LEN/BIT; idx++) {                    \
        dst->E(idx) = DO_OP(src_j->E(idx)) + DO_OP(src_k->E(idx)); \
    }                                                            \
}
137
+
138
+DO_VADDA(vadda_b, 8, B, DO_VABS)
139
+DO_VADDA(vadda_h, 16, H, DO_VABS)
140
+DO_VADDA(vadda_w, 32, W, DO_VABS)
141
+DO_VADDA(vadda_d, 64, D, DO_VABS)
142
--
143
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VMAX[I].{B/H/W/D}[U];
3
- VMIN[I].{B/H/W/D}[U].
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-14-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 33 ++++
10
target/loongarch/helper.h | 18 ++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 200 ++++++++++++++++++++
12
target/loongarch/insns.decode | 35 ++++
13
target/loongarch/lsx_helper.c | 33 ++++
14
5 files changed, 319 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vadda_b, vvv)
21
INSN_LSX(vadda_h, vvv)
22
INSN_LSX(vadda_w, vvv)
23
INSN_LSX(vadda_d, vvv)
24
+
25
+INSN_LSX(vmax_b, vvv)
26
+INSN_LSX(vmax_h, vvv)
27
+INSN_LSX(vmax_w, vvv)
28
+INSN_LSX(vmax_d, vvv)
29
+INSN_LSX(vmin_b, vvv)
30
+INSN_LSX(vmin_h, vvv)
31
+INSN_LSX(vmin_w, vvv)
32
+INSN_LSX(vmin_d, vvv)
33
+INSN_LSX(vmax_bu, vvv)
34
+INSN_LSX(vmax_hu, vvv)
35
+INSN_LSX(vmax_wu, vvv)
36
+INSN_LSX(vmax_du, vvv)
37
+INSN_LSX(vmin_bu, vvv)
38
+INSN_LSX(vmin_hu, vvv)
39
+INSN_LSX(vmin_wu, vvv)
40
+INSN_LSX(vmin_du, vvv)
41
+INSN_LSX(vmaxi_b, vv_i)
42
+INSN_LSX(vmaxi_h, vv_i)
43
+INSN_LSX(vmaxi_w, vv_i)
44
+INSN_LSX(vmaxi_d, vv_i)
45
+INSN_LSX(vmini_b, vv_i)
46
+INSN_LSX(vmini_h, vv_i)
47
+INSN_LSX(vmini_w, vv_i)
48
+INSN_LSX(vmini_d, vv_i)
49
+INSN_LSX(vmaxi_bu, vv_i)
50
+INSN_LSX(vmaxi_hu, vv_i)
51
+INSN_LSX(vmaxi_wu, vv_i)
52
+INSN_LSX(vmaxi_du, vv_i)
53
+INSN_LSX(vmini_bu, vv_i)
54
+INSN_LSX(vmini_hu, vv_i)
55
+INSN_LSX(vmini_wu, vv_i)
56
+INSN_LSX(vmini_du, vv_i)
57
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/target/loongarch/helper.h
60
+++ b/target/loongarch/helper.h
61
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
62
DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
63
DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
64
DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
65
+
66
+DEF_HELPER_FLAGS_4(vmini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
67
+DEF_HELPER_FLAGS_4(vmini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
68
+DEF_HELPER_FLAGS_4(vmini_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
69
+DEF_HELPER_FLAGS_4(vmini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
70
+DEF_HELPER_FLAGS_4(vmini_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
71
+DEF_HELPER_FLAGS_4(vmini_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
72
+DEF_HELPER_FLAGS_4(vmini_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
73
+DEF_HELPER_FLAGS_4(vmini_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
74
+
75
+DEF_HELPER_FLAGS_4(vmaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
76
+DEF_HELPER_FLAGS_4(vmaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
77
+DEF_HELPER_FLAGS_4(vmaxi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
78
+DEF_HELPER_FLAGS_4(vmaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
79
+DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
80
+DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
81
+DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
82
+DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
83
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
84
index XXXXXXX..XXXXXXX 100644
85
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
86
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
87
@@ -XXX,XX +XXX,XX @@ TRANS(vadda_b, gvec_vvv, MO_8, do_vadda)
88
TRANS(vadda_h, gvec_vvv, MO_16, do_vadda)
89
TRANS(vadda_w, gvec_vvv, MO_32, do_vadda)
90
TRANS(vadda_d, gvec_vvv, MO_64, do_vadda)
91
+
92
+TRANS(vmax_b, gvec_vvv, MO_8, tcg_gen_gvec_smax)
93
+TRANS(vmax_h, gvec_vvv, MO_16, tcg_gen_gvec_smax)
94
+TRANS(vmax_w, gvec_vvv, MO_32, tcg_gen_gvec_smax)
95
+TRANS(vmax_d, gvec_vvv, MO_64, tcg_gen_gvec_smax)
96
+TRANS(vmax_bu, gvec_vvv, MO_8, tcg_gen_gvec_umax)
97
+TRANS(vmax_hu, gvec_vvv, MO_16, tcg_gen_gvec_umax)
98
+TRANS(vmax_wu, gvec_vvv, MO_32, tcg_gen_gvec_umax)
99
+TRANS(vmax_du, gvec_vvv, MO_64, tcg_gen_gvec_umax)
100
+
101
+TRANS(vmin_b, gvec_vvv, MO_8, tcg_gen_gvec_smin)
102
+TRANS(vmin_h, gvec_vvv, MO_16, tcg_gen_gvec_smin)
103
+TRANS(vmin_w, gvec_vvv, MO_32, tcg_gen_gvec_smin)
104
+TRANS(vmin_d, gvec_vvv, MO_64, tcg_gen_gvec_smin)
105
+TRANS(vmin_bu, gvec_vvv, MO_8, tcg_gen_gvec_umin)
106
+TRANS(vmin_hu, gvec_vvv, MO_16, tcg_gen_gvec_umin)
107
+TRANS(vmin_wu, gvec_vvv, MO_32, tcg_gen_gvec_umin)
108
+TRANS(vmin_du, gvec_vvv, MO_64, tcg_gen_gvec_umin)
109
+
110
+static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
111
+{
112
+ tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
113
+}
114
+
115
+static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
116
+{
117
+ tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
118
+}
119
+
120
+static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
121
+{
122
+ tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
123
+}
124
+
125
+static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
126
+{
127
+ tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
128
+}
129
+
130
+static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
131
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
132
+{
133
+ static const TCGOpcode vecop_list[] = {
134
+ INDEX_op_smin_vec, 0
135
+ };
136
+ static const GVecGen2i op[4] = {
137
+ {
138
+ .fniv = gen_vmini_s,
139
+ .fnoi = gen_helper_vmini_b,
140
+ .opt_opc = vecop_list,
141
+ .vece = MO_8
142
+ },
143
+ {
144
+ .fniv = gen_vmini_s,
145
+ .fnoi = gen_helper_vmini_h,
146
+ .opt_opc = vecop_list,
147
+ .vece = MO_16
148
+ },
149
+ {
150
+ .fniv = gen_vmini_s,
151
+ .fnoi = gen_helper_vmini_w,
152
+ .opt_opc = vecop_list,
153
+ .vece = MO_32
154
+ },
155
+ {
156
+ .fniv = gen_vmini_s,
157
+ .fnoi = gen_helper_vmini_d,
158
+ .opt_opc = vecop_list,
159
+ .vece = MO_64
160
+ },
161
+ };
162
+
163
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
164
+}
165
+
166
+static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
167
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
168
+{
169
+ static const TCGOpcode vecop_list[] = {
170
+ INDEX_op_umin_vec, 0
171
+ };
172
+ static const GVecGen2i op[4] = {
173
+ {
174
+ .fniv = gen_vmini_u,
175
+ .fnoi = gen_helper_vmini_bu,
176
+ .opt_opc = vecop_list,
177
+ .vece = MO_8
178
+ },
179
+ {
180
+ .fniv = gen_vmini_u,
181
+ .fnoi = gen_helper_vmini_hu,
182
+ .opt_opc = vecop_list,
183
+ .vece = MO_16
184
+ },
185
+ {
186
+ .fniv = gen_vmini_u,
187
+ .fnoi = gen_helper_vmini_wu,
188
+ .opt_opc = vecop_list,
189
+ .vece = MO_32
190
+ },
191
+ {
192
+ .fniv = gen_vmini_u,
193
+ .fnoi = gen_helper_vmini_du,
194
+ .opt_opc = vecop_list,
195
+ .vece = MO_64
196
+ },
197
+ };
198
+
199
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
200
+}
201
+
202
+TRANS(vmini_b, gvec_vv_i, MO_8, do_vmini_s)
203
+TRANS(vmini_h, gvec_vv_i, MO_16, do_vmini_s)
204
+TRANS(vmini_w, gvec_vv_i, MO_32, do_vmini_s)
205
+TRANS(vmini_d, gvec_vv_i, MO_64, do_vmini_s)
206
+TRANS(vmini_bu, gvec_vv_i, MO_8, do_vmini_u)
207
+TRANS(vmini_hu, gvec_vv_i, MO_16, do_vmini_u)
208
+TRANS(vmini_wu, gvec_vv_i, MO_32, do_vmini_u)
209
+TRANS(vmini_du, gvec_vv_i, MO_64, do_vmini_u)
210
+
211
+static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
212
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
213
+{
214
+ static const TCGOpcode vecop_list[] = {
215
+ INDEX_op_smax_vec, 0
216
+ };
217
+ static const GVecGen2i op[4] = {
218
+ {
219
+ .fniv = gen_vmaxi_s,
220
+ .fnoi = gen_helper_vmaxi_b,
221
+ .opt_opc = vecop_list,
222
+ .vece = MO_8
223
+ },
224
+ {
225
+ .fniv = gen_vmaxi_s,
226
+ .fnoi = gen_helper_vmaxi_h,
227
+ .opt_opc = vecop_list,
228
+ .vece = MO_16
229
+ },
230
+ {
231
+ .fniv = gen_vmaxi_s,
232
+ .fnoi = gen_helper_vmaxi_w,
233
+ .opt_opc = vecop_list,
234
+ .vece = MO_32
235
+ },
236
+ {
237
+ .fniv = gen_vmaxi_s,
238
+ .fnoi = gen_helper_vmaxi_d,
239
+ .opt_opc = vecop_list,
240
+ .vece = MO_64
241
+ },
242
+ };
243
+
244
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
245
+}
246
+
247
+static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
248
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
249
+{
250
+ static const TCGOpcode vecop_list[] = {
251
+ INDEX_op_umax_vec, 0
252
+ };
253
+ static const GVecGen2i op[4] = {
254
+ {
255
+ .fniv = gen_vmaxi_u,
256
+ .fnoi = gen_helper_vmaxi_bu,
257
+ .opt_opc = vecop_list,
258
+ .vece = MO_8
259
+ },
260
+ {
261
+ .fniv = gen_vmaxi_u,
262
+ .fnoi = gen_helper_vmaxi_hu,
263
+ .opt_opc = vecop_list,
264
+ .vece = MO_16
265
+ },
266
+ {
267
+ .fniv = gen_vmaxi_u,
268
+ .fnoi = gen_helper_vmaxi_wu,
269
+ .opt_opc = vecop_list,
270
+ .vece = MO_32
271
+ },
272
+ {
273
+ .fniv = gen_vmaxi_u,
274
+ .fnoi = gen_helper_vmaxi_du,
275
+ .opt_opc = vecop_list,
276
+ .vece = MO_64
277
+ },
278
+ };
279
+
280
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
281
+}
282
+
283
+TRANS(vmaxi_b, gvec_vv_i, MO_8, do_vmaxi_s)
284
+TRANS(vmaxi_h, gvec_vv_i, MO_16, do_vmaxi_s)
285
+TRANS(vmaxi_w, gvec_vv_i, MO_32, do_vmaxi_s)
286
+TRANS(vmaxi_d, gvec_vv_i, MO_64, do_vmaxi_s)
287
+TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u)
288
+TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u)
289
+TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u)
290
+TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u)
291
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
292
index XXXXXXX..XXXXXXX 100644
293
--- a/target/loongarch/insns.decode
294
+++ b/target/loongarch/insns.decode
295
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
296
@vv .... ........ ..... ..... vj:5 vd:5 &vv
297
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
298
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
299
+@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
300
301
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
302
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
303
@@ -XXX,XX +XXX,XX @@ vadda_b 0111 00000101 11000 ..... ..... ..... @vvv
304
vadda_h 0111 00000101 11001 ..... ..... ..... @vvv
305
vadda_w 0111 00000101 11010 ..... ..... ..... @vvv
306
vadda_d 0111 00000101 11011 ..... ..... ..... @vvv
307
+
308
+vmax_b 0111 00000111 00000 ..... ..... ..... @vvv
309
+vmax_h 0111 00000111 00001 ..... ..... ..... @vvv
310
+vmax_w 0111 00000111 00010 ..... ..... ..... @vvv
311
+vmax_d 0111 00000111 00011 ..... ..... ..... @vvv
312
+vmaxi_b 0111 00101001 00000 ..... ..... ..... @vv_i5
313
+vmaxi_h 0111 00101001 00001 ..... ..... ..... @vv_i5
314
+vmaxi_w 0111 00101001 00010 ..... ..... ..... @vv_i5
315
+vmaxi_d 0111 00101001 00011 ..... ..... ..... @vv_i5
316
+vmax_bu 0111 00000111 01000 ..... ..... ..... @vvv
317
+vmax_hu 0111 00000111 01001 ..... ..... ..... @vvv
318
+vmax_wu 0111 00000111 01010 ..... ..... ..... @vvv
319
+vmax_du 0111 00000111 01011 ..... ..... ..... @vvv
320
+vmaxi_bu 0111 00101001 01000 ..... ..... ..... @vv_ui5
321
+vmaxi_hu 0111 00101001 01001 ..... ..... ..... @vv_ui5
322
+vmaxi_wu 0111 00101001 01010 ..... ..... ..... @vv_ui5
323
+vmaxi_du 0111 00101001 01011 ..... ..... ..... @vv_ui5
324
+
325
+vmin_b 0111 00000111 00100 ..... ..... ..... @vvv
326
+vmin_h 0111 00000111 00101 ..... ..... ..... @vvv
327
+vmin_w 0111 00000111 00110 ..... ..... ..... @vvv
328
+vmin_d 0111 00000111 00111 ..... ..... ..... @vvv
329
+vmini_b 0111 00101001 00100 ..... ..... ..... @vv_i5
330
+vmini_h 0111 00101001 00101 ..... ..... ..... @vv_i5
331
+vmini_w 0111 00101001 00110 ..... ..... ..... @vv_i5
332
+vmini_d 0111 00101001 00111 ..... ..... ..... @vv_i5
333
+vmin_bu 0111 00000111 01100 ..... ..... ..... @vvv
334
+vmin_hu 0111 00000111 01101 ..... ..... ..... @vvv
335
+vmin_wu 0111 00000111 01110 ..... ..... ..... @vvv
336
+vmin_du 0111 00000111 01111 ..... ..... ..... @vvv
337
+vmini_bu 0111 00101001 01100 ..... ..... ..... @vv_ui5
338
+vmini_hu 0111 00101001 01101 ..... ..... ..... @vv_ui5
339
+vmini_wu 0111 00101001 01110 ..... ..... ..... @vv_ui5
340
+vmini_du 0111 00101001 01111 ..... ..... ..... @vv_ui5
341
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/target/loongarch/lsx_helper.c
344
+++ b/target/loongarch/lsx_helper.c
345
@@ -XXX,XX +XXX,XX @@ DO_VADDA(vadda_b, 8, B, DO_VABS)
346
DO_VADDA(vadda_h, 16, H, DO_VABS)
347
DO_VADDA(vadda_w, 32, W, DO_VABS)
348
DO_VADDA(vadda_d, 64, D, DO_VABS)
349
+
350
/*
 * Smaller / larger of two values.  Arguments are parenthesized so that
 * operators with lower precedence than the comparison (|, &, ?: ...)
 * expand correctly; they are evaluated more than once and must be free
 * of side effects.
 */
#define DO_MIN(a, b) ((a) < (b) ? (a) : (b))
#define DO_MAX(a, b) ((a) > (b) ? (a) : (b))
352
+
353
/*
 * Define HELPER(NAME): element-wise min/max against an immediate.
 *
 * vd and vj are used as VReg pointers.  @imm is first converted to the
 * type of the destination's E() element (TD); then, for each of the
 * LSX_LEN / BIT elements, the E() element of vd is set to DO_OP applied
 * to the vj element and the converted immediate.  The trailing uint32_t
 * argument is not used by the body.
 */
#define VMINMAXI(NAME, BIT, E, DO_OP)                            \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v)  \
{                                                                \
    VReg *dst = vd;                                              \
    VReg *src = vj;                                              \
    typedef __typeof(dst->E(0)) TD;                              \
    const TD bound = (TD)imm;                                    \
    int idx;                                                     \
                                                                 \
    for (idx = 0; idx < LSX_LEN/BIT; idx++) {                    \
        dst->E(idx) = DO_OP(src->E(idx), bound);                 \
    }                                                            \
}
365
+
366
+VMINMAXI(vmini_b, 8, B, DO_MIN)
367
+VMINMAXI(vmini_h, 16, H, DO_MIN)
368
+VMINMAXI(vmini_w, 32, W, DO_MIN)
369
+VMINMAXI(vmini_d, 64, D, DO_MIN)
370
+VMINMAXI(vmaxi_b, 8, B, DO_MAX)
371
+VMINMAXI(vmaxi_h, 16, H, DO_MAX)
372
+VMINMAXI(vmaxi_w, 32, W, DO_MAX)
373
+VMINMAXI(vmaxi_d, 64, D, DO_MAX)
374
+VMINMAXI(vmini_bu, 8, UB, DO_MIN)
375
+VMINMAXI(vmini_hu, 16, UH, DO_MIN)
376
+VMINMAXI(vmini_wu, 32, UW, DO_MIN)
377
+VMINMAXI(vmini_du, 64, UD, DO_MIN)
378
+VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
379
+VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
380
+VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
381
+VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
382
--
383
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VMUL.{B/H/W/D};
3
- VMUH.{B/H/W/D}[U];
4
- VMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
5
- VMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
6
1
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230504122810.4094787-15-gaosong@loongson.cn>
10
---
11
target/loongarch/disas.c | 38 ++
12
target/loongarch/helper.h | 30 ++
13
target/loongarch/insn_trans/trans_lsx.c.inc | 550 ++++++++++++++++++++
14
target/loongarch/insns.decode | 38 ++
15
target/loongarch/lsx_helper.c | 76 +++
16
5 files changed, 732 insertions(+)
17
18
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/loongarch/disas.c
21
+++ b/target/loongarch/disas.c
22
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmini_bu, vv_i)
23
INSN_LSX(vmini_hu, vv_i)
24
INSN_LSX(vmini_wu, vv_i)
25
INSN_LSX(vmini_du, vv_i)
26
+
27
+INSN_LSX(vmul_b, vvv)
28
+INSN_LSX(vmul_h, vvv)
29
+INSN_LSX(vmul_w, vvv)
30
+INSN_LSX(vmul_d, vvv)
31
+INSN_LSX(vmuh_b, vvv)
32
+INSN_LSX(vmuh_h, vvv)
33
+INSN_LSX(vmuh_w, vvv)
34
+INSN_LSX(vmuh_d, vvv)
35
+INSN_LSX(vmuh_bu, vvv)
36
+INSN_LSX(vmuh_hu, vvv)
37
+INSN_LSX(vmuh_wu, vvv)
38
+INSN_LSX(vmuh_du, vvv)
39
+
40
+INSN_LSX(vmulwev_h_b, vvv)
41
+INSN_LSX(vmulwev_w_h, vvv)
42
+INSN_LSX(vmulwev_d_w, vvv)
43
+INSN_LSX(vmulwev_q_d, vvv)
44
+INSN_LSX(vmulwod_h_b, vvv)
45
+INSN_LSX(vmulwod_w_h, vvv)
46
+INSN_LSX(vmulwod_d_w, vvv)
47
+INSN_LSX(vmulwod_q_d, vvv)
48
+INSN_LSX(vmulwev_h_bu, vvv)
49
+INSN_LSX(vmulwev_w_hu, vvv)
50
+INSN_LSX(vmulwev_d_wu, vvv)
51
+INSN_LSX(vmulwev_q_du, vvv)
52
+INSN_LSX(vmulwod_h_bu, vvv)
53
+INSN_LSX(vmulwod_w_hu, vvv)
54
+INSN_LSX(vmulwod_d_wu, vvv)
55
+INSN_LSX(vmulwod_q_du, vvv)
56
+INSN_LSX(vmulwev_h_bu_b, vvv)
57
+INSN_LSX(vmulwev_w_hu_h, vvv)
58
+INSN_LSX(vmulwev_d_wu_w, vvv)
59
+INSN_LSX(vmulwev_q_du_d, vvv)
60
+INSN_LSX(vmulwod_h_bu_b, vvv)
61
+INSN_LSX(vmulwod_w_hu_h, vvv)
62
+INSN_LSX(vmulwod_d_wu_w, vvv)
63
+INSN_LSX(vmulwod_q_du_d, vvv)
64
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/loongarch/helper.h
67
+++ b/target/loongarch/helper.h
68
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
69
DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
70
DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
71
DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
72
+
73
+DEF_HELPER_FLAGS_4(vmuh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
74
+DEF_HELPER_FLAGS_4(vmuh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
75
+DEF_HELPER_FLAGS_4(vmuh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
76
+DEF_HELPER_FLAGS_4(vmuh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
77
+DEF_HELPER_FLAGS_4(vmuh_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
78
+DEF_HELPER_FLAGS_4(vmuh_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
79
+DEF_HELPER_FLAGS_4(vmuh_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
80
+DEF_HELPER_FLAGS_4(vmuh_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
81
+
82
+DEF_HELPER_FLAGS_4(vmulwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
83
+DEF_HELPER_FLAGS_4(vmulwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
84
+DEF_HELPER_FLAGS_4(vmulwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
85
+DEF_HELPER_FLAGS_4(vmulwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
86
+DEF_HELPER_FLAGS_4(vmulwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
87
+DEF_HELPER_FLAGS_4(vmulwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
88
+
89
+DEF_HELPER_FLAGS_4(vmulwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
90
+DEF_HELPER_FLAGS_4(vmulwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
91
+DEF_HELPER_FLAGS_4(vmulwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
92
+DEF_HELPER_FLAGS_4(vmulwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
93
+DEF_HELPER_FLAGS_4(vmulwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
94
+DEF_HELPER_FLAGS_4(vmulwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
95
+
96
+DEF_HELPER_FLAGS_4(vmulwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
97
+DEF_HELPER_FLAGS_4(vmulwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
98
+DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
99
+DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
100
+DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
101
+DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
102
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
105
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
106
@@ -XXX,XX +XXX,XX @@ TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u)
107
TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u)
108
TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u)
109
TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u)
110
+
111
+TRANS(vmul_b, gvec_vvv, MO_8, tcg_gen_gvec_mul)
112
+TRANS(vmul_h, gvec_vvv, MO_16, tcg_gen_gvec_mul)
113
+TRANS(vmul_w, gvec_vvv, MO_32, tcg_gen_gvec_mul)
114
+TRANS(vmul_d, gvec_vvv, MO_64, tcg_gen_gvec_mul)
115
+
116
+static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
117
+{
118
+ TCGv_i32 discard = tcg_temp_new_i32();
119
+ tcg_gen_muls2_i32(discard, t, a, b);
120
+}
121
+
122
+static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
123
+{
124
+ TCGv_i64 discard = tcg_temp_new_i64();
125
+ tcg_gen_muls2_i64(discard, t, a, b);
126
+}
127
+
128
+static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
129
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
130
+{
131
+ static const GVecGen3 op[4] = {
132
+ {
133
+ .fno = gen_helper_vmuh_b,
134
+ .vece = MO_8
135
+ },
136
+ {
137
+ .fno = gen_helper_vmuh_h,
138
+ .vece = MO_16
139
+ },
140
+ {
141
+ .fni4 = gen_vmuh_w,
142
+ .fno = gen_helper_vmuh_w,
143
+ .vece = MO_32
144
+ },
145
+ {
146
+ .fni8 = gen_vmuh_d,
147
+ .fno = gen_helper_vmuh_d,
148
+ .vece = MO_64
149
+ },
150
+ };
151
+
152
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
153
+}
154
+
155
+TRANS(vmuh_b, gvec_vvv, MO_8, do_vmuh_s)
156
+TRANS(vmuh_h, gvec_vvv, MO_16, do_vmuh_s)
157
+TRANS(vmuh_w, gvec_vvv, MO_32, do_vmuh_s)
158
+TRANS(vmuh_d, gvec_vvv, MO_64, do_vmuh_s)
159
+
160
+static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
161
+{
162
+ TCGv_i32 discard = tcg_temp_new_i32();
163
+ tcg_gen_mulu2_i32(discard, t, a, b);
164
+}
165
+
166
+static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
167
+{
168
+ TCGv_i64 discard = tcg_temp_new_i64();
169
+ tcg_gen_mulu2_i64(discard, t, a, b);
170
+}
171
+
172
+static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
173
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
174
+{
175
+ static const GVecGen3 op[4] = {
176
+ {
177
+ .fno = gen_helper_vmuh_bu,
178
+ .vece = MO_8
179
+ },
180
+ {
181
+ .fno = gen_helper_vmuh_hu,
182
+ .vece = MO_16
183
+ },
184
+ {
185
+ .fni4 = gen_vmuh_wu,
186
+ .fno = gen_helper_vmuh_wu,
187
+ .vece = MO_32
188
+ },
189
+ {
190
+ .fni8 = gen_vmuh_du,
191
+ .fno = gen_helper_vmuh_du,
192
+ .vece = MO_64
193
+ },
194
+ };
195
+
196
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
197
+}
198
+
199
+TRANS(vmuh_bu, gvec_vvv, MO_8, do_vmuh_u)
200
+TRANS(vmuh_hu, gvec_vvv, MO_16, do_vmuh_u)
201
+TRANS(vmuh_wu, gvec_vvv, MO_32, do_vmuh_u)
202
+TRANS(vmuh_du, gvec_vvv, MO_64, do_vmuh_u)
203
+
204
+static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
205
+{
206
+ TCGv_vec t1, t2;
207
+ int halfbits = 4 << vece;
208
+
209
+ t1 = tcg_temp_new_vec_matching(a);
210
+ t2 = tcg_temp_new_vec_matching(b);
211
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
212
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
213
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
214
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
215
+ tcg_gen_mul_vec(vece, t, t1, t2);
216
+}
217
+
218
+static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
219
+{
220
+ TCGv_i32 t1, t2;
221
+
222
+ t1 = tcg_temp_new_i32();
223
+ t2 = tcg_temp_new_i32();
224
+ tcg_gen_ext16s_i32(t1, a);
225
+ tcg_gen_ext16s_i32(t2, b);
226
+ tcg_gen_mul_i32(t, t1, t2);
227
+}
228
+
229
+static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
230
+{
231
+ TCGv_i64 t1, t2;
232
+
233
+ t1 = tcg_temp_new_i64();
234
+ t2 = tcg_temp_new_i64();
235
+ tcg_gen_ext32s_i64(t1, a);
236
+ tcg_gen_ext32s_i64(t2, b);
237
+ tcg_gen_mul_i64(t, t1, t2);
238
+}
239
+
240
+static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
241
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
242
+{
243
+ static const TCGOpcode vecop_list[] = {
244
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
245
+ };
246
+ static const GVecGen3 op[3] = {
247
+ {
248
+ .fniv = gen_vmulwev_s,
249
+ .fno = gen_helper_vmulwev_h_b,
250
+ .opt_opc = vecop_list,
251
+ .vece = MO_16
252
+ },
253
+ {
254
+ .fni4 = gen_vmulwev_w_h,
255
+ .fniv = gen_vmulwev_s,
256
+ .fno = gen_helper_vmulwev_w_h,
257
+ .opt_opc = vecop_list,
258
+ .vece = MO_32
259
+ },
260
+ {
261
+ .fni8 = gen_vmulwev_d_w,
262
+ .fniv = gen_vmulwev_s,
263
+ .fno = gen_helper_vmulwev_d_w,
264
+ .opt_opc = vecop_list,
265
+ .vece = MO_64
266
+ },
267
+ };
268
+
269
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
270
+}
271
+
272
+TRANS(vmulwev_h_b, gvec_vvv, MO_8, do_vmulwev_s)
273
+TRANS(vmulwev_w_h, gvec_vvv, MO_16, do_vmulwev_s)
274
+TRANS(vmulwev_d_w, gvec_vvv, MO_32, do_vmulwev_s)
275
+
276
+static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
277
+ TCGv_i64 arg1, TCGv_i64 arg2)
278
+{
279
+ tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
280
+}
281
+
282
+#define VMUL_Q(NAME, FN, idx1, idx2) \
283
+static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
284
+{ \
285
+ TCGv_i64 rh, rl, arg1, arg2; \
286
+ \
287
+ rh = tcg_temp_new_i64(); \
288
+ rl = tcg_temp_new_i64(); \
289
+ arg1 = tcg_temp_new_i64(); \
290
+ arg2 = tcg_temp_new_i64(); \
291
+ \
292
+ get_vreg64(arg1, a->vj, idx1); \
293
+ get_vreg64(arg2, a->vk, idx2); \
294
+ \
295
+ tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \
296
+ \
297
+ set_vreg64(rh, a->vd, 1); \
298
+ set_vreg64(rl, a->vd, 0); \
299
+ \
300
+ return true; \
301
+}
302
+
303
+VMUL_Q(vmulwev_q_d, muls2, 0, 0)
304
+VMUL_Q(vmulwod_q_d, muls2, 1, 1)
305
+VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
306
+VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
307
+VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
308
+VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
309
+
310
+static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
311
+{
312
+ TCGv_vec t1, t2;
313
+ int halfbits = 4 << vece;
314
+
315
+ t1 = tcg_temp_new_vec_matching(a);
316
+ t2 = tcg_temp_new_vec_matching(b);
317
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
318
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
319
+ tcg_gen_mul_vec(vece, t, t1, t2);
320
+}
321
+
322
+static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
323
+{
324
+ TCGv_i32 t1, t2;
325
+
326
+ t1 = tcg_temp_new_i32();
327
+ t2 = tcg_temp_new_i32();
328
+ tcg_gen_sari_i32(t1, a, 16);
329
+ tcg_gen_sari_i32(t2, b, 16);
330
+ tcg_gen_mul_i32(t, t1, t2);
331
+}
332
+
333
+static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
334
+{
335
+ TCGv_i64 t1, t2;
336
+
337
+ t1 = tcg_temp_new_i64();
338
+ t2 = tcg_temp_new_i64();
339
+ tcg_gen_sari_i64(t1, a, 32);
340
+ tcg_gen_sari_i64(t2, b, 32);
341
+ tcg_gen_mul_i64(t, t1, t2);
342
+}
343
+
344
+static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
345
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
346
+{
347
+ static const TCGOpcode vecop_list[] = {
348
+ INDEX_op_sari_vec, INDEX_op_mul_vec, 0
349
+ };
350
+ static const GVecGen3 op[3] = {
351
+ {
352
+ .fniv = gen_vmulwod_s,
353
+ .fno = gen_helper_vmulwod_h_b,
354
+ .opt_opc = vecop_list,
355
+ .vece = MO_16
356
+ },
357
+ {
358
+ .fni4 = gen_vmulwod_w_h,
359
+ .fniv = gen_vmulwod_s,
360
+ .fno = gen_helper_vmulwod_w_h,
361
+ .opt_opc = vecop_list,
362
+ .vece = MO_32
363
+ },
364
+ {
365
+ .fni8 = gen_vmulwod_d_w,
366
+ .fniv = gen_vmulwod_s,
367
+ .fno = gen_helper_vmulwod_d_w,
368
+ .opt_opc = vecop_list,
369
+ .vece = MO_64
370
+ },
371
+ };
372
+
373
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
374
+}
375
+
376
+TRANS(vmulwod_h_b, gvec_vvv, MO_8, do_vmulwod_s)
377
+TRANS(vmulwod_w_h, gvec_vvv, MO_16, do_vmulwod_s)
378
+TRANS(vmulwod_d_w, gvec_vvv, MO_32, do_vmulwod_s)
379
+
380
+static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
381
+{
382
+ TCGv_vec t1, t2, mask;
383
+
384
+ t1 = tcg_temp_new_vec_matching(a);
385
+ t2 = tcg_temp_new_vec_matching(b);
386
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
387
+ tcg_gen_and_vec(vece, t1, a, mask);
388
+ tcg_gen_and_vec(vece, t2, b, mask);
389
+ tcg_gen_mul_vec(vece, t, t1, t2);
390
+}
391
+
392
+static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
393
+{
394
+ TCGv_i32 t1, t2;
395
+
396
+ t1 = tcg_temp_new_i32();
397
+ t2 = tcg_temp_new_i32();
398
+ tcg_gen_ext16u_i32(t1, a);
399
+ tcg_gen_ext16u_i32(t2, b);
400
+ tcg_gen_mul_i32(t, t1, t2);
401
+}
402
+
403
+static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
404
+{
405
+ TCGv_i64 t1, t2;
406
+
407
+ t1 = tcg_temp_new_i64();
408
+ t2 = tcg_temp_new_i64();
409
+ tcg_gen_ext32u_i64(t1, a);
410
+ tcg_gen_ext32u_i64(t2, b);
411
+ tcg_gen_mul_i64(t, t1, t2);
412
+}
413
+
414
+static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
415
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
416
+{
417
+ static const TCGOpcode vecop_list[] = {
418
+ INDEX_op_mul_vec, 0
419
+ };
420
+ static const GVecGen3 op[3] = {
421
+ {
422
+ .fniv = gen_vmulwev_u,
423
+ .fno = gen_helper_vmulwev_h_bu,
424
+ .opt_opc = vecop_list,
425
+ .vece = MO_16
426
+ },
427
+ {
428
+ .fni4 = gen_vmulwev_w_hu,
429
+ .fniv = gen_vmulwev_u,
430
+ .fno = gen_helper_vmulwev_w_hu,
431
+ .opt_opc = vecop_list,
432
+ .vece = MO_32
433
+ },
434
+ {
435
+ .fni8 = gen_vmulwev_d_wu,
436
+ .fniv = gen_vmulwev_u,
437
+ .fno = gen_helper_vmulwev_d_wu,
438
+ .opt_opc = vecop_list,
439
+ .vece = MO_64
440
+ },
441
+ };
442
+
443
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
444
+}
445
+
446
+TRANS(vmulwev_h_bu, gvec_vvv, MO_8, do_vmulwev_u)
447
+TRANS(vmulwev_w_hu, gvec_vvv, MO_16, do_vmulwev_u)
448
+TRANS(vmulwev_d_wu, gvec_vvv, MO_32, do_vmulwev_u)
449
+
450
+static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
451
+{
452
+ TCGv_vec t1, t2;
453
+ int halfbits = 4 << vece;
454
+
455
+ t1 = tcg_temp_new_vec_matching(a);
456
+ t2 = tcg_temp_new_vec_matching(b);
457
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
458
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
459
+ tcg_gen_mul_vec(vece, t, t1, t2);
460
+}
461
+
462
+static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
463
+{
464
+ TCGv_i32 t1, t2;
465
+
466
+ t1 = tcg_temp_new_i32();
467
+ t2 = tcg_temp_new_i32();
468
+ tcg_gen_shri_i32(t1, a, 16);
469
+ tcg_gen_shri_i32(t2, b, 16);
470
+ tcg_gen_mul_i32(t, t1, t2);
471
+}
472
+
473
+static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
474
+{
475
+ TCGv_i64 t1, t2;
476
+
477
+ t1 = tcg_temp_new_i64();
478
+ t2 = tcg_temp_new_i64();
479
+ tcg_gen_shri_i64(t1, a, 32);
480
+ tcg_gen_shri_i64(t2, b, 32);
481
+ tcg_gen_mul_i64(t, t1, t2);
482
+}
483
+
484
+static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
485
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
486
+{
487
+ static const TCGOpcode vecop_list[] = {
488
+ INDEX_op_shri_vec, INDEX_op_mul_vec, 0
489
+ };
490
+ static const GVecGen3 op[3] = {
491
+ {
492
+ .fniv = gen_vmulwod_u,
493
+ .fno = gen_helper_vmulwod_h_bu,
494
+ .opt_opc = vecop_list,
495
+ .vece = MO_16
496
+ },
497
+ {
498
+ .fni4 = gen_vmulwod_w_hu,
499
+ .fniv = gen_vmulwod_u,
500
+ .fno = gen_helper_vmulwod_w_hu,
501
+ .opt_opc = vecop_list,
502
+ .vece = MO_32
503
+ },
504
+ {
505
+ .fni8 = gen_vmulwod_d_wu,
506
+ .fniv = gen_vmulwod_u,
507
+ .fno = gen_helper_vmulwod_d_wu,
508
+ .opt_opc = vecop_list,
509
+ .vece = MO_64
510
+ },
511
+ };
512
+
513
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
514
+}
515
+
516
+TRANS(vmulwod_h_bu, gvec_vvv, MO_8, do_vmulwod_u)
517
+TRANS(vmulwod_w_hu, gvec_vvv, MO_16, do_vmulwod_u)
518
+TRANS(vmulwod_d_wu, gvec_vvv, MO_32, do_vmulwod_u)
519
+
520
+static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
521
+{
522
+ TCGv_vec t1, t2, mask;
523
+ int halfbits = 4 << vece;
524
+
525
+ t1 = tcg_temp_new_vec_matching(a);
526
+ t2 = tcg_temp_new_vec_matching(b);
527
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
528
+ tcg_gen_and_vec(vece, t1, a, mask);
529
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
530
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
531
+ tcg_gen_mul_vec(vece, t, t1, t2);
532
+}
533
+
534
+static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
535
+{
536
+ TCGv_i32 t1, t2;
537
+
538
+ t1 = tcg_temp_new_i32();
539
+ t2 = tcg_temp_new_i32();
540
+ tcg_gen_ext16u_i32(t1, a);
541
+ tcg_gen_ext16s_i32(t2, b);
542
+ tcg_gen_mul_i32(t, t1, t2);
543
+}
544
+
545
+static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
546
+{
547
+ TCGv_i64 t1, t2;
548
+
549
+ t1 = tcg_temp_new_i64();
550
+ t2 = tcg_temp_new_i64();
551
+ tcg_gen_ext32u_i64(t1, a);
552
+ tcg_gen_ext32s_i64(t2, b);
553
+ tcg_gen_mul_i64(t, t1, t2);
554
+}
555
+
556
+static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
557
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
558
+{
559
+ static const TCGOpcode vecop_list[] = {
560
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
561
+ };
562
+ static const GVecGen3 op[3] = {
563
+ {
564
+ .fniv = gen_vmulwev_u_s,
565
+ .fno = gen_helper_vmulwev_h_bu_b,
566
+ .opt_opc = vecop_list,
567
+ .vece = MO_16
568
+ },
569
+ {
570
+ .fni4 = gen_vmulwev_w_hu_h,
571
+ .fniv = gen_vmulwev_u_s,
572
+ .fno = gen_helper_vmulwev_w_hu_h,
573
+ .opt_opc = vecop_list,
574
+ .vece = MO_32
575
+ },
576
+ {
577
+ .fni8 = gen_vmulwev_d_wu_w,
578
+ .fniv = gen_vmulwev_u_s,
579
+ .fno = gen_helper_vmulwev_d_wu_w,
580
+ .opt_opc = vecop_list,
581
+ .vece = MO_64
582
+ },
583
+ };
584
+
585
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
586
+}
587
+
588
+TRANS(vmulwev_h_bu_b, gvec_vvv, MO_8, do_vmulwev_u_s)
589
+TRANS(vmulwev_w_hu_h, gvec_vvv, MO_16, do_vmulwev_u_s)
590
+TRANS(vmulwev_d_wu_w, gvec_vvv, MO_32, do_vmulwev_u_s)
591
+
592
+static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
593
+{
594
+ TCGv_vec t1, t2;
595
+ int halfbits = 4 << vece;
596
+
597
+ t1 = tcg_temp_new_vec_matching(a);
598
+ t2 = tcg_temp_new_vec_matching(b);
599
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
600
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
601
+ tcg_gen_mul_vec(vece, t, t1, t2);
602
+}
603
+
604
+static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
605
+{
606
+ TCGv_i32 t1, t2;
607
+
608
+ t1 = tcg_temp_new_i32();
609
+ t2 = tcg_temp_new_i32();
610
+ tcg_gen_shri_i32(t1, a, 16);
611
+ tcg_gen_sari_i32(t2, b, 16);
612
+ tcg_gen_mul_i32(t, t1, t2);
613
+}
614
+static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
615
+{
616
+ TCGv_i64 t1, t2;
617
+
618
+ t1 = tcg_temp_new_i64();
619
+ t2 = tcg_temp_new_i64();
620
+ tcg_gen_shri_i64(t1, a, 32);
621
+ tcg_gen_sari_i64(t2, b, 32);
622
+ tcg_gen_mul_i64(t, t1, t2);
623
+}
624
+
625
+static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
626
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
627
+{
628
+ static const TCGOpcode vecop_list[] = {
629
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
630
+ };
631
+ static const GVecGen3 op[3] = {
632
+ {
633
+ .fniv = gen_vmulwod_u_s,
634
+ .fno = gen_helper_vmulwod_h_bu_b,
635
+ .opt_opc = vecop_list,
636
+ .vece = MO_16
637
+ },
638
+ {
639
+ .fni4 = gen_vmulwod_w_hu_h,
640
+ .fniv = gen_vmulwod_u_s,
641
+ .fno = gen_helper_vmulwod_w_hu_h,
642
+ .opt_opc = vecop_list,
643
+ .vece = MO_32
644
+ },
645
+ {
646
+ .fni8 = gen_vmulwod_d_wu_w,
647
+ .fniv = gen_vmulwod_u_s,
648
+ .fno = gen_helper_vmulwod_d_wu_w,
649
+ .opt_opc = vecop_list,
650
+ .vece = MO_64
651
+ },
652
+ };
653
+
654
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
655
+}
656
+
657
+TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s)
658
+TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s)
659
+TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s)
660
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
661
index XXXXXXX..XXXXXXX 100644
662
--- a/target/loongarch/insns.decode
663
+++ b/target/loongarch/insns.decode
664
@@ -XXX,XX +XXX,XX @@ vmini_bu 0111 00101001 01100 ..... ..... ..... @vv_ui5
665
vmini_hu 0111 00101001 01101 ..... ..... ..... @vv_ui5
666
vmini_wu 0111 00101001 01110 ..... ..... ..... @vv_ui5
667
vmini_du 0111 00101001 01111 ..... ..... ..... @vv_ui5
668
+
669
+vmul_b 0111 00001000 01000 ..... ..... ..... @vvv
670
+vmul_h 0111 00001000 01001 ..... ..... ..... @vvv
671
+vmul_w 0111 00001000 01010 ..... ..... ..... @vvv
672
+vmul_d 0111 00001000 01011 ..... ..... ..... @vvv
673
+vmuh_b 0111 00001000 01100 ..... ..... ..... @vvv
674
+vmuh_h 0111 00001000 01101 ..... ..... ..... @vvv
675
+vmuh_w 0111 00001000 01110 ..... ..... ..... @vvv
676
+vmuh_d 0111 00001000 01111 ..... ..... ..... @vvv
677
+vmuh_bu 0111 00001000 10000 ..... ..... ..... @vvv
678
+vmuh_hu 0111 00001000 10001 ..... ..... ..... @vvv
679
+vmuh_wu 0111 00001000 10010 ..... ..... ..... @vvv
680
+vmuh_du 0111 00001000 10011 ..... ..... ..... @vvv
681
+
682
+vmulwev_h_b 0111 00001001 00000 ..... ..... ..... @vvv
683
+vmulwev_w_h 0111 00001001 00001 ..... ..... ..... @vvv
684
+vmulwev_d_w 0111 00001001 00010 ..... ..... ..... @vvv
685
+vmulwev_q_d 0111 00001001 00011 ..... ..... ..... @vvv
686
+vmulwod_h_b 0111 00001001 00100 ..... ..... ..... @vvv
687
+vmulwod_w_h 0111 00001001 00101 ..... ..... ..... @vvv
688
+vmulwod_d_w 0111 00001001 00110 ..... ..... ..... @vvv
689
+vmulwod_q_d 0111 00001001 00111 ..... ..... ..... @vvv
690
+vmulwev_h_bu 0111 00001001 10000 ..... ..... ..... @vvv
691
+vmulwev_w_hu 0111 00001001 10001 ..... ..... ..... @vvv
692
+vmulwev_d_wu 0111 00001001 10010 ..... ..... ..... @vvv
693
+vmulwev_q_du 0111 00001001 10011 ..... ..... ..... @vvv
694
+vmulwod_h_bu 0111 00001001 10100 ..... ..... ..... @vvv
695
+vmulwod_w_hu 0111 00001001 10101 ..... ..... ..... @vvv
696
+vmulwod_d_wu 0111 00001001 10110 ..... ..... ..... @vvv
697
+vmulwod_q_du 0111 00001001 10111 ..... ..... ..... @vvv
698
+vmulwev_h_bu_b 0111 00001010 00000 ..... ..... ..... @vvv
699
+vmulwev_w_hu_h 0111 00001010 00001 ..... ..... ..... @vvv
700
+vmulwev_d_wu_w 0111 00001010 00010 ..... ..... ..... @vvv
701
+vmulwev_q_du_d 0111 00001010 00011 ..... ..... ..... @vvv
702
+vmulwod_h_bu_b 0111 00001010 00100 ..... ..... ..... @vvv
703
+vmulwod_w_hu_h 0111 00001010 00101 ..... ..... ..... @vvv
704
+vmulwod_d_wu_w 0111 00001010 00110 ..... ..... ..... @vvv
705
+vmulwod_q_du_d 0111 00001010 00111 ..... ..... ..... @vvv
706
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
707
index XXXXXXX..XXXXXXX 100644
708
--- a/target/loongarch/lsx_helper.c
709
+++ b/target/loongarch/lsx_helper.c
710
@@ -XXX,XX +XXX,XX @@ VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
711
VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
712
VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
713
VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
714
+
715
+#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
716
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
717
+{ \
718
+ int i; \
719
+ VReg *Vd = (VReg *)vd; \
720
+ VReg *Vj = (VReg *)vj; \
721
+ VReg *Vk = (VReg *)vk; \
722
+ typedef __typeof(Vd->E1(0)) T; \
723
+ \
724
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
725
+ Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
726
+ } \
727
+}
728
+
729
+void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v)
730
+{
731
+ uint64_t l, h1, h2;
732
+ VReg *Vd = (VReg *)vd;
733
+ VReg *Vj = (VReg *)vj;
734
+ VReg *Vk = (VReg *)vk;
735
+
736
+ muls64(&l, &h1, Vj->D(0), Vk->D(0));
737
+ muls64(&l, &h2, Vj->D(1), Vk->D(1));
738
+
739
+ Vd->D(0) = h1;
740
+ Vd->D(1) = h2;
741
+}
742
+
743
+DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
744
+DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
745
+DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
746
+
747
+void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v)
748
+{
749
+ uint64_t l, h1, h2;
750
+ VReg *Vd = (VReg *)vd;
751
+ VReg *Vj = (VReg *)vj;
752
+ VReg *Vk = (VReg *)vk;
753
+
754
+ mulu64(&l, &h1, Vj->D(0), Vk->D(0));
755
+ mulu64(&l, &h2, Vj->D(1), Vk->D(1));
756
+
757
+ Vd->D(0) = h1;
758
+ Vd->D(1) = h2;
759
+}
760
+
761
+DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
762
+DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
763
+DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
764
+
765
+#define DO_MUL(a, b) (a * b)
766
+
767
+DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
768
+DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
769
+DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
770
+
771
+DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
772
+DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
773
+DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
774
+
775
+DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
776
+DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
777
+DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
778
+
779
+DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
780
+DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
781
+DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
782
+
783
+DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
784
+DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
785
+DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
786
+
787
+DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
788
+DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
789
+DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
790
--
791
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VMADD.{B/H/W/D};
3
- VMSUB.{B/H/W/D};
4
- VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
5
- VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
6
1
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230504122810.4094787-16-gaosong@loongson.cn>
10
---
11
target/loongarch/disas.c | 34 ++
12
target/loongarch/helper.h | 30 +
13
target/loongarch/insn_trans/trans_lsx.c.inc | 612 ++++++++++++++++++++
14
target/loongarch/insns.decode | 34 ++
15
target/loongarch/lsx_helper.c | 107 ++++
16
5 files changed, 817 insertions(+)
17
18
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/loongarch/disas.c
21
+++ b/target/loongarch/disas.c
22
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmulwod_h_bu_b, vvv)
23
INSN_LSX(vmulwod_w_hu_h, vvv)
24
INSN_LSX(vmulwod_d_wu_w, vvv)
25
INSN_LSX(vmulwod_q_du_d, vvv)
26
+
27
+INSN_LSX(vmadd_b, vvv)
28
+INSN_LSX(vmadd_h, vvv)
29
+INSN_LSX(vmadd_w, vvv)
30
+INSN_LSX(vmadd_d, vvv)
31
+INSN_LSX(vmsub_b, vvv)
32
+INSN_LSX(vmsub_h, vvv)
33
+INSN_LSX(vmsub_w, vvv)
34
+INSN_LSX(vmsub_d, vvv)
35
+
36
+INSN_LSX(vmaddwev_h_b, vvv)
37
+INSN_LSX(vmaddwev_w_h, vvv)
38
+INSN_LSX(vmaddwev_d_w, vvv)
39
+INSN_LSX(vmaddwev_q_d, vvv)
40
+INSN_LSX(vmaddwod_h_b, vvv)
41
+INSN_LSX(vmaddwod_w_h, vvv)
42
+INSN_LSX(vmaddwod_d_w, vvv)
43
+INSN_LSX(vmaddwod_q_d, vvv)
44
+INSN_LSX(vmaddwev_h_bu, vvv)
45
+INSN_LSX(vmaddwev_w_hu, vvv)
46
+INSN_LSX(vmaddwev_d_wu, vvv)
47
+INSN_LSX(vmaddwev_q_du, vvv)
48
+INSN_LSX(vmaddwod_h_bu, vvv)
49
+INSN_LSX(vmaddwod_w_hu, vvv)
50
+INSN_LSX(vmaddwod_d_wu, vvv)
51
+INSN_LSX(vmaddwod_q_du, vvv)
52
+INSN_LSX(vmaddwev_h_bu_b, vvv)
53
+INSN_LSX(vmaddwev_w_hu_h, vvv)
54
+INSN_LSX(vmaddwev_d_wu_w, vvv)
55
+INSN_LSX(vmaddwev_q_du_d, vvv)
56
+INSN_LSX(vmaddwod_h_bu_b, vvv)
57
+INSN_LSX(vmaddwod_w_hu_h, vvv)
58
+INSN_LSX(vmaddwod_d_wu_w, vvv)
59
+INSN_LSX(vmaddwod_q_du_d, vvv)
60
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/loongarch/helper.h
63
+++ b/target/loongarch/helper.h
64
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
65
DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
66
DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
67
DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
68
+
69
+DEF_HELPER_FLAGS_4(vmadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
70
+DEF_HELPER_FLAGS_4(vmadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
71
+DEF_HELPER_FLAGS_4(vmadd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
72
+DEF_HELPER_FLAGS_4(vmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
73
+DEF_HELPER_FLAGS_4(vmsub_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
74
+DEF_HELPER_FLAGS_4(vmsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
75
+DEF_HELPER_FLAGS_4(vmsub_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
76
+DEF_HELPER_FLAGS_4(vmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
77
+
78
+DEF_HELPER_FLAGS_4(vmaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
79
+DEF_HELPER_FLAGS_4(vmaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
80
+DEF_HELPER_FLAGS_4(vmaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
81
+DEF_HELPER_FLAGS_4(vmaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
82
+DEF_HELPER_FLAGS_4(vmaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
83
+DEF_HELPER_FLAGS_4(vmaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
84
+
85
+DEF_HELPER_FLAGS_4(vmaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
86
+DEF_HELPER_FLAGS_4(vmaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
87
+DEF_HELPER_FLAGS_4(vmaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
88
+DEF_HELPER_FLAGS_4(vmaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
89
+DEF_HELPER_FLAGS_4(vmaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
90
+DEF_HELPER_FLAGS_4(vmaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
91
+
92
+DEF_HELPER_FLAGS_4(vmaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
93
+DEF_HELPER_FLAGS_4(vmaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
94
+DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
95
+DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
96
+DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
97
+DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
98
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
101
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
102
@@ -XXX,XX +XXX,XX @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
103
TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s)
104
TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s)
105
TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s)
106
+
107
+static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
108
+{
109
+ TCGv_vec t1;
110
+
111
+ t1 = tcg_temp_new_vec_matching(t);
112
+ tcg_gen_mul_vec(vece, t1, a, b);
113
+ tcg_gen_add_vec(vece, t, t, t1);
114
+}
115
+
116
+static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
117
+{
118
+ TCGv_i32 t1;
119
+
120
+ t1 = tcg_temp_new_i32();
121
+ tcg_gen_mul_i32(t1, a, b);
122
+ tcg_gen_add_i32(t, t, t1);
123
+}
124
+
125
+static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
126
+{
127
+ TCGv_i64 t1;
128
+
129
+ t1 = tcg_temp_new_i64();
130
+ tcg_gen_mul_i64(t1, a, b);
131
+ tcg_gen_add_i64(t, t, t1);
132
+}
133
+
134
+static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
135
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
136
+{
137
+ static const TCGOpcode vecop_list[] = {
138
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
139
+ };
140
+ static const GVecGen3 op[4] = {
141
+ {
142
+ .fniv = gen_vmadd,
143
+ .fno = gen_helper_vmadd_b,
144
+ .load_dest = true,
145
+ .opt_opc = vecop_list,
146
+ .vece = MO_8
147
+ },
148
+ {
149
+ .fniv = gen_vmadd,
150
+ .fno = gen_helper_vmadd_h,
151
+ .load_dest = true,
152
+ .opt_opc = vecop_list,
153
+ .vece = MO_16
154
+ },
155
+ {
156
+ .fni4 = gen_vmadd_w,
157
+ .fniv = gen_vmadd,
158
+ .fno = gen_helper_vmadd_w,
159
+ .load_dest = true,
160
+ .opt_opc = vecop_list,
161
+ .vece = MO_32
162
+ },
163
+ {
164
+ .fni8 = gen_vmadd_d,
165
+ .fniv = gen_vmadd,
166
+ .fno = gen_helper_vmadd_d,
167
+ .load_dest = true,
168
+ .opt_opc = vecop_list,
169
+ .vece = MO_64
170
+ },
171
+ };
172
+
173
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
174
+}
175
+
176
+TRANS(vmadd_b, gvec_vvv, MO_8, do_vmadd)
177
+TRANS(vmadd_h, gvec_vvv, MO_16, do_vmadd)
178
+TRANS(vmadd_w, gvec_vvv, MO_32, do_vmadd)
179
+TRANS(vmadd_d, gvec_vvv, MO_64, do_vmadd)
180
+
181
+static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
182
+{
183
+ TCGv_vec t1;
184
+
185
+ t1 = tcg_temp_new_vec_matching(t);
186
+ tcg_gen_mul_vec(vece, t1, a, b);
187
+ tcg_gen_sub_vec(vece, t, t, t1);
188
+}
189
+
190
+static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
191
+{
192
+ TCGv_i32 t1;
193
+
194
+ t1 = tcg_temp_new_i32();
195
+ tcg_gen_mul_i32(t1, a, b);
196
+ tcg_gen_sub_i32(t, t, t1);
197
+}
198
+
199
+static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
200
+{
201
+ TCGv_i64 t1;
202
+
203
+ t1 = tcg_temp_new_i64();
204
+ tcg_gen_mul_i64(t1, a, b);
205
+ tcg_gen_sub_i64(t, t, t1);
206
+}
207
+
208
+static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
209
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
210
+{
211
+ static const TCGOpcode vecop_list[] = {
212
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
213
+ };
214
+ static const GVecGen3 op[4] = {
215
+ {
216
+ .fniv = gen_vmsub,
217
+ .fno = gen_helper_vmsub_b,
218
+ .load_dest = true,
219
+ .opt_opc = vecop_list,
220
+ .vece = MO_8
221
+ },
222
+ {
223
+ .fniv = gen_vmsub,
224
+ .fno = gen_helper_vmsub_h,
225
+ .load_dest = true,
226
+ .opt_opc = vecop_list,
227
+ .vece = MO_16
228
+ },
229
+ {
230
+ .fni4 = gen_vmsub_w,
231
+ .fniv = gen_vmsub,
232
+ .fno = gen_helper_vmsub_w,
233
+ .load_dest = true,
234
+ .opt_opc = vecop_list,
235
+ .vece = MO_32
236
+ },
237
+ {
238
+ .fni8 = gen_vmsub_d,
239
+ .fniv = gen_vmsub,
240
+ .fno = gen_helper_vmsub_d,
241
+ .load_dest = true,
242
+ .opt_opc = vecop_list,
243
+ .vece = MO_64
244
+ },
245
+ };
246
+
247
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
248
+}
249
+
250
+TRANS(vmsub_b, gvec_vvv, MO_8, do_vmsub)
251
+TRANS(vmsub_h, gvec_vvv, MO_16, do_vmsub)
252
+TRANS(vmsub_w, gvec_vvv, MO_32, do_vmsub)
253
+TRANS(vmsub_d, gvec_vvv, MO_64, do_vmsub)
254
+
255
+static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
256
+{
257
+ TCGv_vec t1, t2, t3;
258
+ int halfbits = 4 << vece;
259
+
260
+ t1 = tcg_temp_new_vec_matching(a);
261
+ t2 = tcg_temp_new_vec_matching(b);
262
+ t3 = tcg_temp_new_vec_matching(t);
263
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
264
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
265
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
266
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
267
+ tcg_gen_mul_vec(vece, t3, t1, t2);
268
+ tcg_gen_add_vec(vece, t, t, t3);
269
+}
270
+
271
+static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
272
+{
273
+ TCGv_i32 t1;
274
+
275
+ t1 = tcg_temp_new_i32();
276
+ gen_vmulwev_w_h(t1, a, b);
277
+ tcg_gen_add_i32(t, t, t1);
278
+}
279
+
280
+static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
281
+{
282
+ TCGv_i64 t1;
283
+
284
+ t1 = tcg_temp_new_i64();
285
+ gen_vmulwev_d_w(t1, a, b);
286
+ tcg_gen_add_i64(t, t, t1);
287
+}
288
+
289
+static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
290
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
291
+{
292
+ static const TCGOpcode vecop_list[] = {
293
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
294
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
295
+ };
296
+ static const GVecGen3 op[3] = {
297
+ {
298
+ .fniv = gen_vmaddwev_s,
299
+ .fno = gen_helper_vmaddwev_h_b,
300
+ .load_dest = true,
301
+ .opt_opc = vecop_list,
302
+ .vece = MO_16
303
+ },
304
+ {
305
+ .fni4 = gen_vmaddwev_w_h,
306
+ .fniv = gen_vmaddwev_s,
307
+ .fno = gen_helper_vmaddwev_w_h,
308
+ .load_dest = true,
309
+ .opt_opc = vecop_list,
310
+ .vece = MO_32
311
+ },
312
+ {
313
+ .fni8 = gen_vmaddwev_d_w,
314
+ .fniv = gen_vmaddwev_s,
315
+ .fno = gen_helper_vmaddwev_d_w,
316
+ .load_dest = true,
317
+ .opt_opc = vecop_list,
318
+ .vece = MO_64
319
+ },
320
+ };
321
+
322
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
323
+}
324
+
325
+TRANS(vmaddwev_h_b, gvec_vvv, MO_8, do_vmaddwev_s)
326
+TRANS(vmaddwev_w_h, gvec_vvv, MO_16, do_vmaddwev_s)
327
+TRANS(vmaddwev_d_w, gvec_vvv, MO_32, do_vmaddwev_s)
328
+
329
+#define VMADD_Q(NAME, FN, idx1, idx2) \
330
+static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
331
+{ \
332
+ TCGv_i64 rh, rl, arg1, arg2, th, tl; \
333
+ \
334
+ rh = tcg_temp_new_i64(); \
335
+ rl = tcg_temp_new_i64(); \
336
+ arg1 = tcg_temp_new_i64(); \
337
+ arg2 = tcg_temp_new_i64(); \
338
+ th = tcg_temp_new_i64(); \
339
+ tl = tcg_temp_new_i64(); \
340
+ \
341
+ get_vreg64(arg1, a->vj, idx1); \
342
+ get_vreg64(arg2, a->vk, idx2); \
343
+ get_vreg64(rh, a->vd, 1); \
344
+ get_vreg64(rl, a->vd, 0); \
345
+ \
346
+ tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \
347
+ tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \
348
+ \
349
+ set_vreg64(rh, a->vd, 1); \
350
+ set_vreg64(rl, a->vd, 0); \
351
+ \
352
+ return true; \
353
+}
354
+
355
+VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
356
+VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
357
+VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
358
+VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
359
+VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
360
+VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
361
+
362
+static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
363
+{
364
+ TCGv_vec t1, t2, t3;
365
+ int halfbits = 4 << vece;
366
+
367
+ t1 = tcg_temp_new_vec_matching(a);
368
+ t2 = tcg_temp_new_vec_matching(b);
369
+ t3 = tcg_temp_new_vec_matching(t);
370
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
371
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
372
+ tcg_gen_mul_vec(vece, t3, t1, t2);
373
+ tcg_gen_add_vec(vece, t, t, t3);
374
+}
375
+
376
+static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
377
+{
378
+ TCGv_i32 t1;
379
+
380
+ t1 = tcg_temp_new_i32();
381
+ gen_vmulwod_w_h(t1, a, b);
382
+ tcg_gen_add_i32(t, t, t1);
383
+}
384
+
385
+static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
386
+{
387
+ TCGv_i64 t1;
388
+
389
+ t1 = tcg_temp_new_i64();
390
+ gen_vmulwod_d_w(t1, a, b);
391
+ tcg_gen_add_i64(t, t, t1);
392
+}
393
+
394
+static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
395
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
396
+{
397
+ static const TCGOpcode vecop_list[] = {
398
+ INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
399
+ };
400
+ static const GVecGen3 op[3] = {
401
+ {
402
+ .fniv = gen_vmaddwod_s,
403
+ .fno = gen_helper_vmaddwod_h_b,
404
+ .load_dest = true,
405
+ .opt_opc = vecop_list,
406
+ .vece = MO_16
407
+ },
408
+ {
409
+ .fni4 = gen_vmaddwod_w_h,
410
+ .fniv = gen_vmaddwod_s,
411
+ .fno = gen_helper_vmaddwod_w_h,
412
+ .load_dest = true,
413
+ .opt_opc = vecop_list,
414
+ .vece = MO_32
415
+ },
416
+ {
417
+ .fni8 = gen_vmaddwod_d_w,
418
+ .fniv = gen_vmaddwod_s,
419
+ .fno = gen_helper_vmaddwod_d_w,
420
+ .load_dest = true,
421
+ .opt_opc = vecop_list,
422
+ .vece = MO_64
423
+ },
424
+ };
425
+
426
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
427
+}
428
+
429
+TRANS(vmaddwod_h_b, gvec_vvv, MO_8, do_vmaddwod_s)
430
+TRANS(vmaddwod_w_h, gvec_vvv, MO_16, do_vmaddwod_s)
431
+TRANS(vmaddwod_d_w, gvec_vvv, MO_32, do_vmaddwod_s)
432
+
433
+static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
434
+{
435
+ TCGv_vec t1, t2, mask;
436
+
437
+ t1 = tcg_temp_new_vec_matching(t);
438
+ t2 = tcg_temp_new_vec_matching(b);
439
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
440
+ tcg_gen_and_vec(vece, t1, a, mask);
441
+ tcg_gen_and_vec(vece, t2, b, mask);
442
+ tcg_gen_mul_vec(vece, t1, t1, t2);
443
+ tcg_gen_add_vec(vece, t, t, t1);
444
+}
445
+
446
+static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
447
+{
448
+ TCGv_i32 t1;
449
+
450
+ t1 = tcg_temp_new_i32();
451
+ gen_vmulwev_w_hu(t1, a, b);
452
+ tcg_gen_add_i32(t, t, t1);
453
+}
454
+
455
+static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
456
+{
457
+ TCGv_i64 t1;
458
+
459
+ t1 = tcg_temp_new_i64();
460
+ gen_vmulwev_d_wu(t1, a, b);
461
+ tcg_gen_add_i64(t, t, t1);
462
+}
463
+
464
+static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
465
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
466
+{
467
+ static const TCGOpcode vecop_list[] = {
468
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
469
+ };
470
+ static const GVecGen3 op[3] = {
471
+ {
472
+ .fniv = gen_vmaddwev_u,
473
+ .fno = gen_helper_vmaddwev_h_bu,
474
+ .load_dest = true,
475
+ .opt_opc = vecop_list,
476
+ .vece = MO_16
477
+ },
478
+ {
479
+ .fni4 = gen_vmaddwev_w_hu,
480
+ .fniv = gen_vmaddwev_u,
481
+ .fno = gen_helper_vmaddwev_w_hu,
482
+ .load_dest = true,
483
+ .opt_opc = vecop_list,
484
+ .vece = MO_32
485
+ },
486
+ {
487
+ .fni8 = gen_vmaddwev_d_wu,
488
+ .fniv = gen_vmaddwev_u,
489
+ .fno = gen_helper_vmaddwev_d_wu,
490
+ .load_dest = true,
491
+ .opt_opc = vecop_list,
492
+ .vece = MO_64
493
+ },
494
+ };
495
+
496
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
497
+}
498
+
499
+TRANS(vmaddwev_h_bu, gvec_vvv, MO_8, do_vmaddwev_u)
500
+TRANS(vmaddwev_w_hu, gvec_vvv, MO_16, do_vmaddwev_u)
501
+TRANS(vmaddwev_d_wu, gvec_vvv, MO_32, do_vmaddwev_u)
502
+
503
+static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
504
+{
505
+ TCGv_vec t1, t2, t3;
506
+ int halfbits = 4 << vece;
507
+
508
+ t1 = tcg_temp_new_vec_matching(a);
509
+ t2 = tcg_temp_new_vec_matching(b);
510
+ t3 = tcg_temp_new_vec_matching(t);
511
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
512
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
513
+ tcg_gen_mul_vec(vece, t3, t1, t2);
514
+ tcg_gen_add_vec(vece, t, t, t3);
515
+}
516
+
517
+static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
518
+{
519
+ TCGv_i32 t1;
520
+
521
+ t1 = tcg_temp_new_i32();
522
+ gen_vmulwod_w_hu(t1, a, b);
523
+ tcg_gen_add_i32(t, t, t1);
524
+}
525
+
526
+static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
527
+{
528
+ TCGv_i64 t1;
529
+
530
+ t1 = tcg_temp_new_i64();
531
+ gen_vmulwod_d_wu(t1, a, b);
532
+ tcg_gen_add_i64(t, t, t1);
533
+}
534
+
535
+static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
536
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
537
+{
538
+ static const TCGOpcode vecop_list[] = {
539
+ INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
540
+ };
541
+ static const GVecGen3 op[3] = {
542
+ {
543
+ .fniv = gen_vmaddwod_u,
544
+ .fno = gen_helper_vmaddwod_h_bu,
545
+ .load_dest = true,
546
+ .opt_opc = vecop_list,
547
+ .vece = MO_16
548
+ },
549
+ {
550
+ .fni4 = gen_vmaddwod_w_hu,
551
+ .fniv = gen_vmaddwod_u,
552
+ .fno = gen_helper_vmaddwod_w_hu,
553
+ .load_dest = true,
554
+ .opt_opc = vecop_list,
555
+ .vece = MO_32
556
+ },
557
+ {
558
+ .fni8 = gen_vmaddwod_d_wu,
559
+ .fniv = gen_vmaddwod_u,
560
+ .fno = gen_helper_vmaddwod_d_wu,
561
+ .load_dest = true,
562
+ .opt_opc = vecop_list,
563
+ .vece = MO_64
564
+ },
565
+ };
566
+
567
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
568
+}
569
+
570
+TRANS(vmaddwod_h_bu, gvec_vvv, MO_8, do_vmaddwod_u)
571
+TRANS(vmaddwod_w_hu, gvec_vvv, MO_16, do_vmaddwod_u)
572
+TRANS(vmaddwod_d_wu, gvec_vvv, MO_32, do_vmaddwod_u)
573
+
574
+static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
575
+{
576
+ TCGv_vec t1, t2, mask;
577
+ int halfbits = 4 << vece;
578
+
579
+ t1 = tcg_temp_new_vec_matching(a);
580
+ t2 = tcg_temp_new_vec_matching(b);
581
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
582
+ tcg_gen_and_vec(vece, t1, a, mask);
583
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
584
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
585
+ tcg_gen_mul_vec(vece, t1, t1, t2);
586
+ tcg_gen_add_vec(vece, t, t, t1);
587
+}
588
+
589
+static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
590
+{
591
+ TCGv_i32 t1;
592
+
593
+ t1 = tcg_temp_new_i32();
594
+ gen_vmulwev_w_hu_h(t1, a, b);
595
+ tcg_gen_add_i32(t, t, t1);
596
+}
597
+
598
+static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
599
+{
600
+ TCGv_i64 t1;
601
+
602
+ t1 = tcg_temp_new_i64();
603
+ gen_vmulwev_d_wu_w(t1, a, b);
604
+ tcg_gen_add_i64(t, t, t1);
605
+}
606
+
607
+static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
608
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
609
+{
610
+ static const TCGOpcode vecop_list[] = {
611
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
612
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
613
+ };
614
+ static const GVecGen3 op[3] = {
615
+ {
616
+ .fniv = gen_vmaddwev_u_s,
617
+ .fno = gen_helper_vmaddwev_h_bu_b,
618
+ .load_dest = true,
619
+ .opt_opc = vecop_list,
620
+ .vece = MO_16
621
+ },
622
+ {
623
+ .fni4 = gen_vmaddwev_w_hu_h,
624
+ .fniv = gen_vmaddwev_u_s,
625
+ .fno = gen_helper_vmaddwev_w_hu_h,
626
+ .load_dest = true,
627
+ .opt_opc = vecop_list,
628
+ .vece = MO_32
629
+ },
630
+ {
631
+ .fni8 = gen_vmaddwev_d_wu_w,
632
+ .fniv = gen_vmaddwev_u_s,
633
+ .fno = gen_helper_vmaddwev_d_wu_w,
634
+ .load_dest = true,
635
+ .opt_opc = vecop_list,
636
+ .vece = MO_64
637
+ },
638
+ };
639
+
640
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
641
+}
642
+
643
+TRANS(vmaddwev_h_bu_b, gvec_vvv, MO_8, do_vmaddwev_u_s)
644
+TRANS(vmaddwev_w_hu_h, gvec_vvv, MO_16, do_vmaddwev_u_s)
645
+TRANS(vmaddwev_d_wu_w, gvec_vvv, MO_32, do_vmaddwev_u_s)
646
+
647
+static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
648
+{
649
+ TCGv_vec t1, t2, t3;
650
+ int halfbits = 4 << vece;
651
+
652
+ t1 = tcg_temp_new_vec_matching(a);
653
+ t2 = tcg_temp_new_vec_matching(b);
654
+ t3 = tcg_temp_new_vec_matching(t);
655
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
656
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
657
+ tcg_gen_mul_vec(vece, t3, t1, t2);
658
+ tcg_gen_add_vec(vece, t, t, t3);
659
+}
660
+
661
+static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
662
+{
663
+ TCGv_i32 t1;
664
+
665
+ t1 = tcg_temp_new_i32();
666
+ gen_vmulwod_w_hu_h(t1, a, b);
667
+ tcg_gen_add_i32(t, t, t1);
668
+}
669
+
670
+static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
671
+{
672
+ TCGv_i64 t1;
673
+
674
+ t1 = tcg_temp_new_i64();
675
+ gen_vmulwod_d_wu_w(t1, a, b);
676
+ tcg_gen_add_i64(t, t, t1);
677
+}
678
+
679
+static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
680
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
681
+{
682
+ static const TCGOpcode vecop_list[] = {
683
+ INDEX_op_shri_vec, INDEX_op_sari_vec,
684
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
685
+ };
686
+ static const GVecGen3 op[3] = {
687
+ {
688
+ .fniv = gen_vmaddwod_u_s,
689
+ .fno = gen_helper_vmaddwod_h_bu_b,
690
+ .load_dest = true,
691
+ .opt_opc = vecop_list,
692
+ .vece = MO_16
693
+ },
694
+ {
695
+ .fni4 = gen_vmaddwod_w_hu_h,
696
+ .fniv = gen_vmaddwod_u_s,
697
+ .fno = gen_helper_vmaddwod_w_hu_h,
698
+ .load_dest = true,
699
+ .opt_opc = vecop_list,
700
+ .vece = MO_32
701
+ },
702
+ {
703
+ .fni8 = gen_vmaddwod_d_wu_w,
704
+ .fniv = gen_vmaddwod_u_s,
705
+ .fno = gen_helper_vmaddwod_d_wu_w,
706
+ .load_dest = true,
707
+ .opt_opc = vecop_list,
708
+ .vece = MO_64
709
+ },
710
+ };
711
+
712
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
713
+}
714
+
715
+TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s)
716
+TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s)
717
+TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s)
718
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
719
index XXXXXXX..XXXXXXX 100644
720
--- a/target/loongarch/insns.decode
721
+++ b/target/loongarch/insns.decode
722
@@ -XXX,XX +XXX,XX @@ vmulwod_h_bu_b 0111 00001010 00100 ..... ..... ..... @vvv
723
vmulwod_w_hu_h 0111 00001010 00101 ..... ..... ..... @vvv
724
vmulwod_d_wu_w 0111 00001010 00110 ..... ..... ..... @vvv
725
vmulwod_q_du_d 0111 00001010 00111 ..... ..... ..... @vvv
726
+
727
+vmadd_b 0111 00001010 10000 ..... ..... ..... @vvv
728
+vmadd_h 0111 00001010 10001 ..... ..... ..... @vvv
729
+vmadd_w 0111 00001010 10010 ..... ..... ..... @vvv
730
+vmadd_d 0111 00001010 10011 ..... ..... ..... @vvv
731
+vmsub_b 0111 00001010 10100 ..... ..... ..... @vvv
732
+vmsub_h 0111 00001010 10101 ..... ..... ..... @vvv
733
+vmsub_w 0111 00001010 10110 ..... ..... ..... @vvv
734
+vmsub_d 0111 00001010 10111 ..... ..... ..... @vvv
735
+
736
+vmaddwev_h_b 0111 00001010 11000 ..... ..... ..... @vvv
737
+vmaddwev_w_h 0111 00001010 11001 ..... ..... ..... @vvv
738
+vmaddwev_d_w 0111 00001010 11010 ..... ..... ..... @vvv
739
+vmaddwev_q_d 0111 00001010 11011 ..... ..... ..... @vvv
740
+vmaddwod_h_b 0111 00001010 11100 ..... ..... ..... @vvv
741
+vmaddwod_w_h 0111 00001010 11101 ..... ..... ..... @vvv
742
+vmaddwod_d_w 0111 00001010 11110 ..... ..... ..... @vvv
743
+vmaddwod_q_d 0111 00001010 11111 ..... ..... ..... @vvv
744
+vmaddwev_h_bu 0111 00001011 01000 ..... ..... ..... @vvv
745
+vmaddwev_w_hu 0111 00001011 01001 ..... ..... ..... @vvv
746
+vmaddwev_d_wu 0111 00001011 01010 ..... ..... ..... @vvv
747
+vmaddwev_q_du 0111 00001011 01011 ..... ..... ..... @vvv
748
+vmaddwod_h_bu 0111 00001011 01100 ..... ..... ..... @vvv
749
+vmaddwod_w_hu 0111 00001011 01101 ..... ..... ..... @vvv
750
+vmaddwod_d_wu 0111 00001011 01110 ..... ..... ..... @vvv
751
+vmaddwod_q_du 0111 00001011 01111 ..... ..... ..... @vvv
752
+vmaddwev_h_bu_b 0111 00001011 11000 ..... ..... ..... @vvv
753
+vmaddwev_w_hu_h 0111 00001011 11001 ..... ..... ..... @vvv
754
+vmaddwev_d_wu_w 0111 00001011 11010 ..... ..... ..... @vvv
755
+vmaddwev_q_du_d 0111 00001011 11011 ..... ..... ..... @vvv
756
+vmaddwod_h_bu_b 0111 00001011 11100 ..... ..... ..... @vvv
757
+vmaddwod_w_hu_h 0111 00001011 11101 ..... ..... ..... @vvv
758
+vmaddwod_d_wu_w 0111 00001011 11110 ..... ..... ..... @vvv
759
+vmaddwod_q_du_d 0111 00001011 11111 ..... ..... ..... @vvv
760
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
761
index XXXXXXX..XXXXXXX 100644
762
--- a/target/loongarch/lsx_helper.c
763
+++ b/target/loongarch/lsx_helper.c
764
@@ -XXX,XX +XXX,XX @@ DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
765
DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
766
DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
767
DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
768
+
769
+#define DO_MADD(a, b, c) (a + b * c)
770
+#define DO_MSUB(a, b, c) (a - b * c)
771
+
772
+#define VMADDSUB(NAME, BIT, E, DO_OP) \
773
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
774
+{ \
775
+ int i; \
776
+ VReg *Vd = (VReg *)vd; \
777
+ VReg *Vj = (VReg *)vj; \
778
+ VReg *Vk = (VReg *)vk; \
779
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
780
+ Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \
781
+ } \
782
+}
783
+
784
+VMADDSUB(vmadd_b, 8, B, DO_MADD)
785
+VMADDSUB(vmadd_h, 16, H, DO_MADD)
786
+VMADDSUB(vmadd_w, 32, W, DO_MADD)
787
+VMADDSUB(vmadd_d, 64, D, DO_MADD)
788
+VMADDSUB(vmsub_b, 8, B, DO_MSUB)
789
+VMADDSUB(vmsub_h, 16, H, DO_MSUB)
790
+VMADDSUB(vmsub_w, 32, W, DO_MSUB)
791
+VMADDSUB(vmsub_d, 64, D, DO_MSUB)
792
+
793
+#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
794
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
795
+{ \
796
+ int i; \
797
+ VReg *Vd = (VReg *)vd; \
798
+ VReg *Vj = (VReg *)vj; \
799
+ VReg *Vk = (VReg *)vk; \
800
+ typedef __typeof(Vd->E1(0)) TD; \
801
+ \
802
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
803
+ Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
804
+ } \
805
+}
806
+
807
+VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
808
+VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
809
+VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
810
+VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
811
+VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
812
+VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
813
+
814
+#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
815
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
816
+{ \
817
+ int i; \
818
+ VReg *Vd = (VReg *)vd; \
819
+ VReg *Vj = (VReg *)vj; \
820
+ VReg *Vk = (VReg *)vk; \
821
+ typedef __typeof(Vd->E1(0)) TD; \
822
+ \
823
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
824
+ Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
825
+ (TD)Vk->E2(2 * i + 1)); \
826
+ } \
827
+}
828
+
829
+VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
830
+VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
831
+VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
832
+VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL)
833
+VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL)
834
+VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL)
835
+
836
+#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
837
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
838
+{ \
839
+ int i; \
840
+ VReg *Vd = (VReg *)vd; \
841
+ VReg *Vj = (VReg *)vj; \
842
+ VReg *Vk = (VReg *)vk; \
843
+ typedef __typeof(Vd->ES1(0)) TS1; \
844
+ typedef __typeof(Vd->EU1(0)) TU1; \
845
+ \
846
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
847
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
848
+ (TS1)Vk->ES2(2 * i)); \
849
+ } \
850
+}
851
+
852
+VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
853
+VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
854
+VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
855
+
856
+#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
857
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
858
+{ \
859
+ int i; \
860
+ VReg *Vd = (VReg *)vd; \
861
+ VReg *Vj = (VReg *)vj; \
862
+ VReg *Vk = (VReg *)vk; \
863
+ typedef __typeof(Vd->ES1(0)) TS1; \
864
+ typedef __typeof(Vd->EU1(0)) TU1; \
865
+ \
866
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
867
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
868
+ (TS1)Vk->ES2(2 * i + 1)); \
869
+ } \
870
+}
871
+
872
+VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
873
+VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
874
+VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
875
--
876
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VDIV.{B/H/W/D}[U];
3
- VMOD.{B/H/W/D}[U].
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-17-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 17 ++++++++++
10
target/loongarch/helper.h | 17 ++++++++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 17 ++++++++++
12
target/loongarch/insns.decode | 17 ++++++++++
13
target/loongarch/lsx_helper.c | 37 +++++++++++++++++++++
14
5 files changed, 105 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmaddwod_h_bu_b, vvv)
21
INSN_LSX(vmaddwod_w_hu_h, vvv)
22
INSN_LSX(vmaddwod_d_wu_w, vvv)
23
INSN_LSX(vmaddwod_q_du_d, vvv)
24
+
25
+INSN_LSX(vdiv_b, vvv)
26
+INSN_LSX(vdiv_h, vvv)
27
+INSN_LSX(vdiv_w, vvv)
28
+INSN_LSX(vdiv_d, vvv)
29
+INSN_LSX(vdiv_bu, vvv)
30
+INSN_LSX(vdiv_hu, vvv)
31
+INSN_LSX(vdiv_wu, vvv)
32
+INSN_LSX(vdiv_du, vvv)
33
+INSN_LSX(vmod_b, vvv)
34
+INSN_LSX(vmod_h, vvv)
35
+INSN_LSX(vmod_w, vvv)
36
+INSN_LSX(vmod_d, vvv)
37
+INSN_LSX(vmod_bu, vvv)
38
+INSN_LSX(vmod_hu, vvv)
39
+INSN_LSX(vmod_wu, vvv)
40
+INSN_LSX(vmod_du, vvv)
41
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/helper.h
44
+++ b/target/loongarch/helper.h
45
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
46
DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
47
DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
49
+
50
+DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
51
+DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
52
+DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
53
+DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
54
+DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
55
+DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
56
+DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
57
+DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
58
+DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
59
+DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
60
+DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
61
+DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
66
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
69
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
70
@@ -XXX,XX +XXX,XX @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
71
TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s)
72
TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s)
73
TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s)
74
+
75
+TRANS(vdiv_b, gen_vvv, gen_helper_vdiv_b)
76
+TRANS(vdiv_h, gen_vvv, gen_helper_vdiv_h)
77
+TRANS(vdiv_w, gen_vvv, gen_helper_vdiv_w)
78
+TRANS(vdiv_d, gen_vvv, gen_helper_vdiv_d)
79
+TRANS(vdiv_bu, gen_vvv, gen_helper_vdiv_bu)
80
+TRANS(vdiv_hu, gen_vvv, gen_helper_vdiv_hu)
81
+TRANS(vdiv_wu, gen_vvv, gen_helper_vdiv_wu)
82
+TRANS(vdiv_du, gen_vvv, gen_helper_vdiv_du)
83
+TRANS(vmod_b, gen_vvv, gen_helper_vmod_b)
84
+TRANS(vmod_h, gen_vvv, gen_helper_vmod_h)
85
+TRANS(vmod_w, gen_vvv, gen_helper_vmod_w)
86
+TRANS(vmod_d, gen_vvv, gen_helper_vmod_d)
87
+TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu)
88
+TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu)
89
+TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu)
90
+TRANS(vmod_du, gen_vvv, gen_helper_vmod_du)
91
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/loongarch/insns.decode
94
+++ b/target/loongarch/insns.decode
95
@@ -XXX,XX +XXX,XX @@ vmaddwod_h_bu_b 0111 00001011 11100 ..... ..... ..... @vvv
96
vmaddwod_w_hu_h 0111 00001011 11101 ..... ..... ..... @vvv
97
vmaddwod_d_wu_w 0111 00001011 11110 ..... ..... ..... @vvv
98
vmaddwod_q_du_d 0111 00001011 11111 ..... ..... ..... @vvv
99
+
100
+vdiv_b 0111 00001110 00000 ..... ..... ..... @vvv
101
+vdiv_h 0111 00001110 00001 ..... ..... ..... @vvv
102
+vdiv_w 0111 00001110 00010 ..... ..... ..... @vvv
103
+vdiv_d 0111 00001110 00011 ..... ..... ..... @vvv
104
+vdiv_bu 0111 00001110 01000 ..... ..... ..... @vvv
105
+vdiv_hu 0111 00001110 01001 ..... ..... ..... @vvv
106
+vdiv_wu 0111 00001110 01010 ..... ..... ..... @vvv
107
+vdiv_du 0111 00001110 01011 ..... ..... ..... @vvv
108
+vmod_b 0111 00001110 00100 ..... ..... ..... @vvv
109
+vmod_h 0111 00001110 00101 ..... ..... ..... @vvv
110
+vmod_w 0111 00001110 00110 ..... ..... ..... @vvv
111
+vmod_d 0111 00001110 00111 ..... ..... ..... @vvv
112
+vmod_bu 0111 00001110 01100 ..... ..... ..... @vvv
113
+vmod_hu 0111 00001110 01101 ..... ..... ..... @vvv
114
+vmod_wu 0111 00001110 01110 ..... ..... ..... @vvv
115
+vmod_du 0111 00001110 01111 ..... ..... ..... @vvv
116
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/loongarch/lsx_helper.c
119
+++ b/target/loongarch/lsx_helper.c
120
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
121
VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
122
VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
123
VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
124
+
125
+#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
126
+#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
127
+#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\
128
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
129
+#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
130
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
131
+
132
+#define VDIV(NAME, BIT, E, DO_OP) \
133
+void HELPER(NAME)(CPULoongArchState *env, \
134
+ uint32_t vd, uint32_t vj, uint32_t vk) \
135
+{ \
136
+ int i; \
137
+ VReg *Vd = &(env->fpr[vd].vreg); \
138
+ VReg *Vj = &(env->fpr[vj].vreg); \
139
+ VReg *Vk = &(env->fpr[vk].vreg); \
140
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
141
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
142
+ } \
143
+}
144
+
145
+VDIV(vdiv_b, 8, B, DO_DIV)
146
+VDIV(vdiv_h, 16, H, DO_DIV)
147
+VDIV(vdiv_w, 32, W, DO_DIV)
148
+VDIV(vdiv_d, 64, D, DO_DIV)
149
+VDIV(vdiv_bu, 8, UB, DO_DIVU)
150
+VDIV(vdiv_hu, 16, UH, DO_DIVU)
151
+VDIV(vdiv_wu, 32, UW, DO_DIVU)
152
+VDIV(vdiv_du, 64, UD, DO_DIVU)
153
+VDIV(vmod_b, 8, B, DO_REM)
154
+VDIV(vmod_h, 16, H, DO_REM)
155
+VDIV(vmod_w, 32, W, DO_REM)
156
+VDIV(vmod_d, 64, D, DO_REM)
157
+VDIV(vmod_bu, 8, UB, DO_REMU)
158
+VDIV(vmod_hu, 16, UH, DO_REMU)
159
+VDIV(vmod_wu, 32, UW, DO_REMU)
160
+VDIV(vmod_du, 64, UD, DO_REMU)
161
--
162
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSAT.{B/H/W/D}[U].
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-18-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 9 ++
9
target/loongarch/helper.h | 9 ++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 101 ++++++++++++++++++++
11
target/loongarch/insns.decode | 12 +++
12
target/loongarch/lsx_helper.c | 37 +++++++
13
5 files changed, 168 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmod_bu, vvv)
20
INSN_LSX(vmod_hu, vvv)
21
INSN_LSX(vmod_wu, vvv)
22
INSN_LSX(vmod_du, vvv)
23
+
24
+INSN_LSX(vsat_b, vv_i)
25
+INSN_LSX(vsat_h, vv_i)
26
+INSN_LSX(vsat_w, vv_i)
27
+INSN_LSX(vsat_d, vv_i)
28
+INSN_LSX(vsat_bu, vv_i)
29
+INSN_LSX(vsat_hu, vv_i)
30
+INSN_LSX(vsat_wu, vv_i)
31
+INSN_LSX(vsat_du, vv_i)
32
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/loongarch/helper.h
35
+++ b/target/loongarch/helper.h
36
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
37
DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
38
DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
39
DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
40
+
41
+DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
42
+DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
43
+DEF_HELPER_FLAGS_4(vsat_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
44
+DEF_HELPER_FLAGS_4(vsat_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
45
+DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
46
+DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
47
+DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
48
+DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
49
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
52
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
53
@@ -XXX,XX +XXX,XX @@ TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu)
54
TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu)
55
TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu)
56
TRANS(vmod_du, gen_vvv, gen_helper_vmod_du)
57
+
58
+static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
59
+{ /* Per-vector expander for signed VSAT: writes to t the elements of a clamped to [~max, max]. */
60
+ TCGv_vec min;
61
+
62
+ min = tcg_temp_new_vec_matching(t);
63
+ tcg_gen_not_vec(vece, min, max); /* min = ~max, the lower bound paired with max */
64
+ tcg_gen_smax_vec(vece, t, a, min); /* t = signed max(a, min): apply the lower bound */
65
+ tcg_gen_smin_vec(vece, t, t, max); /* t = signed min(t, max): apply the upper bound */
66
+}
67
+
68
+static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
69
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
70
+{ /* Expand signed VSAT through gvec: op[vece] selects the per-size expanders, the scalar operand is max = 2^imm - 1. */
71
+ static const TCGOpcode vecop_list[] = {
72
+ INDEX_op_smax_vec, INDEX_op_smin_vec, 0
73
+ };
74
+ static const GVecGen2s op[4] = {
75
+ {
76
+ .fniv = gen_vsat_s,
77
+ .fno = gen_helper_vsat_b,
78
+ .opt_opc = vecop_list,
79
+ .vece = MO_8
80
+ },
81
+ {
82
+ .fniv = gen_vsat_s,
83
+ .fno = gen_helper_vsat_h,
84
+ .opt_opc = vecop_list,
85
+ .vece = MO_16
86
+ },
87
+ {
88
+ .fniv = gen_vsat_s,
89
+ .fno = gen_helper_vsat_w,
90
+ .opt_opc = vecop_list,
91
+ .vece = MO_32
92
+ },
93
+ {
94
+ .fniv = gen_vsat_s,
95
+ .fno = gen_helper_vsat_d,
96
+ .opt_opc = vecop_list,
97
+ .vece = MO_64
98
+ },
99
+ };
100
+ /* Shift as unsigned: vsat_d decodes a 6-bit immediate, and 1ll << 63 would overflow a signed 64-bit value. */
101
+ tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
102
+ tcg_constant_i64((1ull << imm) - 1), &op[vece]);
103
+}
104
+
105
+TRANS(vsat_b, gvec_vv_i, MO_8, do_vsat_s)
106
+TRANS(vsat_h, gvec_vv_i, MO_16, do_vsat_s)
107
+TRANS(vsat_w, gvec_vv_i, MO_32, do_vsat_s)
108
+TRANS(vsat_d, gvec_vv_i, MO_64, do_vsat_s)
109
+
110
+static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
111
+{
112
+ tcg_gen_umin_vec(vece, t, a, max);
113
+}
114
+
115
+static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
116
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
117
+{
118
+ uint64_t max;
119
+ static const TCGOpcode vecop_list[] = {
120
+ INDEX_op_umin_vec, 0
121
+ };
122
+ static const GVecGen2s op[4] = {
123
+ {
124
+ .fniv = gen_vsat_u,
125
+ .fno = gen_helper_vsat_bu,
126
+ .opt_opc = vecop_list,
127
+ .vece = MO_8
128
+ },
129
+ {
130
+ .fniv = gen_vsat_u,
131
+ .fno = gen_helper_vsat_hu,
132
+ .opt_opc = vecop_list,
133
+ .vece = MO_16
134
+ },
135
+ {
136
+ .fniv = gen_vsat_u,
137
+ .fno = gen_helper_vsat_wu,
138
+ .opt_opc = vecop_list,
139
+ .vece = MO_32
140
+ },
141
+ {
142
+ .fniv = gen_vsat_u,
143
+ .fno = gen_helper_vsat_du,
144
+ .opt_opc = vecop_list,
145
+ .vece = MO_64
146
+ },
147
+ };
148
+
149
+ max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
150
+ tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
151
+ tcg_constant_i64(max), &op[vece]);
152
+}
153
+
154
+TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u)
155
+TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u)
156
+TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u)
157
+TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u)
158
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
159
index XXXXXXX..XXXXXXX 100644
160
--- a/target/loongarch/insns.decode
161
+++ b/target/loongarch/insns.decode
162
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
163
#
164
@vv .... ........ ..... ..... vj:5 vd:5 &vv
165
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
166
+@vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i
167
+@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
168
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
169
+@vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i
170
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
171
172
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
173
@@ -XXX,XX +XXX,XX @@ vmod_bu 0111 00001110 01100 ..... ..... ..... @vvv
174
vmod_hu 0111 00001110 01101 ..... ..... ..... @vvv
175
vmod_wu 0111 00001110 01110 ..... ..... ..... @vvv
176
vmod_du 0111 00001110 01111 ..... ..... ..... @vvv
177
+
178
+vsat_b 0111 00110010 01000 01 ... ..... ..... @vv_ui3
179
+vsat_h 0111 00110010 01000 1 .... ..... ..... @vv_ui4
180
+vsat_w 0111 00110010 01001 ..... ..... ..... @vv_ui5
181
+vsat_d 0111 00110010 0101 ...... ..... ..... @vv_ui6
182
+vsat_bu 0111 00110010 10000 01 ... ..... ..... @vv_ui3
183
+vsat_hu 0111 00110010 10000 1 .... ..... ..... @vv_ui4
184
+vsat_wu 0111 00110010 10001 ..... ..... ..... @vv_ui5
185
+vsat_du 0111 00110010 1001 ...... ..... ..... @vv_ui6
186
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/loongarch/lsx_helper.c
189
+++ b/target/loongarch/lsx_helper.c
190
@@ -XXX,XX +XXX,XX @@ VDIV(vmod_bu, 8, UB, DO_REMU)
191
VDIV(vmod_hu, 16, UH, DO_REMU)
192
VDIV(vmod_wu, 32, UW, DO_REMU)
193
VDIV(vmod_du, 64, UD, DO_REMU)
194
+
195
+#define VSAT_S(NAME, BIT, E) \
196
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
197
+{ \
198
+ int i; \
199
+ VReg *Vd = (VReg *)vd; \
200
+ VReg *Vj = (VReg *)vj; \
201
+ typedef __typeof(Vd->E(0)) TD; \
202
+ \
203
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
204
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
205
+ Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
206
+ } \
207
+}
208
+
209
+VSAT_S(vsat_b, 8, B)
210
+VSAT_S(vsat_h, 16, H)
211
+VSAT_S(vsat_w, 32, W)
212
+VSAT_S(vsat_d, 64, D)
213
+
214
+#define VSAT_U(NAME, BIT, E) \
215
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
216
+{ \
217
+ int i; \
218
+ VReg *Vd = (VReg *)vd; \
219
+ VReg *Vj = (VReg *)vj; \
220
+ typedef __typeof(Vd->E(0)) TD; \
221
+ \
222
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
223
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
224
+ } \
225
+}
226
+
227
+VSAT_U(vsat_bu, 8, UB)
228
+VSAT_U(vsat_hu, 16, UH)
229
+VSAT_U(vsat_wu, 32, UW)
230
+VSAT_U(vsat_du, 64, UD)
231
--
232
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VEXTH.{H.B/W.H/D.W/Q.D};
3
- VEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-19-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 9 ++++++
10
target/loongarch/helper.h | 9 ++++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 20 ++++++++++++
12
target/loongarch/insns.decode | 9 ++++++
13
target/loongarch/lsx_helper.c | 35 +++++++++++++++++++++
14
5 files changed, 82 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsat_bu, vv_i)
21
INSN_LSX(vsat_hu, vv_i)
22
INSN_LSX(vsat_wu, vv_i)
23
INSN_LSX(vsat_du, vv_i)
24
+
25
+INSN_LSX(vexth_h_b, vv)
26
+INSN_LSX(vexth_w_h, vv)
27
+INSN_LSX(vexth_d_w, vv)
28
+INSN_LSX(vexth_q_d, vv)
29
+INSN_LSX(vexth_hu_bu, vv)
30
+INSN_LSX(vexth_wu_hu, vv)
31
+INSN_LSX(vexth_du_wu, vv)
32
+INSN_LSX(vexth_qu_du, vv)
33
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/loongarch/helper.h
36
+++ b/target/loongarch/helper.h
37
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
38
DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
39
DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
40
DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
41
+
42
+DEF_HELPER_3(vexth_h_b, void, env, i32, i32)
43
+DEF_HELPER_3(vexth_w_h, void, env, i32, i32)
44
+DEF_HELPER_3(vexth_d_w, void, env, i32, i32)
45
+DEF_HELPER_3(vexth_q_d, void, env, i32, i32)
46
+DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
47
+DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
48
+DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
49
+DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
50
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
53
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
54
@@ -XXX,XX +XXX,XX @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
55
return true;
56
}
57
58
+static bool gen_vv(DisasContext *ctx, arg_vv *a,
59
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
60
+{
61
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
62
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
63
+
64
+ CHECK_SXE;
65
+ func(cpu_env, vd, vj);
66
+ return true;
67
+}
68
+
69
static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
70
void (*func)(unsigned, uint32_t, uint32_t,
71
uint32_t, uint32_t, uint32_t))
72
@@ -XXX,XX +XXX,XX @@ TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u)
73
TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u)
74
TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u)
75
TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u)
76
+
77
+TRANS(vexth_h_b, gen_vv, gen_helper_vexth_h_b)
78
+TRANS(vexth_w_h, gen_vv, gen_helper_vexth_w_h)
79
+TRANS(vexth_d_w, gen_vv, gen_helper_vexth_d_w)
80
+TRANS(vexth_q_d, gen_vv, gen_helper_vexth_q_d)
81
+TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu)
82
+TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu)
83
+TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu)
84
+TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du)
85
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/loongarch/insns.decode
88
+++ b/target/loongarch/insns.decode
89
@@ -XXX,XX +XXX,XX @@ vsat_bu 0111 00110010 10000 01 ... ..... ..... @vv_ui3
90
vsat_hu 0111 00110010 10000 1 .... ..... ..... @vv_ui4
91
vsat_wu 0111 00110010 10001 ..... ..... ..... @vv_ui5
92
vsat_du 0111 00110010 1001 ...... ..... ..... @vv_ui6
93
+
94
+vexth_h_b 0111 00101001 11101 11000 ..... ..... @vv
95
+vexth_w_h 0111 00101001 11101 11001 ..... ..... @vv
96
+vexth_d_w 0111 00101001 11101 11010 ..... ..... @vv
97
+vexth_q_d 0111 00101001 11101 11011 ..... ..... @vv
98
+vexth_hu_bu 0111 00101001 11101 11100 ..... ..... @vv
99
+vexth_wu_hu 0111 00101001 11101 11101 ..... ..... @vv
100
+vexth_du_wu 0111 00101001 11101 11110 ..... ..... @vv
101
+vexth_qu_du 0111 00101001 11101 11111 ..... ..... @vv
102
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/loongarch/lsx_helper.c
105
+++ b/target/loongarch/lsx_helper.c
106
@@ -XXX,XX +XXX,XX @@ VSAT_U(vsat_bu, 8, UB)
107
VSAT_U(vsat_hu, 16, UH)
108
VSAT_U(vsat_wu, 32, UW)
109
VSAT_U(vsat_du, 64, UD)
110
+
111
+#define VEXTH(NAME, BIT, E1, E2) \
112
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
113
+{ \
114
+ int i; \
115
+ VReg *Vd = &(env->fpr[vd].vreg); \
116
+ VReg *Vj = &(env->fpr[vj].vreg); \
117
+ \
118
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
119
+ Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
120
+ } \
121
+}
122
+
123
+void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
124
+{
125
+ VReg *Vd = &(env->fpr[vd].vreg);
126
+ VReg *Vj = &(env->fpr[vj].vreg);
127
+
128
+ Vd->Q(0) = int128_makes64(Vj->D(1));
129
+}
130
+
131
+void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
132
+{
133
+ VReg *Vd = &(env->fpr[vd].vreg);
134
+ VReg *Vj = &(env->fpr[vj].vreg);
135
+
136
+ Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
137
+}
138
+
139
+VEXTH(vexth_h_b, 16, H, B)
140
+VEXTH(vexth_w_h, 32, W, H)
141
+VEXTH(vexth_d_w, 64, D, W)
142
+VEXTH(vexth_hu_bu, 16, UH, UB)
143
+VEXTH(vexth_wu_hu, 32, UW, UH)
144
+VEXTH(vexth_du_wu, 64, UD, UW)
145
--
146
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSIGNCOV.{B/H/W/D}.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-20-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 5 ++
9
target/loongarch/helper.h | 5 ++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 53 +++++++++++++++++++++
11
target/loongarch/insns.decode | 5 ++
12
target/loongarch/lsx_helper.c | 7 +++
13
5 files changed, 75 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vexth_hu_bu, vv)
20
INSN_LSX(vexth_wu_hu, vv)
21
INSN_LSX(vexth_du_wu, vv)
22
INSN_LSX(vexth_qu_du, vv)
23
+
24
+INSN_LSX(vsigncov_b, vvv)
25
+INSN_LSX(vsigncov_h, vvv)
26
+INSN_LSX(vsigncov_w, vvv)
27
+INSN_LSX(vsigncov_d, vvv)
28
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/loongarch/helper.h
31
+++ b/target/loongarch/helper.h
32
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
33
DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
34
DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
35
DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
36
+
37
+DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
44
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
45
@@ -XXX,XX +XXX,XX @@ TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu)
46
TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu)
47
TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu)
48
TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du)
49
+
50
+static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
51
+{ /* Per-vector expander for VSIGNCOV: t = 0 where a == 0, -b where a < 0, b otherwise. */
52
+ TCGv_vec t1, zero;
53
+
54
+ t1 = tcg_temp_new_vec_matching(t);
55
+ zero = tcg_constant_vec_matching(t, vece, 0);
56
+
57
+ tcg_gen_neg_vec(vece, t1, b); /* t1 = -b */
58
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b); /* t = (a < 0) ? -b : b */
59
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t); /* t = (a == 0) ? 0 : t */
60
+}
61
+
62
+static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
63
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
64
+{
65
+ static const TCGOpcode vecop_list[] = {
66
+ INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
67
+ };
68
+ static const GVecGen3 op[4] = {
69
+ {
70
+ .fniv = gen_vsigncov,
71
+ .fno = gen_helper_vsigncov_b,
72
+ .opt_opc = vecop_list,
73
+ .vece = MO_8
74
+ },
75
+ {
76
+ .fniv = gen_vsigncov,
77
+ .fno = gen_helper_vsigncov_h,
78
+ .opt_opc = vecop_list,
79
+ .vece = MO_16
80
+ },
81
+ {
82
+ .fniv = gen_vsigncov,
83
+ .fno = gen_helper_vsigncov_w,
84
+ .opt_opc = vecop_list,
85
+ .vece = MO_32
86
+ },
87
+ {
88
+ .fniv = gen_vsigncov,
89
+ .fno = gen_helper_vsigncov_d,
90
+ .opt_opc = vecop_list,
91
+ .vece = MO_64
92
+ },
93
+ };
94
+
95
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
96
+}
97
+
98
+TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov)
99
+TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov)
100
+TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov)
101
+TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov)
102
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
103
index XXXXXXX..XXXXXXX 100644
104
--- a/target/loongarch/insns.decode
105
+++ b/target/loongarch/insns.decode
106
@@ -XXX,XX +XXX,XX @@ vexth_hu_bu 0111 00101001 11101 11100 ..... ..... @vv
107
vexth_wu_hu 0111 00101001 11101 11101 ..... ..... @vv
108
vexth_du_wu 0111 00101001 11101 11110 ..... ..... @vv
109
vexth_qu_du 0111 00101001 11101 11111 ..... ..... @vv
110
+
111
+vsigncov_b 0111 00010010 11100 ..... ..... ..... @vvv
112
+vsigncov_h 0111 00010010 11101 ..... ..... ..... @vvv
113
+vsigncov_w 0111 00010010 11110 ..... ..... ..... @vvv
114
+vsigncov_d 0111 00010010 11111 ..... ..... ..... @vvv
115
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/target/loongarch/lsx_helper.c
118
+++ b/target/loongarch/lsx_helper.c
119
@@ -XXX,XX +XXX,XX @@ VEXTH(vexth_d_w, 64, D, W)
120
VEXTH(vexth_hu_bu, 16, UH, UB)
121
VEXTH(vexth_wu_hu, 32, UW, UH)
122
VEXTH(vexth_du_wu, 64, UD, UW)
123
+
124
+#define DO_SIGNCOV(a, b) ((a) == 0 ? 0 : (a) < 0 ? -(b) : (b)) /* 0 if a == 0, -b if a < 0, else b; arguments parenthesized so compound expressions expand safely */
125
+
126
+DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
127
+DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
128
+DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
129
+DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
130
--
131
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VMSKLTZ.{B/H/W/D};
3
- VMSKGEZ.B;
4
- VMSKNZ.B.
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-21-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 7 ++
11
target/loongarch/helper.h | 7 ++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 7 ++
13
target/loongarch/insns.decode | 7 ++
14
target/loongarch/lsx_helper.c | 113 ++++++++++++++++++++
15
5 files changed, 141 insertions(+)
16
17
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/loongarch/disas.c
20
+++ b/target/loongarch/disas.c
21
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsigncov_b, vvv)
22
INSN_LSX(vsigncov_h, vvv)
23
INSN_LSX(vsigncov_w, vvv)
24
INSN_LSX(vsigncov_d, vvv)
25
+
26
+INSN_LSX(vmskltz_b, vv)
27
+INSN_LSX(vmskltz_h, vv)
28
+INSN_LSX(vmskltz_w, vv)
29
+INSN_LSX(vmskltz_d, vv)
30
+INSN_LSX(vmskgez_b, vv)
31
+INSN_LSX(vmsknz_b, vv)
32
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/loongarch/helper.h
35
+++ b/target/loongarch/helper.h
36
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+
41
+DEF_HELPER_3(vmskltz_b, void, env, i32, i32)
42
+DEF_HELPER_3(vmskltz_h, void, env, i32, i32)
43
+DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
44
+DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
45
+DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
46
+DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
47
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
50
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
51
@@ -XXX,XX +XXX,XX @@ TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov)
52
TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov)
53
TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov)
54
TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov)
55
+
56
+TRANS(vmskltz_b, gen_vv, gen_helper_vmskltz_b)
57
+TRANS(vmskltz_h, gen_vv, gen_helper_vmskltz_h)
58
+TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w)
59
+TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
60
+TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
61
+TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
62
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/loongarch/insns.decode
65
+++ b/target/loongarch/insns.decode
66
@@ -XXX,XX +XXX,XX @@ vsigncov_b 0111 00010010 11100 ..... ..... ..... @vvv
67
vsigncov_h 0111 00010010 11101 ..... ..... ..... @vvv
68
vsigncov_w 0111 00010010 11110 ..... ..... ..... @vvv
69
vsigncov_d 0111 00010010 11111 ..... ..... ..... @vvv
70
+
71
+vmskltz_b 0111 00101001 11000 10000 ..... ..... @vv
72
+vmskltz_h 0111 00101001 11000 10001 ..... ..... @vv
73
+vmskltz_w 0111 00101001 11000 10010 ..... ..... @vv
74
+vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv
75
+vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv
76
+vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv
77
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/loongarch/lsx_helper.c
80
+++ b/target/loongarch/lsx_helper.c
81
@@ -XXX,XX +XXX,XX @@ DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
82
DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
83
DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
84
DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
85
+
86
+static uint64_t do_vmskltz_b(int64_t val)
87
+{ /* Pack the sign bit of each of the 8 bytes of val into an 8-bit mask; result bit k comes from byte k. */
88
+ uint64_t m = 0x8080808080808080ULL; /* bit 7 of every byte */
89
+ uint64_t c = val & m; /* keep only the per-byte sign bits */
90
+ c |= c << 7; /* the shifts by 7, 14 and 28 combine so that byte k's sign bit (bit 8k + 7) also lands on bit 56 + k */
91
+ c |= c << 14;
92
+ c |= c << 28;
93
+ return c >> 56; /* the gathered mask sits in the top byte */
94
+}
95
+
96
+void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
97
+{
98
+ uint16_t temp = 0;
99
+ VReg *Vd = &(env->fpr[vd].vreg);
100
+ VReg *Vj = &(env->fpr[vj].vreg);
101
+
102
+ temp = do_vmskltz_b(Vj->D(0));
103
+ temp |= (do_vmskltz_b(Vj->D(1)) << 8);
104
+ Vd->D(0) = temp;
105
+ Vd->D(1) = 0;
106
+}
107
+
108
+static uint64_t do_vmskltz_h(int64_t val)
109
+{
110
+ uint64_t m = 0x8000800080008000ULL;
111
+ uint64_t c = val & m;
112
+ c |= c << 15;
113
+ c |= c << 30;
114
+ return c >> 60;
115
+}
116
+
117
+void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
118
+{
119
+ uint16_t temp = 0;
120
+ VReg *Vd = &(env->fpr[vd].vreg);
121
+ VReg *Vj = &(env->fpr[vj].vreg);
122
+
123
+ temp = do_vmskltz_h(Vj->D(0));
124
+ temp |= (do_vmskltz_h(Vj->D(1)) << 4);
125
+ Vd->D(0) = temp;
126
+ Vd->D(1) = 0;
127
+}
128
+
129
+static uint64_t do_vmskltz_w(int64_t val)
130
+{
131
+ uint64_t m = 0x8000000080000000ULL;
132
+ uint64_t c = val & m;
133
+ c |= c << 31;
134
+ return c >> 62;
135
+}
136
+
137
+void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
138
+{
139
+ uint16_t temp = 0;
140
+ VReg *Vd = &(env->fpr[vd].vreg);
141
+ VReg *Vj = &(env->fpr[vj].vreg);
142
+
143
+ temp = do_vmskltz_w(Vj->D(0));
144
+ temp |= (do_vmskltz_w(Vj->D(1)) << 2);
145
+ Vd->D(0) = temp;
146
+ Vd->D(1) = 0;
147
+}
148
+
149
+static uint64_t do_vmskltz_d(int64_t val)
150
+{
151
+ return (uint64_t)val >> 63;
152
+}
153
+void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
154
+{
155
+ uint16_t temp = 0;
156
+ VReg *Vd = &(env->fpr[vd].vreg);
157
+ VReg *Vj = &(env->fpr[vj].vreg);
158
+
159
+ temp = do_vmskltz_d(Vj->D(0));
160
+ temp |= (do_vmskltz_d(Vj->D(1)) << 1);
161
+ Vd->D(0) = temp;
162
+ Vd->D(1) = 0;
163
+}
164
+
165
+void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
166
+{
167
+ uint16_t temp = 0;
168
+ VReg *Vd = &(env->fpr[vd].vreg);
169
+ VReg *Vj = &(env->fpr[vj].vreg);
170
+
171
+ temp = do_vmskltz_b(Vj->D(0));
172
+ temp |= (do_vmskltz_b(Vj->D(1)) << 8);
173
+ Vd->D(0) = (uint16_t)(~temp);
174
+ Vd->D(1) = 0;
175
+}
176
+
177
+static uint64_t do_vmskez_b(uint64_t a)
178
+{ /* Build an 8-bit mask whose bit k is set when byte k of a is zero. */
179
+ uint64_t m = 0x7f7f7f7f7f7f7f7fULL; /* low 7 bits of every byte */
180
+ uint64_t c = ~(((a & m) + m) | a | m); /* bit 7 of a byte survives only if its low 7 bits and its own bit 7 were all clear; the add cannot carry across bytes */
181
+ c |= c << 7; /* the shifts by 7, 14 and 28 gather bit 8k + 7 of each byte onto bit 56 + k */
182
+ c |= c << 14;
183
+ c |= c << 28;
184
+ return c >> 56; /* the gathered mask sits in the top byte */
185
+}
186
+
187
+void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
188
+{
189
+ uint16_t temp = 0;
190
+ VReg *Vd = &(env->fpr[vd].vreg);
191
+ VReg *Vj = &(env->fpr[vj].vreg);
192
+
193
+ temp = do_vmskez_b(Vj->D(0));
194
+ temp |= (do_vmskez_b(Vj->D(1)) << 8);
195
+ Vd->D(0) = (uint16_t)(~temp);
196
+ Vd->D(1) = 0;
197
+}
198
--
199
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- V{AND/OR/XOR/NOR/ANDN/ORN}.V;
3
- V{AND/OR/XOR/NOR}I.B.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-22-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 12 +++++
10
target/loongarch/helper.h | 2 +
11
target/loongarch/insn_trans/trans_lsx.c.inc | 56 +++++++++++++++++++++
12
target/loongarch/insns.decode | 13 +++++
13
target/loongarch/lsx_helper.c | 11 ++++
14
5 files changed, 94 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmskltz_w, vv)
21
INSN_LSX(vmskltz_d, vv)
22
INSN_LSX(vmskgez_b, vv)
23
INSN_LSX(vmsknz_b, vv)
24
+
25
+INSN_LSX(vand_v, vvv)
26
+INSN_LSX(vor_v, vvv)
27
+INSN_LSX(vxor_v, vvv)
28
+INSN_LSX(vnor_v, vvv)
29
+INSN_LSX(vandn_v, vvv)
30
+INSN_LSX(vorn_v, vvv)
31
+
32
+INSN_LSX(vandi_b, vv_i)
33
+INSN_LSX(vori_b, vv_i)
34
+INSN_LSX(vxori_b, vv_i)
35
+INSN_LSX(vnori_b, vv_i)
36
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/loongarch/helper.h
39
+++ b/target/loongarch/helper.h
40
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
41
DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
42
DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
43
DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
44
+
45
+DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
46
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
49
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
50
@@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w)
51
TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
52
TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
53
TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
54
+
55
+TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and)
56
+TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or)
57
+TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor)
58
+TRANS(vnor_v, gvec_vvv, MO_64, tcg_gen_gvec_nor)
59
+
60
+static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
61
+{ /* Translate VANDN.V by emitting a single gvec andc over the vd/vj/vk register offsets; always returns true. */
62
+ uint32_t vd_ofs, vj_ofs, vk_ofs;
63
+
64
+ CHECK_SXE;
65
+
66
+ vd_ofs = vec_full_offset(a->vd);
67
+ vj_ofs = vec_full_offset(a->vj);
68
+ vk_ofs = vec_full_offset(a->vk);
69
+
70
+ tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8); /* NOTE(review): vk is passed before vj; presumably andc complements its second source, giving vd = vk & ~vj (confirm against the TCG docs) */
71
+ return true;
72
+}
73
+TRANS(vorn_v, gvec_vvv, MO_64, tcg_gen_gvec_orc)
74
+TRANS(vandi_b, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
75
+TRANS(vori_b, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
76
+TRANS(vxori_b, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
77
+
78
+static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
79
+{
80
+ TCGv_vec t1;
81
+
82
+ t1 = tcg_constant_vec_matching(t, vece, imm);
83
+ tcg_gen_nor_vec(vece, t, a, t1);
84
+}
85
+
86
+static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
87
+{
88
+ tcg_gen_movi_i64(t, dup_const(MO_8, imm));
89
+ tcg_gen_nor_i64(t, a, t);
90
+}
91
+
92
+static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
93
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
94
+{
95
+ static const TCGOpcode vecop_list[] = {
96
+ INDEX_op_nor_vec, 0
97
+ };
98
+ static const GVecGen2i op = {
99
+ .fni8 = gen_vnori_b,
100
+ .fniv = gen_vnori,
101
+ .fnoi = gen_helper_vnori_b,
102
+ .opt_opc = vecop_list,
103
+ .vece = MO_8
104
+ };
105
+
106
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
107
+}
108
+
109
+TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b)
110
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
111
index XXXXXXX..XXXXXXX 100644
112
--- a/target/loongarch/insns.decode
113
+++ b/target/loongarch/insns.decode
114
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
115
@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
116
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
117
@vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i
118
+@vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i
119
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
120
121
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
122
@@ -XXX,XX +XXX,XX @@ vmskltz_w 0111 00101001 11000 10010 ..... ..... @vv
123
vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv
124
vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv
125
vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv
126
+
127
+vand_v 0111 00010010 01100 ..... ..... ..... @vvv
128
+vor_v 0111 00010010 01101 ..... ..... ..... @vvv
129
+vxor_v 0111 00010010 01110 ..... ..... ..... @vvv
130
+vnor_v 0111 00010010 01111 ..... ..... ..... @vvv
131
+vandn_v 0111 00010010 10000 ..... ..... ..... @vvv
132
+vorn_v 0111 00010010 10001 ..... ..... ..... @vvv
133
+
134
+vandi_b 0111 00111101 00 ........ ..... ..... @vv_ui8
135
+vori_b 0111 00111101 01 ........ ..... ..... @vv_ui8
136
+vxori_b 0111 00111101 10 ........ ..... ..... @vv_ui8
137
+vnori_b 0111 00111101 11 ........ ..... ..... @vv_ui8
138
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/target/loongarch/lsx_helper.c
141
+++ b/target/loongarch/lsx_helper.c
142
@@ -XXX,XX +XXX,XX @@ void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
143
Vd->D(0) = (uint16_t)(~temp);
144
Vd->D(1) = 0;
145
}
146
+
147
+void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
148
+{
149
+ int i;
150
+ VReg *Vd = (VReg *)vd;
151
+ VReg *Vj = (VReg *)vj;
152
+
153
+ for (i = 0; i < LSX_LEN/8; i++) {
154
+ Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
155
+ }
156
+}
157
--
158
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSLL[I].{B/H/W/D};
3
- VSRL[I].{B/H/W/D};
4
- VSRA[I].{B/H/W/D};
5
- VROTR[I].{B/H/W/D}.
6
1
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230504122810.4094787-23-gaosong@loongson.cn>
10
---
11
target/loongarch/disas.c | 36 +++++++++++++++++++++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 36 +++++++++++++++++++++
13
target/loongarch/insns.decode | 36 +++++++++++++++++++++
14
3 files changed, 108 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vandi_b, vv_i)
21
INSN_LSX(vori_b, vv_i)
22
INSN_LSX(vxori_b, vv_i)
23
INSN_LSX(vnori_b, vv_i)
24
+
25
+INSN_LSX(vsll_b, vvv)
26
+INSN_LSX(vsll_h, vvv)
27
+INSN_LSX(vsll_w, vvv)
28
+INSN_LSX(vsll_d, vvv)
29
+INSN_LSX(vslli_b, vv_i)
30
+INSN_LSX(vslli_h, vv_i)
31
+INSN_LSX(vslli_w, vv_i)
32
+INSN_LSX(vslli_d, vv_i)
33
+
34
+INSN_LSX(vsrl_b, vvv)
35
+INSN_LSX(vsrl_h, vvv)
36
+INSN_LSX(vsrl_w, vvv)
37
+INSN_LSX(vsrl_d, vvv)
38
+INSN_LSX(vsrli_b, vv_i)
39
+INSN_LSX(vsrli_h, vv_i)
40
+INSN_LSX(vsrli_w, vv_i)
41
+INSN_LSX(vsrli_d, vv_i)
42
+
43
+INSN_LSX(vsra_b, vvv)
44
+INSN_LSX(vsra_h, vvv)
45
+INSN_LSX(vsra_w, vvv)
46
+INSN_LSX(vsra_d, vvv)
47
+INSN_LSX(vsrai_b, vv_i)
48
+INSN_LSX(vsrai_h, vv_i)
49
+INSN_LSX(vsrai_w, vv_i)
50
+INSN_LSX(vsrai_d, vv_i)
51
+
52
+INSN_LSX(vrotr_b, vvv)
53
+INSN_LSX(vrotr_h, vvv)
54
+INSN_LSX(vrotr_w, vvv)
55
+INSN_LSX(vrotr_d, vvv)
56
+INSN_LSX(vrotri_b, vv_i)
57
+INSN_LSX(vrotri_h, vv_i)
58
+INSN_LSX(vrotri_w, vv_i)
59
+INSN_LSX(vrotri_d, vv_i)
60
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
63
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
64
@@ -XXX,XX +XXX,XX @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
65
}
66
67
TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b)
68
+
69
+TRANS(vsll_b, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
70
+TRANS(vsll_h, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
71
+TRANS(vsll_w, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
72
+TRANS(vsll_d, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
73
+TRANS(vslli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
74
+TRANS(vslli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
75
+TRANS(vslli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
76
+TRANS(vslli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
77
+
78
+TRANS(vsrl_b, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
79
+TRANS(vsrl_h, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
80
+TRANS(vsrl_w, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
81
+TRANS(vsrl_d, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
82
+TRANS(vsrli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
83
+TRANS(vsrli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
84
+TRANS(vsrli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
85
+TRANS(vsrli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
86
+
87
+TRANS(vsra_b, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
88
+TRANS(vsra_h, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
89
+TRANS(vsra_w, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
90
+TRANS(vsra_d, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
91
+TRANS(vsrai_b, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
92
+TRANS(vsrai_h, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
93
+TRANS(vsrai_w, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
94
+TRANS(vsrai_d, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
95
+
96
+TRANS(vrotr_b, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
97
+TRANS(vrotr_h, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
98
+TRANS(vrotr_w, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
99
+TRANS(vrotr_d, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
100
+TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
101
+TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
102
+TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
103
+TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
104
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/loongarch/insns.decode
107
+++ b/target/loongarch/insns.decode
108
@@ -XXX,XX +XXX,XX @@ vandi_b 0111 00111101 00 ........ ..... ..... @vv_ui8
109
vori_b 0111 00111101 01 ........ ..... ..... @vv_ui8
110
vxori_b 0111 00111101 10 ........ ..... ..... @vv_ui8
111
vnori_b 0111 00111101 11 ........ ..... ..... @vv_ui8
112
+
113
+vsll_b 0111 00001110 10000 ..... ..... ..... @vvv
114
+vsll_h 0111 00001110 10001 ..... ..... ..... @vvv
115
+vsll_w 0111 00001110 10010 ..... ..... ..... @vvv
116
+vsll_d 0111 00001110 10011 ..... ..... ..... @vvv
117
+vslli_b 0111 00110010 11000 01 ... ..... ..... @vv_ui3
118
+vslli_h 0111 00110010 11000 1 .... ..... ..... @vv_ui4
119
+vslli_w 0111 00110010 11001 ..... ..... ..... @vv_ui5
120
+vslli_d 0111 00110010 1101 ...... ..... ..... @vv_ui6
121
+
122
+vsrl_b 0111 00001110 10100 ..... ..... ..... @vvv
123
+vsrl_h 0111 00001110 10101 ..... ..... ..... @vvv
124
+vsrl_w 0111 00001110 10110 ..... ..... ..... @vvv
125
+vsrl_d 0111 00001110 10111 ..... ..... ..... @vvv
126
+vsrli_b 0111 00110011 00000 01 ... ..... ..... @vv_ui3
127
+vsrli_h 0111 00110011 00000 1 .... ..... ..... @vv_ui4
128
+vsrli_w 0111 00110011 00001 ..... ..... ..... @vv_ui5
129
+vsrli_d 0111 00110011 0001 ...... ..... ..... @vv_ui6
130
+
131
+vsra_b 0111 00001110 11000 ..... ..... ..... @vvv
132
+vsra_h 0111 00001110 11001 ..... ..... ..... @vvv
133
+vsra_w 0111 00001110 11010 ..... ..... ..... @vvv
134
+vsra_d 0111 00001110 11011 ..... ..... ..... @vvv
135
+vsrai_b 0111 00110011 01000 01 ... ..... ..... @vv_ui3
136
+vsrai_h 0111 00110011 01000 1 .... ..... ..... @vv_ui4
137
+vsrai_w 0111 00110011 01001 ..... ..... ..... @vv_ui5
138
+vsrai_d 0111 00110011 0101 ...... ..... ..... @vv_ui6
139
+
140
+vrotr_b 0111 00001110 11100 ..... ..... ..... @vvv
141
+vrotr_h 0111 00001110 11101 ..... ..... ..... @vvv
142
+vrotr_w 0111 00001110 11110 ..... ..... ..... @vvv
143
+vrotr_d 0111 00001110 11111 ..... ..... ..... @vvv
144
+vrotri_b 0111 00101010 00000 01 ... ..... ..... @vv_ui3
145
+vrotri_h 0111 00101010 00000 1 .... ..... ..... @vv_ui4
146
+vrotri_w 0111 00101010 00001 ..... ..... ..... @vv_ui5
147
+vrotri_d 0111 00101010 0001 ...... ..... ..... @vv_ui6
148
--
149
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSLLWIL.{H.B/W.H/D.W};
3
- VSLLWIL.{HU.BU/WU.HU/DU.WU};
4
- VEXTL.Q.D, VEXTL.QU.DU.
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-24-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 9 +++++
11
target/loongarch/helper.h | 9 +++++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 21 +++++++++++
13
target/loongarch/insns.decode | 9 +++++
14
target/loongarch/lsx_helper.c | 41 +++++++++++++++++++++
15
5 files changed, 89 insertions(+)
16
17
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/loongarch/disas.c
20
+++ b/target/loongarch/disas.c
21
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vrotri_b, vv_i)
22
INSN_LSX(vrotri_h, vv_i)
23
INSN_LSX(vrotri_w, vv_i)
24
INSN_LSX(vrotri_d, vv_i)
25
+
26
+INSN_LSX(vsllwil_h_b, vv_i)
27
+INSN_LSX(vsllwil_w_h, vv_i)
28
+INSN_LSX(vsllwil_d_w, vv_i)
29
+INSN_LSX(vextl_q_d, vv)
30
+INSN_LSX(vsllwil_hu_bu, vv_i)
31
+INSN_LSX(vsllwil_wu_hu, vv_i)
32
+INSN_LSX(vsllwil_du_wu, vv_i)
33
+INSN_LSX(vextl_qu_du, vv)
34
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/loongarch/helper.h
37
+++ b/target/loongarch/helper.h
38
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
39
DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
40
41
DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
42
+
43
+DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
44
+DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
45
+DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
46
+DEF_HELPER_3(vextl_q_d, void, env, i32, i32)
47
+DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
48
+DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
49
+DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
50
+DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
51
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
54
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
55
@@ -XXX,XX +XXX,XX @@ static bool gen_vv(DisasContext *ctx, arg_vv *a,
56
return true;
57
}
58
59
+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
60
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
61
+{
62
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
63
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
64
+ TCGv_i32 imm = tcg_constant_i32(a->imm);
65
+
66
+ CHECK_SXE;
67
+ func(cpu_env, vd, vj, imm);
68
+ return true;
69
+}
70
+
71
static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
72
void (*func)(unsigned, uint32_t, uint32_t,
73
uint32_t, uint32_t, uint32_t))
74
@@ -XXX,XX +XXX,XX @@ TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
75
TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
76
TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
77
TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
78
+
79
+TRANS(vsllwil_h_b, gen_vv_i, gen_helper_vsllwil_h_b)
80
+TRANS(vsllwil_w_h, gen_vv_i, gen_helper_vsllwil_w_h)
81
+TRANS(vsllwil_d_w, gen_vv_i, gen_helper_vsllwil_d_w)
82
+TRANS(vextl_q_d, gen_vv, gen_helper_vextl_q_d)
83
+TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu)
84
+TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu)
85
+TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu)
86
+TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du)
87
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/loongarch/insns.decode
90
+++ b/target/loongarch/insns.decode
91
@@ -XXX,XX +XXX,XX @@ vrotri_b 0111 00101010 00000 01 ... ..... ..... @vv_ui3
92
vrotri_h 0111 00101010 00000 1 .... ..... ..... @vv_ui4
93
vrotri_w 0111 00101010 00001 ..... ..... ..... @vv_ui5
94
vrotri_d 0111 00101010 0001 ...... ..... ..... @vv_ui6
95
+
96
+vsllwil_h_b 0111 00110000 10000 01 ... ..... ..... @vv_ui3
97
+vsllwil_w_h 0111 00110000 10000 1 .... ..... ..... @vv_ui4
98
+vsllwil_d_w 0111 00110000 10001 ..... ..... ..... @vv_ui5
99
+vextl_q_d 0111 00110000 10010 00000 ..... ..... @vv
100
+vsllwil_hu_bu 0111 00110000 11000 01 ... ..... ..... @vv_ui3
101
+vsllwil_wu_hu 0111 00110000 11000 1 .... ..... ..... @vv_ui4
102
+vsllwil_du_wu 0111 00110000 11001 ..... ..... ..... @vv_ui5
103
+vextl_qu_du 0111 00110000 11010 00000 ..... ..... @vv
104
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/loongarch/lsx_helper.c
107
+++ b/target/loongarch/lsx_helper.c
108
@@ -XXX,XX +XXX,XX @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
109
Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
110
}
111
}
112
+
113
+#define VSLLWIL(NAME, BIT, E1, E2) \
114
+void HELPER(NAME)(CPULoongArchState *env, \
115
+ uint32_t vd, uint32_t vj, uint32_t imm) \
116
+{ \
117
+ int i; \
118
+ VReg temp; \
119
+ VReg *Vd = &(env->fpr[vd].vreg); \
120
+ VReg *Vj = &(env->fpr[vj].vreg); \
121
+ typedef __typeof(temp.E1(0)) TD; \
122
+ \
123
+ temp.D(0) = 0; \
124
+ temp.D(1) = 0; \
125
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
126
+ temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
127
+ } \
128
+ *Vd = temp; \
129
+}
130
+
131
+void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
132
+{
133
+ VReg *Vd = &(env->fpr[vd].vreg);
134
+ VReg *Vj = &(env->fpr[vj].vreg);
135
+
136
+ Vd->Q(0) = int128_makes64(Vj->D(0));
137
+}
138
+
139
+void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
140
+{
141
+ VReg *Vd = &(env->fpr[vd].vreg);
142
+ VReg *Vj = &(env->fpr[vj].vreg);
143
+
144
+ Vd->Q(0) = int128_make64(Vj->D(0));
145
+}
146
+
147
+VSLLWIL(vsllwil_h_b, 16, H, B)
148
+VSLLWIL(vsllwil_w_h, 32, W, H)
149
+VSLLWIL(vsllwil_d_w, 64, D, W)
150
+VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
151
+VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
152
+VSLLWIL(vsllwil_du_wu, 64, UD, UW)
153
--
154
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSRLR[I].{B/H/W/D};
3
- VSRAR[I].{B/H/W/D}.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-25-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 18 ++++
10
target/loongarch/helper.h | 18 ++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 18 ++++
12
target/loongarch/insns.decode | 18 ++++
13
target/loongarch/lsx_helper.c | 104 ++++++++++++++++++++
14
5 files changed, 176 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsllwil_hu_bu, vv_i)
21
INSN_LSX(vsllwil_wu_hu, vv_i)
22
INSN_LSX(vsllwil_du_wu, vv_i)
23
INSN_LSX(vextl_qu_du, vv)
24
+
25
+INSN_LSX(vsrlr_b, vvv)
26
+INSN_LSX(vsrlr_h, vvv)
27
+INSN_LSX(vsrlr_w, vvv)
28
+INSN_LSX(vsrlr_d, vvv)
29
+INSN_LSX(vsrlri_b, vv_i)
30
+INSN_LSX(vsrlri_h, vv_i)
31
+INSN_LSX(vsrlri_w, vv_i)
32
+INSN_LSX(vsrlri_d, vv_i)
33
+
34
+INSN_LSX(vsrar_b, vvv)
35
+INSN_LSX(vsrar_h, vvv)
36
+INSN_LSX(vsrar_w, vvv)
37
+INSN_LSX(vsrar_d, vvv)
38
+INSN_LSX(vsrari_b, vv_i)
39
+INSN_LSX(vsrari_h, vv_i)
40
+INSN_LSX(vsrari_w, vv_i)
41
+INSN_LSX(vsrari_d, vv_i)
42
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/loongarch/helper.h
45
+++ b/target/loongarch/helper.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
47
DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
48
DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
49
DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
50
+
51
+DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
52
+DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
53
+DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
54
+DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
55
+DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
56
+DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
57
+DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
58
+DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
59
+
60
+DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
61
+DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
66
+DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
67
+DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
68
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
71
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
72
@@ -XXX,XX +XXX,XX @@ TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu)
73
TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu)
74
TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu)
75
TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du)
76
+
77
+TRANS(vsrlr_b, gen_vvv, gen_helper_vsrlr_b)
78
+TRANS(vsrlr_h, gen_vvv, gen_helper_vsrlr_h)
79
+TRANS(vsrlr_w, gen_vvv, gen_helper_vsrlr_w)
80
+TRANS(vsrlr_d, gen_vvv, gen_helper_vsrlr_d)
81
+TRANS(vsrlri_b, gen_vv_i, gen_helper_vsrlri_b)
82
+TRANS(vsrlri_h, gen_vv_i, gen_helper_vsrlri_h)
83
+TRANS(vsrlri_w, gen_vv_i, gen_helper_vsrlri_w)
84
+TRANS(vsrlri_d, gen_vv_i, gen_helper_vsrlri_d)
85
+
86
+TRANS(vsrar_b, gen_vvv, gen_helper_vsrar_b)
87
+TRANS(vsrar_h, gen_vvv, gen_helper_vsrar_h)
88
+TRANS(vsrar_w, gen_vvv, gen_helper_vsrar_w)
89
+TRANS(vsrar_d, gen_vvv, gen_helper_vsrar_d)
90
+TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b)
91
+TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h)
92
+TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w)
93
+TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d)
94
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
95
index XXXXXXX..XXXXXXX 100644
96
--- a/target/loongarch/insns.decode
97
+++ b/target/loongarch/insns.decode
98
@@ -XXX,XX +XXX,XX @@ vsllwil_hu_bu 0111 00110000 11000 01 ... ..... ..... @vv_ui3
99
vsllwil_wu_hu 0111 00110000 11000 1 .... ..... ..... @vv_ui4
100
vsllwil_du_wu 0111 00110000 11001 ..... ..... ..... @vv_ui5
101
vextl_qu_du 0111 00110000 11010 00000 ..... ..... @vv
102
+
103
+vsrlr_b 0111 00001111 00000 ..... ..... ..... @vvv
104
+vsrlr_h 0111 00001111 00001 ..... ..... ..... @vvv
105
+vsrlr_w 0111 00001111 00010 ..... ..... ..... @vvv
106
+vsrlr_d 0111 00001111 00011 ..... ..... ..... @vvv
107
+vsrlri_b 0111 00101010 01000 01 ... ..... ..... @vv_ui3
108
+vsrlri_h 0111 00101010 01000 1 .... ..... ..... @vv_ui4
109
+vsrlri_w 0111 00101010 01001 ..... ..... ..... @vv_ui5
110
+vsrlri_d 0111 00101010 0101 ...... ..... ..... @vv_ui6
111
+
112
+vsrar_b 0111 00001111 00100 ..... ..... ..... @vvv
113
+vsrar_h 0111 00001111 00101 ..... ..... ..... @vvv
114
+vsrar_w 0111 00001111 00110 ..... ..... ..... @vvv
115
+vsrar_d 0111 00001111 00111 ..... ..... ..... @vvv
116
+vsrari_b 0111 00101010 10000 01 ... ..... ..... @vv_ui3
117
+vsrari_h 0111 00101010 10000 1 .... ..... ..... @vv_ui4
118
+vsrari_w 0111 00101010 10001 ..... ..... ..... @vv_ui5
119
+vsrari_d 0111 00101010 1001 ...... ..... ..... @vv_ui6
120
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/loongarch/lsx_helper.c
123
+++ b/target/loongarch/lsx_helper.c
124
@@ -XXX,XX +XXX,XX @@ VSLLWIL(vsllwil_d_w, 64, D, W)
125
VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
126
VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
127
VSLLWIL(vsllwil_du_wu, 64, UD, UW)
128
+
129
+#define do_vsrlr(E, T) \
130
+static T do_vsrlr_ ##E(T s1, int sh) \
131
+{ \
132
+ if (sh == 0) { \
133
+ return s1; \
134
+ } else { \
135
+ return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
136
+ } \
137
+}
138
+
139
+do_vsrlr(B, uint8_t)
140
+do_vsrlr(H, uint16_t)
141
+do_vsrlr(W, uint32_t)
142
+do_vsrlr(D, uint64_t)
143
+
144
+#define VSRLR(NAME, BIT, T, E) \
145
+void HELPER(NAME)(CPULoongArchState *env, \
146
+ uint32_t vd, uint32_t vj, uint32_t vk) \
147
+{ \
148
+ int i; \
149
+ VReg *Vd = &(env->fpr[vd].vreg); \
150
+ VReg *Vj = &(env->fpr[vj].vreg); \
151
+ VReg *Vk = &(env->fpr[vk].vreg); \
152
+ \
153
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
154
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
155
+ } \
156
+}
157
+
158
+VSRLR(vsrlr_b, 8, uint8_t, B)
159
+VSRLR(vsrlr_h, 16, uint16_t, H)
160
+VSRLR(vsrlr_w, 32, uint32_t, W)
161
+VSRLR(vsrlr_d, 64, uint64_t, D)
162
+
163
+#define VSRLRI(NAME, BIT, E) \
164
+void HELPER(NAME)(CPULoongArchState *env, \
165
+ uint32_t vd, uint32_t vj, uint32_t imm) \
166
+{ \
167
+ int i; \
168
+ VReg *Vd = &(env->fpr[vd].vreg); \
169
+ VReg *Vj = &(env->fpr[vj].vreg); \
170
+ \
171
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
172
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
173
+ } \
174
+}
175
+
176
+VSRLRI(vsrlri_b, 8, B)
177
+VSRLRI(vsrlri_h, 16, H)
178
+VSRLRI(vsrlri_w, 32, W)
179
+VSRLRI(vsrlri_d, 64, D)
180
+
181
+#define do_vsrar(E, T) \
182
+static T do_vsrar_ ##E(T s1, int sh) \
183
+{ \
184
+ if (sh == 0) { \
185
+ return s1; \
186
+ } else { \
187
+ return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
188
+ } \
189
+}
190
+
191
+do_vsrar(B, int8_t)
192
+do_vsrar(H, int16_t)
193
+do_vsrar(W, int32_t)
194
+do_vsrar(D, int64_t)
195
+
196
+#define VSRAR(NAME, BIT, T, E) \
197
+void HELPER(NAME)(CPULoongArchState *env, \
198
+ uint32_t vd, uint32_t vj, uint32_t vk) \
199
+{ \
200
+ int i; \
201
+ VReg *Vd = &(env->fpr[vd].vreg); \
202
+ VReg *Vj = &(env->fpr[vj].vreg); \
203
+ VReg *Vk = &(env->fpr[vk].vreg); \
204
+ \
205
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
206
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
207
+ } \
208
+}
209
+
210
+VSRAR(vsrar_b, 8, uint8_t, B)
211
+VSRAR(vsrar_h, 16, uint16_t, H)
212
+VSRAR(vsrar_w, 32, uint32_t, W)
213
+VSRAR(vsrar_d, 64, uint64_t, D)
214
+
215
+#define VSRARI(NAME, BIT, E) \
216
+void HELPER(NAME)(CPULoongArchState *env, \
217
+ uint32_t vd, uint32_t vj, uint32_t imm) \
218
+{ \
219
+ int i; \
220
+ VReg *Vd = &(env->fpr[vd].vreg); \
221
+ VReg *Vj = &(env->fpr[vj].vreg); \
222
+ \
223
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
224
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
225
+ } \
226
+}
227
+
228
+VSRARI(vsrari_b, 8, B)
229
+VSRARI(vsrari_h, 16, H)
230
+VSRARI(vsrari_w, 32, W)
231
+VSRARI(vsrari_d, 64, D)
232
--
233
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSRLN.{B.H/H.W/W.D};
3
- VSRAN.{B.H/H.W/W.D};
4
- VSRLNI.{B.H/H.W/W.D/D.Q};
5
- VSRANI.{B.H/H.W/W.D/D.Q}.
6
1
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230504122810.4094787-26-gaosong@loongson.cn>
10
---
11
target/loongarch/disas.c | 16 +++
12
target/loongarch/helper.h | 16 +++
13
target/loongarch/insn_trans/trans_lsx.c.inc | 16 +++
14
target/loongarch/insns.decode | 17 +++
15
target/loongarch/lsx_helper.c | 114 ++++++++++++++++++++
16
5 files changed, 179 insertions(+)
17
18
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/loongarch/disas.c
21
+++ b/target/loongarch/disas.c
22
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrari_b, vv_i)
23
INSN_LSX(vsrari_h, vv_i)
24
INSN_LSX(vsrari_w, vv_i)
25
INSN_LSX(vsrari_d, vv_i)
26
+
27
+INSN_LSX(vsrln_b_h, vvv)
28
+INSN_LSX(vsrln_h_w, vvv)
29
+INSN_LSX(vsrln_w_d, vvv)
30
+INSN_LSX(vsran_b_h, vvv)
31
+INSN_LSX(vsran_h_w, vvv)
32
+INSN_LSX(vsran_w_d, vvv)
33
+
34
+INSN_LSX(vsrlni_b_h, vv_i)
35
+INSN_LSX(vsrlni_h_w, vv_i)
36
+INSN_LSX(vsrlni_w_d, vv_i)
37
+INSN_LSX(vsrlni_d_q, vv_i)
38
+INSN_LSX(vsrani_b_h, vv_i)
39
+INSN_LSX(vsrani_h_w, vv_i)
40
+INSN_LSX(vsrani_w_d, vv_i)
41
+INSN_LSX(vsrani_d_q, vv_i)
42
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/loongarch/helper.h
45
+++ b/target/loongarch/helper.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
47
DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
48
DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
49
DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
50
+
51
+DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
52
+DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
53
+DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
54
+DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
55
+DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
56
+DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
57
+
58
+DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
59
+DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
60
+DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32)
61
+DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
66
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
69
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
70
@@ -XXX,XX +XXX,XX @@ TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b)
71
TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h)
72
TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w)
73
TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d)
74
+
75
+TRANS(vsrln_b_h, gen_vvv, gen_helper_vsrln_b_h)
76
+TRANS(vsrln_h_w, gen_vvv, gen_helper_vsrln_h_w)
77
+TRANS(vsrln_w_d, gen_vvv, gen_helper_vsrln_w_d)
78
+TRANS(vsran_b_h, gen_vvv, gen_helper_vsran_b_h)
79
+TRANS(vsran_h_w, gen_vvv, gen_helper_vsran_h_w)
80
+TRANS(vsran_w_d, gen_vvv, gen_helper_vsran_w_d)
81
+
82
+TRANS(vsrlni_b_h, gen_vv_i, gen_helper_vsrlni_b_h)
83
+TRANS(vsrlni_h_w, gen_vv_i, gen_helper_vsrlni_h_w)
84
+TRANS(vsrlni_w_d, gen_vv_i, gen_helper_vsrlni_w_d)
85
+TRANS(vsrlni_d_q, gen_vv_i, gen_helper_vsrlni_d_q)
86
+TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h)
87
+TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w)
88
+TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d)
89
+TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q)
90
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/loongarch/insns.decode
93
+++ b/target/loongarch/insns.decode
94
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
95
@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
96
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
97
@vv_ui6 .... ........ .... imm:6 vj:5 vd:5 &vv_i
98
+@vv_ui7 .... ........ ... imm:7 vj:5 vd:5 &vv_i
99
@vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i
100
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
101
102
@@ -XXX,XX +XXX,XX @@ vsrari_b 0111 00101010 10000 01 ... ..... ..... @vv_ui3
103
vsrari_h 0111 00101010 10000 1 .... ..... ..... @vv_ui4
104
vsrari_w 0111 00101010 10001 ..... ..... ..... @vv_ui5
105
vsrari_d 0111 00101010 1001 ...... ..... ..... @vv_ui6
106
+
107
+vsrln_b_h 0111 00001111 01001 ..... ..... ..... @vvv
108
+vsrln_h_w 0111 00001111 01010 ..... ..... ..... @vvv
109
+vsrln_w_d 0111 00001111 01011 ..... ..... ..... @vvv
110
+vsran_b_h 0111 00001111 01101 ..... ..... ..... @vvv
111
+vsran_h_w 0111 00001111 01110 ..... ..... ..... @vvv
112
+vsran_w_d 0111 00001111 01111 ..... ..... ..... @vvv
113
+
114
+vsrlni_b_h 0111 00110100 00000 1 .... ..... ..... @vv_ui4
115
+vsrlni_h_w 0111 00110100 00001 ..... ..... ..... @vv_ui5
116
+vsrlni_w_d 0111 00110100 0001 ...... ..... ..... @vv_ui6
117
+vsrlni_d_q 0111 00110100 001 ....... ..... ..... @vv_ui7
118
+vsrani_b_h 0111 00110101 10000 1 .... ..... ..... @vv_ui4
119
+vsrani_h_w 0111 00110101 10001 ..... ..... ..... @vv_ui5
120
+vsrani_w_d 0111 00110101 1001 ...... ..... ..... @vv_ui6
121
+vsrani_d_q 0111 00110101 101 ....... ..... ..... @vv_ui7
122
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/target/loongarch/lsx_helper.c
125
+++ b/target/loongarch/lsx_helper.c
126
@@ -XXX,XX +XXX,XX @@ VSRARI(vsrari_b, 8, B)
127
VSRARI(vsrari_h, 16, H)
128
VSRARI(vsrari_w, 32, W)
129
VSRARI(vsrari_d, 64, D)
130
+
131
+#define R_SHIFT(a, b) (a >> b)
132
+
133
+#define VSRLN(NAME, BIT, T, E1, E2) \
134
+void HELPER(NAME)(CPULoongArchState *env, \
135
+ uint32_t vd, uint32_t vj, uint32_t vk) \
136
+{ \
137
+ int i; \
138
+ VReg *Vd = &(env->fpr[vd].vreg); \
139
+ VReg *Vj = &(env->fpr[vj].vreg); \
140
+ VReg *Vk = &(env->fpr[vk].vreg); \
141
+ \
142
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
143
+ Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
144
+ } \
145
+ Vd->D(1) = 0; \
146
+}
147
+
148
+VSRLN(vsrln_b_h, 16, uint16_t, B, H)
149
+VSRLN(vsrln_h_w, 32, uint32_t, H, W)
150
+VSRLN(vsrln_w_d, 64, uint64_t, W, D)
151
+
152
+#define VSRAN(NAME, BIT, T, E1, E2) \
153
+void HELPER(NAME)(CPULoongArchState *env, \
154
+ uint32_t vd, uint32_t vj, uint32_t vk) \
155
+{ \
156
+ int i; \
157
+ VReg *Vd = &(env->fpr[vd].vreg); \
158
+ VReg *Vj = &(env->fpr[vj].vreg); \
159
+ VReg *Vk = &(env->fpr[vk].vreg); \
160
+ \
161
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
162
+ Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
163
+ } \
164
+ Vd->D(1) = 0; \
165
+}
166
+
167
+VSRAN(vsran_b_h, 16, uint16_t, B, H)
168
+VSRAN(vsran_h_w, 32, uint32_t, H, W)
169
+VSRAN(vsran_w_d, 64, uint64_t, W, D)
170
+
171
+#define VSRLNI(NAME, BIT, T, E1, E2) \
172
+void HELPER(NAME)(CPULoongArchState *env, \
173
+ uint32_t vd, uint32_t vj, uint32_t imm) \
174
+{ \
175
+ int i, max; \
176
+ VReg temp; \
177
+ VReg *Vd = &(env->fpr[vd].vreg); \
178
+ VReg *Vj = &(env->fpr[vj].vreg); \
179
+ \
180
+ temp.D(0) = 0; \
181
+ temp.D(1) = 0; \
182
+ max = LSX_LEN/BIT; \
183
+ for (i = 0; i < max; i++) { \
184
+ temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
185
+ temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
186
+ } \
187
+ *Vd = temp; \
188
+}
189
+
190
+void HELPER(vsrlni_d_q)(CPULoongArchState *env,
191
+ uint32_t vd, uint32_t vj, uint32_t imm)
192
+{
193
+ VReg temp;
194
+ VReg *Vd = &(env->fpr[vd].vreg);
195
+ VReg *Vj = &(env->fpr[vj].vreg);
196
+
197
+ temp.D(0) = 0;
198
+ temp.D(1) = 0;
199
+ temp.D(0) = int128_urshift(Vj->Q(0), imm % 128);
200
+ temp.D(1) = int128_urshift(Vd->Q(0), imm % 128);
201
+ *Vd = temp;
202
+}
203
+
204
+VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
205
+VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
206
+VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
207
+
208
+#define VSRANI(NAME, BIT, E1, E2) \
209
+void HELPER(NAME)(CPULoongArchState *env, \
210
+ uint32_t vd, uint32_t vj, uint32_t imm) \
211
+{ \
212
+ int i, max; \
213
+ VReg temp; \
214
+ VReg *Vd = &(env->fpr[vd].vreg); \
215
+ VReg *Vj = &(env->fpr[vj].vreg); \
216
+ \
217
+ temp.D(0) = 0; \
218
+ temp.D(1) = 0; \
219
+ max = LSX_LEN/BIT; \
220
+ for (i = 0; i < max; i++) { \
221
+ temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
222
+ temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
223
+ } \
224
+ *Vd = temp; \
225
+}
226
+
227
+void HELPER(vsrani_d_q)(CPULoongArchState *env,
228
+ uint32_t vd, uint32_t vj, uint32_t imm)
229
+{
230
+ VReg temp;
231
+ VReg *Vd = &(env->fpr[vd].vreg);
232
+ VReg *Vj = &(env->fpr[vj].vreg);
233
+
234
+ temp.D(0) = 0;
235
+ temp.D(1) = 0;
236
+ temp.D(0) = int128_rshift(Vj->Q(0), imm % 128);
237
+ temp.D(1) = int128_rshift(Vd->Q(0), imm % 128);
238
+ *Vd = temp;
239
+}
240
+
241
+VSRANI(vsrani_b_h, 16, B, H)
242
+VSRANI(vsrani_h_w, 32, H, W)
243
+VSRANI(vsrani_w_d, 64, W, D)
244
--
245
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSRLRN.{B.H/H.W/W.D};
3
- VSRARN.{B.H/H.W/W.D};
4
- VSRLRNI.{B.H/H.W/W.D/D.Q};
5
- VSRARNI.{B.H/H.W/W.D/D.Q}.
6
1
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Song Gao <gaosong@loongson.cn>
9
Message-Id: <20230504122810.4094787-27-gaosong@loongson.cn>
10
---
11
target/loongarch/disas.c | 16 +++
12
target/loongarch/helper.h | 16 +++
13
target/loongarch/insn_trans/trans_lsx.c.inc | 16 +++
14
target/loongarch/insns.decode | 16 +++
15
target/loongarch/lsx_helper.c | 126 ++++++++++++++++++++
16
5 files changed, 190 insertions(+)
17
18
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/target/loongarch/disas.c
21
+++ b/target/loongarch/disas.c
22
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrani_b_h, vv_i)
23
INSN_LSX(vsrani_h_w, vv_i)
24
INSN_LSX(vsrani_w_d, vv_i)
25
INSN_LSX(vsrani_d_q, vv_i)
26
+
27
+INSN_LSX(vsrlrn_b_h, vvv)
28
+INSN_LSX(vsrlrn_h_w, vvv)
29
+INSN_LSX(vsrlrn_w_d, vvv)
30
+INSN_LSX(vsrarn_b_h, vvv)
31
+INSN_LSX(vsrarn_h_w, vvv)
32
+INSN_LSX(vsrarn_w_d, vvv)
33
+
34
+INSN_LSX(vsrlrni_b_h, vv_i)
35
+INSN_LSX(vsrlrni_h_w, vv_i)
36
+INSN_LSX(vsrlrni_w_d, vv_i)
37
+INSN_LSX(vsrlrni_d_q, vv_i)
38
+INSN_LSX(vsrarni_b_h, vv_i)
39
+INSN_LSX(vsrarni_h_w, vv_i)
40
+INSN_LSX(vsrarni_w_d, vv_i)
41
+INSN_LSX(vsrarni_d_q, vv_i)
42
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/loongarch/helper.h
45
+++ b/target/loongarch/helper.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
47
DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
48
DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
49
DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
50
+
51
+DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
52
+DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
53
+DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
54
+DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
55
+DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
56
+DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
57
+
58
+DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
59
+DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
60
+DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32)
61
+DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
66
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
69
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
70
@@ -XXX,XX +XXX,XX @@ TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h)
71
TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w)
72
TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d)
73
TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q)
74
+
75
+TRANS(vsrlrn_b_h, gen_vvv, gen_helper_vsrlrn_b_h)
76
+TRANS(vsrlrn_h_w, gen_vvv, gen_helper_vsrlrn_h_w)
77
+TRANS(vsrlrn_w_d, gen_vvv, gen_helper_vsrlrn_w_d)
78
+TRANS(vsrarn_b_h, gen_vvv, gen_helper_vsrarn_b_h)
79
+TRANS(vsrarn_h_w, gen_vvv, gen_helper_vsrarn_h_w)
80
+TRANS(vsrarn_w_d, gen_vvv, gen_helper_vsrarn_w_d)
81
+
82
+TRANS(vsrlrni_b_h, gen_vv_i, gen_helper_vsrlrni_b_h)
83
+TRANS(vsrlrni_h_w, gen_vv_i, gen_helper_vsrlrni_h_w)
84
+TRANS(vsrlrni_w_d, gen_vv_i, gen_helper_vsrlrni_w_d)
85
+TRANS(vsrlrni_d_q, gen_vv_i, gen_helper_vsrlrni_d_q)
86
+TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h)
87
+TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w)
88
+TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d)
89
+TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q)
90
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/loongarch/insns.decode
93
+++ b/target/loongarch/insns.decode
94
@@ -XXX,XX +XXX,XX @@ vsrani_b_h 0111 00110101 10000 1 .... ..... ..... @vv_ui4
95
vsrani_h_w 0111 00110101 10001 ..... ..... ..... @vv_ui5
96
vsrani_w_d 0111 00110101 1001 ...... ..... ..... @vv_ui6
97
vsrani_d_q 0111 00110101 101 ....... ..... ..... @vv_ui7
98
+
99
+vsrlrn_b_h 0111 00001111 10001 ..... ..... ..... @vvv
100
+vsrlrn_h_w 0111 00001111 10010 ..... ..... ..... @vvv
101
+vsrlrn_w_d 0111 00001111 10011 ..... ..... ..... @vvv
102
+vsrarn_b_h 0111 00001111 10101 ..... ..... ..... @vvv
103
+vsrarn_h_w 0111 00001111 10110 ..... ..... ..... @vvv
104
+vsrarn_w_d 0111 00001111 10111 ..... ..... ..... @vvv
105
+
106
+vsrlrni_b_h 0111 00110100 01000 1 .... ..... ..... @vv_ui4
107
+vsrlrni_h_w 0111 00110100 01001 ..... ..... ..... @vv_ui5
108
+vsrlrni_w_d 0111 00110100 0101 ...... ..... ..... @vv_ui6
109
+vsrlrni_d_q 0111 00110100 011 ....... ..... ..... @vv_ui7
110
+vsrarni_b_h 0111 00110101 11000 1 .... ..... ..... @vv_ui4
111
+vsrarni_h_w 0111 00110101 11001 ..... ..... ..... @vv_ui5
112
+vsrarni_w_d 0111 00110101 1101 ...... ..... ..... @vv_ui6
113
+vsrarni_d_q 0111 00110101 111 ....... ..... ..... @vv_ui7
114
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/target/loongarch/lsx_helper.c
117
+++ b/target/loongarch/lsx_helper.c
118
@@ -XXX,XX +XXX,XX @@ void HELPER(vsrani_d_q)(CPULoongArchState *env,
119
VSRANI(vsrani_b_h, 16, B, H)
120
VSRANI(vsrani_h_w, 32, H, W)
121
VSRANI(vsrani_w_d, 64, W, D)
122
+
123
+#define VSRLRN(NAME, BIT, T, E1, E2) \
124
+void HELPER(NAME)(CPULoongArchState *env, \
125
+ uint32_t vd, uint32_t vj, uint32_t vk) \
126
+{ \
127
+ int i; \
128
+ VReg *Vd = &(env->fpr[vd].vreg); \
129
+ VReg *Vj = &(env->fpr[vj].vreg); \
130
+ VReg *Vk = &(env->fpr[vk].vreg); \
131
+ \
132
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
133
+ Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
134
+ } \
135
+ Vd->D(1) = 0; \
136
+}
137
+
138
+VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
139
+VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
140
+VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
141
+
142
+#define VSRARN(NAME, BIT, T, E1, E2) \
143
+void HELPER(NAME)(CPULoongArchState *env, \
144
+ uint32_t vd, uint32_t vj, uint32_t vk) \
145
+{ \
146
+ int i; \
147
+ VReg *Vd = &(env->fpr[vd].vreg); \
148
+ VReg *Vj = &(env->fpr[vj].vreg); \
149
+ VReg *Vk = &(env->fpr[vk].vreg); \
150
+ \
151
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
152
+ Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
153
+ } \
154
+ Vd->D(1) = 0; \
155
+}
156
+
157
+VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
158
+VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
159
+VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
160
+
161
+#define VSRLRNI(NAME, BIT, E1, E2) \
162
+void HELPER(NAME)(CPULoongArchState *env, \
163
+ uint32_t vd, uint32_t vj, uint32_t imm) \
164
+{ \
165
+ int i, max; \
166
+ VReg temp; \
167
+ VReg *Vd = &(env->fpr[vd].vreg); \
168
+ VReg *Vj = &(env->fpr[vj].vreg); \
169
+ \
170
+ temp.D(0) = 0; \
171
+ temp.D(1) = 0; \
172
+ max = LSX_LEN/BIT; \
173
+ for (i = 0; i < max; i++) { \
174
+ temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
175
+ temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
176
+ } \
177
+ *Vd = temp; \
178
+}
179
+
180
+void HELPER(vsrlrni_d_q)(CPULoongArchState *env,
181
+ uint32_t vd, uint32_t vj, uint32_t imm)
182
+{
183
+ VReg temp;
184
+ VReg *Vd = &(env->fpr[vd].vreg);
185
+ VReg *Vj = &(env->fpr[vj].vreg);
186
+ Int128 r1, r2;
187
+
188
+ if (imm == 0) {
189
+ temp.D(0) = int128_getlo(Vj->Q(0));
190
+ temp.D(1) = int128_getlo(Vd->Q(0));
191
+ } else {
192
+ r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
193
+ r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
194
+
195
+ temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
196
+ temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
197
+ }
198
+ *Vd = temp;
199
+}
200
+
201
+VSRLRNI(vsrlrni_b_h, 16, B, H)
202
+VSRLRNI(vsrlrni_h_w, 32, H, W)
203
+VSRLRNI(vsrlrni_w_d, 64, W, D)
204
+
205
+#define VSRARNI(NAME, BIT, E1, E2) \
206
+void HELPER(NAME)(CPULoongArchState *env, \
207
+ uint32_t vd, uint32_t vj, uint32_t imm) \
208
+{ \
209
+ int i, max; \
210
+ VReg temp; \
211
+ VReg *Vd = &(env->fpr[vd].vreg); \
212
+ VReg *Vj = &(env->fpr[vj].vreg); \
213
+ \
214
+ temp.D(0) = 0; \
215
+ temp.D(1) = 0; \
216
+ max = LSX_LEN/BIT; \
217
+ for (i = 0; i < max; i++) { \
218
+ temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
219
+ temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
220
+ } \
221
+ *Vd = temp; \
222
+}
223
+
224
+void HELPER(vsrarni_d_q)(CPULoongArchState *env,
225
+ uint32_t vd, uint32_t vj, uint32_t imm)
226
+{
227
+ VReg temp;
228
+ VReg *Vd = &(env->fpr[vd].vreg);
229
+ VReg *Vj = &(env->fpr[vj].vreg);
230
+ Int128 r1, r2;
231
+
232
+ if (imm == 0) {
233
+ temp.D(0) = int128_getlo(Vj->Q(0));
234
+ temp.D(1) = int128_getlo(Vd->Q(0));
235
+ } else {
236
+ r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
237
+ r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
238
+
239
+ temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
240
+ temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
241
+ }
242
+ *Vd = temp;
243
+}
244
+
245
+VSRARNI(vsrarni_b_h, 16, B, H)
246
+VSRARNI(vsrarni_h_w, 32, H, W)
247
+VSRARNI(vsrarni_w_d, 64, W, D)
248
--
249
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSSRLN.{B.H/H.W/W.D};
3
- VSSRAN.{B.H/H.W/W.D};
4
- VSSRLN.{BU.H/HU.W/WU.D};
5
- VSSRAN.{BU.H/HU.W/WU.D};
6
- VSSRLNI.{B.H/H.W/W.D/D.Q};
7
- VSSRANI.{B.H/H.W/W.D/D.Q};
8
- VSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
9
- VSSRANI.{BU.H/HU.W/WU.D/DU.Q}.
10
1
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Song Gao <gaosong@loongson.cn>
13
Message-Id: <20230504122810.4094787-28-gaosong@loongson.cn>
14
---
15
target/loongarch/disas.c | 30 ++
16
target/loongarch/helper.h | 30 ++
17
target/loongarch/insn_trans/trans_lsx.c.inc | 30 ++
18
target/loongarch/insns.decode | 30 ++
19
target/loongarch/lsx_helper.c | 379 ++++++++++++++++++++
20
5 files changed, 499 insertions(+)
21
22
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/target/loongarch/disas.c
25
+++ b/target/loongarch/disas.c
26
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsrarni_b_h, vv_i)
27
INSN_LSX(vsrarni_h_w, vv_i)
28
INSN_LSX(vsrarni_w_d, vv_i)
29
INSN_LSX(vsrarni_d_q, vv_i)
30
+
31
+INSN_LSX(vssrln_b_h, vvv)
32
+INSN_LSX(vssrln_h_w, vvv)
33
+INSN_LSX(vssrln_w_d, vvv)
34
+INSN_LSX(vssran_b_h, vvv)
35
+INSN_LSX(vssran_h_w, vvv)
36
+INSN_LSX(vssran_w_d, vvv)
37
+INSN_LSX(vssrln_bu_h, vvv)
38
+INSN_LSX(vssrln_hu_w, vvv)
39
+INSN_LSX(vssrln_wu_d, vvv)
40
+INSN_LSX(vssran_bu_h, vvv)
41
+INSN_LSX(vssran_hu_w, vvv)
42
+INSN_LSX(vssran_wu_d, vvv)
43
+
44
+INSN_LSX(vssrlni_b_h, vv_i)
45
+INSN_LSX(vssrlni_h_w, vv_i)
46
+INSN_LSX(vssrlni_w_d, vv_i)
47
+INSN_LSX(vssrlni_d_q, vv_i)
48
+INSN_LSX(vssrani_b_h, vv_i)
49
+INSN_LSX(vssrani_h_w, vv_i)
50
+INSN_LSX(vssrani_w_d, vv_i)
51
+INSN_LSX(vssrani_d_q, vv_i)
52
+INSN_LSX(vssrlni_bu_h, vv_i)
53
+INSN_LSX(vssrlni_hu_w, vv_i)
54
+INSN_LSX(vssrlni_wu_d, vv_i)
55
+INSN_LSX(vssrlni_du_q, vv_i)
56
+INSN_LSX(vssrani_bu_h, vv_i)
57
+INSN_LSX(vssrani_hu_w, vv_i)
58
+INSN_LSX(vssrani_wu_d, vv_i)
59
+INSN_LSX(vssrani_du_q, vv_i)
60
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/loongarch/helper.h
63
+++ b/target/loongarch/helper.h
64
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
65
DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
66
DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
67
DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
68
+
69
+DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
70
+DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
71
+DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
72
+DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
73
+DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
74
+DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
75
+DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
76
+DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
77
+DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
78
+DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
79
+DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
80
+DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
81
+
82
+DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
83
+DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
84
+DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
85
+DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
86
+DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
87
+DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
88
+DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
89
+DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
90
+DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
91
+DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
92
+DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
93
+DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
94
+DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
95
+DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
96
+DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
97
+DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
98
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
101
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
102
@@ -XXX,XX +XXX,XX @@ TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h)
103
TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w)
104
TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d)
105
TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q)
106
+
107
+TRANS(vssrln_b_h, gen_vvv, gen_helper_vssrln_b_h)
108
+TRANS(vssrln_h_w, gen_vvv, gen_helper_vssrln_h_w)
109
+TRANS(vssrln_w_d, gen_vvv, gen_helper_vssrln_w_d)
110
+TRANS(vssran_b_h, gen_vvv, gen_helper_vssran_b_h)
111
+TRANS(vssran_h_w, gen_vvv, gen_helper_vssran_h_w)
112
+TRANS(vssran_w_d, gen_vvv, gen_helper_vssran_w_d)
113
+TRANS(vssrln_bu_h, gen_vvv, gen_helper_vssrln_bu_h)
114
+TRANS(vssrln_hu_w, gen_vvv, gen_helper_vssrln_hu_w)
115
+TRANS(vssrln_wu_d, gen_vvv, gen_helper_vssrln_wu_d)
116
+TRANS(vssran_bu_h, gen_vvv, gen_helper_vssran_bu_h)
117
+TRANS(vssran_hu_w, gen_vvv, gen_helper_vssran_hu_w)
118
+TRANS(vssran_wu_d, gen_vvv, gen_helper_vssran_wu_d)
119
+
120
+TRANS(vssrlni_b_h, gen_vv_i, gen_helper_vssrlni_b_h)
121
+TRANS(vssrlni_h_w, gen_vv_i, gen_helper_vssrlni_h_w)
122
+TRANS(vssrlni_w_d, gen_vv_i, gen_helper_vssrlni_w_d)
123
+TRANS(vssrlni_d_q, gen_vv_i, gen_helper_vssrlni_d_q)
124
+TRANS(vssrani_b_h, gen_vv_i, gen_helper_vssrani_b_h)
125
+TRANS(vssrani_h_w, gen_vv_i, gen_helper_vssrani_h_w)
126
+TRANS(vssrani_w_d, gen_vv_i, gen_helper_vssrani_w_d)
127
+TRANS(vssrani_d_q, gen_vv_i, gen_helper_vssrani_d_q)
128
+TRANS(vssrlni_bu_h, gen_vv_i, gen_helper_vssrlni_bu_h)
129
+TRANS(vssrlni_hu_w, gen_vv_i, gen_helper_vssrlni_hu_w)
130
+TRANS(vssrlni_wu_d, gen_vv_i, gen_helper_vssrlni_wu_d)
131
+TRANS(vssrlni_du_q, gen_vv_i, gen_helper_vssrlni_du_q)
132
+TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h)
133
+TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w)
134
+TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d)
135
+TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q)
136
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
137
index XXXXXXX..XXXXXXX 100644
138
--- a/target/loongarch/insns.decode
139
+++ b/target/loongarch/insns.decode
140
@@ -XXX,XX +XXX,XX @@ vsrarni_b_h 0111 00110101 11000 1 .... ..... ..... @vv_ui4
141
vsrarni_h_w 0111 00110101 11001 ..... ..... ..... @vv_ui5
142
vsrarni_w_d 0111 00110101 1101 ...... ..... ..... @vv_ui6
143
vsrarni_d_q 0111 00110101 111 ....... ..... ..... @vv_ui7
144
+
145
+vssrln_b_h 0111 00001111 11001 ..... ..... ..... @vvv
146
+vssrln_h_w 0111 00001111 11010 ..... ..... ..... @vvv
147
+vssrln_w_d 0111 00001111 11011 ..... ..... ..... @vvv
148
+vssran_b_h 0111 00001111 11101 ..... ..... ..... @vvv
149
+vssran_h_w 0111 00001111 11110 ..... ..... ..... @vvv
150
+vssran_w_d 0111 00001111 11111 ..... ..... ..... @vvv
151
+vssrln_bu_h 0111 00010000 01001 ..... ..... ..... @vvv
152
+vssrln_hu_w 0111 00010000 01010 ..... ..... ..... @vvv
153
+vssrln_wu_d 0111 00010000 01011 ..... ..... ..... @vvv
154
+vssran_bu_h 0111 00010000 01101 ..... ..... ..... @vvv
155
+vssran_hu_w 0111 00010000 01110 ..... ..... ..... @vvv
156
+vssran_wu_d 0111 00010000 01111 ..... ..... ..... @vvv
157
+
158
+vssrlni_b_h 0111 00110100 10000 1 .... ..... ..... @vv_ui4
159
+vssrlni_h_w 0111 00110100 10001 ..... ..... ..... @vv_ui5
160
+vssrlni_w_d 0111 00110100 1001 ...... ..... ..... @vv_ui6
161
+vssrlni_d_q 0111 00110100 101 ....... ..... ..... @vv_ui7
162
+vssrani_b_h 0111 00110110 00000 1 .... ..... ..... @vv_ui4
163
+vssrani_h_w 0111 00110110 00001 ..... ..... ..... @vv_ui5
164
+vssrani_w_d 0111 00110110 0001 ...... ..... ..... @vv_ui6
165
+vssrani_d_q 0111 00110110 001 ....... ..... ..... @vv_ui7
166
+vssrlni_bu_h 0111 00110100 11000 1 .... ..... ..... @vv_ui4
167
+vssrlni_hu_w 0111 00110100 11001 ..... ..... ..... @vv_ui5
168
+vssrlni_wu_d 0111 00110100 1101 ...... ..... ..... @vv_ui6
169
+vssrlni_du_q 0111 00110100 111 ....... ..... ..... @vv_ui7
170
+vssrani_bu_h 0111 00110110 01000 1 .... ..... ..... @vv_ui4
171
+vssrani_hu_w 0111 00110110 01001 ..... ..... ..... @vv_ui5
172
+vssrani_wu_d 0111 00110110 0101 ...... ..... ..... @vv_ui6
173
+vssrani_du_q 0111 00110110 011 ....... ..... ..... @vv_ui7
174
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
175
index XXXXXXX..XXXXXXX 100644
176
--- a/target/loongarch/lsx_helper.c
177
+++ b/target/loongarch/lsx_helper.c
178
@@ -XXX,XX +XXX,XX @@ void HELPER(vsrarni_d_q)(CPULoongArchState *env,
179
VSRARNI(vsrarni_b_h, 16, B, H)
180
VSRARNI(vsrarni_h_w, 32, H, W)
181
VSRARNI(vsrarni_w_d, 64, W, D)
182
+
183
+#define SSRLNS(NAME, T1, T2, T3) \
184
+static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
185
+{ \
186
+ T1 shft_res; \
187
+ if (sa == 0) { \
188
+ shft_res = e2; \
189
+ } else { \
190
+ shft_res = (((T1)e2) >> sa); \
191
+ } \
192
+ T3 mask; \
193
+ mask = (1ull << sh) -1; \
194
+ if (shft_res > mask) { \
195
+ return mask; \
196
+ } else { \
197
+ return shft_res; \
198
+ } \
199
+}
200
+
201
+SSRLNS(B, uint16_t, int16_t, uint8_t)
202
+SSRLNS(H, uint32_t, int32_t, uint16_t)
203
+SSRLNS(W, uint64_t, int64_t, uint32_t)
204
+
205
+#define VSSRLN(NAME, BIT, T, E1, E2) \
206
+void HELPER(NAME)(CPULoongArchState *env, \
207
+ uint32_t vd, uint32_t vj, uint32_t vk) \
208
+{ \
209
+ int i; \
210
+ VReg *Vd = &(env->fpr[vd].vreg); \
211
+ VReg *Vj = &(env->fpr[vj].vreg); \
212
+ VReg *Vk = &(env->fpr[vk].vreg); \
213
+ \
214
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
215
+ Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
216
+ } \
217
+ Vd->D(1) = 0; \
218
+}
219
+
220
+VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
221
+VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
222
+VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
223
+
224
+#define SSRANS(E, T1, T2) \
225
+static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
226
+{ \
227
+ T1 shft_res; \
228
+ if (sa == 0) { \
229
+ shft_res = e2; \
230
+ } else { \
231
+ shft_res = e2 >> sa; \
232
+ } \
233
+ T2 mask; \
234
+ mask = (1ll << sh) -1; \
235
+ if (shft_res > mask) { \
236
+ return mask; \
237
+ } else if (shft_res < -(mask +1)) { \
238
+ return ~mask; \
239
+ } else { \
240
+ return shft_res; \
241
+ } \
242
+}
243
+
244
+SSRANS(B, int16_t, int8_t)
245
+SSRANS(H, int32_t, int16_t)
246
+SSRANS(W, int64_t, int32_t)
247
+
248
+#define VSSRAN(NAME, BIT, T, E1, E2) \
249
+void HELPER(NAME)(CPULoongArchState *env, \
250
+ uint32_t vd, uint32_t vj, uint32_t vk) \
251
+{ \
252
+ int i; \
253
+ VReg *Vd = &(env->fpr[vd].vreg); \
254
+ VReg *Vj = &(env->fpr[vj].vreg); \
255
+ VReg *Vk = &(env->fpr[vk].vreg); \
256
+ \
257
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
258
+ Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
259
+ } \
260
+ Vd->D(1) = 0; \
261
+}
262
+
263
+VSSRAN(vssran_b_h, 16, uint16_t, B, H)
264
+VSSRAN(vssran_h_w, 32, uint32_t, H, W)
265
+VSSRAN(vssran_w_d, 64, uint64_t, W, D)
266
+
267
+#define SSRLNU(E, T1, T2, T3) \
268
+static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
269
+{ \
270
+ T1 shft_res; \
271
+ if (sa == 0) { \
272
+ shft_res = e2; \
273
+ } else { \
274
+ shft_res = (((T1)e2) >> sa); \
275
+ } \
276
+ T2 mask; \
277
+ mask = (1ull << sh) -1; \
278
+ if (shft_res > mask) { \
279
+ return mask; \
280
+ } else { \
281
+ return shft_res; \
282
+ } \
283
+}
284
+
285
+SSRLNU(B, uint16_t, uint8_t, int16_t)
286
+SSRLNU(H, uint32_t, uint16_t, int32_t)
287
+SSRLNU(W, uint64_t, uint32_t, int64_t)
288
+
289
+#define VSSRLNU(NAME, BIT, T, E1, E2) \
290
+void HELPER(NAME)(CPULoongArchState *env, \
291
+ uint32_t vd, uint32_t vj, uint32_t vk) \
292
+{ \
293
+ int i; \
294
+ VReg *Vd = &(env->fpr[vd].vreg); \
295
+ VReg *Vj = &(env->fpr[vj].vreg); \
296
+ VReg *Vk = &(env->fpr[vk].vreg); \
297
+ \
298
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
299
+ Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
300
+ } \
301
+ Vd->D(1) = 0; \
302
+}
303
+
304
+VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
305
+VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
306
+VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
307
+
308
+#define SSRANU(E, T1, T2, T3) \
309
+static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
310
+{ \
311
+ T1 shft_res; \
312
+ if (sa == 0) { \
313
+ shft_res = e2; \
314
+ } else { \
315
+ shft_res = e2 >> sa; \
316
+ } \
317
+ if (e2 < 0) { \
318
+ shft_res = 0; \
319
+ } \
320
+ T2 mask; \
321
+ mask = (1ull << sh) -1; \
322
+ if (shft_res > mask) { \
323
+ return mask; \
324
+ } else { \
325
+ return shft_res; \
326
+ } \
327
+}
328
+
329
+SSRANU(B, uint16_t, uint8_t, int16_t)
330
+SSRANU(H, uint32_t, uint16_t, int32_t)
331
+SSRANU(W, uint64_t, uint32_t, int64_t)
332
+
333
+#define VSSRANU(NAME, BIT, T, E1, E2) \
334
+void HELPER(NAME)(CPULoongArchState *env, \
335
+ uint32_t vd, uint32_t vj, uint32_t vk) \
336
+{ \
337
+ int i; \
338
+ VReg *Vd = &(env->fpr[vd].vreg); \
339
+ VReg *Vj = &(env->fpr[vj].vreg); \
340
+ VReg *Vk = &(env->fpr[vk].vreg); \
341
+ \
342
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
343
+ Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
344
+ } \
345
+ Vd->D(1) = 0; \
346
+}
347
+
348
+VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
349
+VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
350
+VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
351
+
352
+#define VSSRLNI(NAME, BIT, E1, E2) \
353
+void HELPER(NAME)(CPULoongArchState *env, \
354
+ uint32_t vd, uint32_t vj, uint32_t imm) \
355
+{ \
356
+ int i; \
357
+ VReg temp; \
358
+ VReg *Vd = &(env->fpr[vd].vreg); \
359
+ VReg *Vj = &(env->fpr[vj].vreg); \
360
+ \
361
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
362
+ temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
363
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
364
+ } \
365
+ *Vd = temp; \
366
+}
367
+
368
+void HELPER(vssrlni_d_q)(CPULoongArchState *env,
369
+ uint32_t vd, uint32_t vj, uint32_t imm)
370
+{
371
+ Int128 shft_res1, shft_res2, mask;
372
+ VReg *Vd = &(env->fpr[vd].vreg);
373
+ VReg *Vj = &(env->fpr[vj].vreg);
374
+
375
+ if (imm == 0) {
376
+ shft_res1 = Vj->Q(0);
377
+ shft_res2 = Vd->Q(0);
378
+ } else {
379
+ shft_res1 = int128_urshift(Vj->Q(0), imm);
380
+ shft_res2 = int128_urshift(Vd->Q(0), imm);
381
+ }
382
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
383
+
384
+ if (int128_ult(mask, shft_res1)) {
385
+ Vd->D(0) = int128_getlo(mask);
386
+ }else {
387
+ Vd->D(0) = int128_getlo(shft_res1);
388
+ }
389
+
390
+ if (int128_ult(mask, shft_res2)) {
391
+ Vd->D(1) = int128_getlo(mask);
392
+ }else {
393
+ Vd->D(1) = int128_getlo(shft_res2);
394
+ }
395
+}
396
+
397
+VSSRLNI(vssrlni_b_h, 16, B, H)
398
+VSSRLNI(vssrlni_h_w, 32, H, W)
399
+VSSRLNI(vssrlni_w_d, 64, W, D)
400
+
401
+#define VSSRANI(NAME, BIT, E1, E2) \
402
+void HELPER(NAME)(CPULoongArchState *env, \
403
+ uint32_t vd, uint32_t vj, uint32_t imm) \
404
+{ \
405
+ int i; \
406
+ VReg temp; \
407
+ VReg *Vd = &(env->fpr[vd].vreg); \
408
+ VReg *Vj = &(env->fpr[vj].vreg); \
409
+ \
410
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
411
+ temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
412
+ temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
413
+ } \
414
+ *Vd = temp; \
415
+}
416
+
417
+void HELPER(vssrani_d_q)(CPULoongArchState *env,
418
+ uint32_t vd, uint32_t vj, uint32_t imm)
419
+{
420
+ Int128 shft_res1, shft_res2, mask, min;
421
+ VReg *Vd = &(env->fpr[vd].vreg);
422
+ VReg *Vj = &(env->fpr[vj].vreg);
423
+
424
+ if (imm == 0) {
425
+ shft_res1 = Vj->Q(0);
426
+ shft_res2 = Vd->Q(0);
427
+ } else {
428
+ shft_res1 = int128_rshift(Vj->Q(0), imm);
429
+ shft_res2 = int128_rshift(Vd->Q(0), imm);
430
+ }
431
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
432
+ min = int128_lshift(int128_one(), 63);
433
+
434
+ if (int128_gt(shft_res1, mask)) {
435
+ Vd->D(0) = int128_getlo(mask);
436
+ } else if (int128_lt(shft_res1, int128_neg(min))) {
437
+ Vd->D(0) = int128_getlo(min);
438
+ } else {
439
+ Vd->D(0) = int128_getlo(shft_res1);
440
+ }
441
+
442
+ if (int128_gt(shft_res2, mask)) {
443
+ Vd->D(1) = int128_getlo(mask);
444
+ } else if (int128_lt(shft_res2, int128_neg(min))) {
445
+ Vd->D(1) = int128_getlo(min);
446
+ } else {
447
+ Vd->D(1) = int128_getlo(shft_res2);
448
+ }
449
+}
450
+
451
+VSSRANI(vssrani_b_h, 16, B, H)
452
+VSSRANI(vssrani_h_w, 32, H, W)
453
+VSSRANI(vssrani_w_d, 64, W, D)
454
+
455
+#define VSSRLNUI(NAME, BIT, E1, E2) \
456
+void HELPER(NAME)(CPULoongArchState *env, \
457
+ uint32_t vd, uint32_t vj, uint32_t imm) \
458
+{ \
459
+ int i; \
460
+ VReg temp; \
461
+ VReg *Vd = &(env->fpr[vd].vreg); \
462
+ VReg *Vj = &(env->fpr[vj].vreg); \
463
+ \
464
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
465
+ temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
466
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
467
+ } \
468
+ *Vd = temp; \
469
+}
470
+
471
+void HELPER(vssrlni_du_q)(CPULoongArchState *env,
472
+ uint32_t vd, uint32_t vj, uint32_t imm)
473
+{
474
+ Int128 shft_res1, shft_res2, mask;
475
+ VReg *Vd = &(env->fpr[vd].vreg);
476
+ VReg *Vj = &(env->fpr[vj].vreg);
477
+
478
+ if (imm == 0) {
479
+ shft_res1 = Vj->Q(0);
480
+ shft_res2 = Vd->Q(0);
481
+ } else {
482
+ shft_res1 = int128_urshift(Vj->Q(0), imm);
483
+ shft_res2 = int128_urshift(Vd->Q(0), imm);
484
+ }
485
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
486
+
487
+ if (int128_ult(mask, shft_res1)) {
488
+ Vd->D(0) = int128_getlo(mask);
489
+ }else {
490
+ Vd->D(0) = int128_getlo(shft_res1);
491
+ }
492
+
493
+ if (int128_ult(mask, shft_res2)) {
494
+ Vd->D(1) = int128_getlo(mask);
495
+ }else {
496
+ Vd->D(1) = int128_getlo(shft_res2);
497
+ }
498
+}
499
+
500
+VSSRLNUI(vssrlni_bu_h, 16, B, H)
501
+VSSRLNUI(vssrlni_hu_w, 32, H, W)
502
+VSSRLNUI(vssrlni_wu_d, 64, W, D)
503
+
504
+#define VSSRANUI(NAME, BIT, E1, E2) \
505
+void HELPER(NAME)(CPULoongArchState *env, \
506
+ uint32_t vd, uint32_t vj, uint32_t imm) \
507
+{ \
508
+ int i; \
509
+ VReg temp; \
510
+ VReg *Vd = &(env->fpr[vd].vreg); \
511
+ VReg *Vj = &(env->fpr[vj].vreg); \
512
+ \
513
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
514
+ temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
515
+ temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
516
+ } \
517
+ *Vd = temp; \
518
+}
519
+
520
+void HELPER(vssrani_du_q)(CPULoongArchState *env,
521
+ uint32_t vd, uint32_t vj, uint32_t imm)
522
+{
523
+ Int128 shft_res1, shft_res2, mask;
524
+ VReg *Vd = &(env->fpr[vd].vreg);
525
+ VReg *Vj = &(env->fpr[vj].vreg);
526
+
527
+ if (imm == 0) {
528
+ shft_res1 = Vj->Q(0);
529
+ shft_res2 = Vd->Q(0);
530
+ } else {
531
+ shft_res1 = int128_rshift(Vj->Q(0), imm);
532
+ shft_res2 = int128_rshift(Vd->Q(0), imm);
533
+ }
534
+
535
+ if (int128_lt(Vj->Q(0), int128_zero())) {
536
+ shft_res1 = int128_zero();
537
+ }
538
+
539
+ if (int128_lt(Vd->Q(0), int128_zero())) {
540
+ shft_res2 = int128_zero();
541
+ }
542
+
543
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
544
+
545
+ if (int128_ult(mask, shft_res1)) {
546
+ Vd->D(0) = int128_getlo(mask);
547
+ }else {
548
+ Vd->D(0) = int128_getlo(shft_res1);
549
+ }
550
+
551
+ if (int128_ult(mask, shft_res2)) {
552
+ Vd->D(1) = int128_getlo(mask);
553
+ }else {
554
+ Vd->D(1) = int128_getlo(shft_res2);
555
+ }
556
+}
557
+
558
+VSSRANUI(vssrani_bu_h, 16, B, H)
559
+VSSRANUI(vssrani_hu_w, 32, H, W)
560
+VSSRANUI(vssrani_wu_d, 64, W, D)
561
--
562
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSSRLRN.{B.H/H.W/W.D};
3
- VSSRARN.{B.H/H.W/W.D};
4
- VSSRLRN.{BU.H/HU.W/WU.D};
5
- VSSRARN.{BU.H/HU.W/WU.D};
6
- VSSRLRNI.{B.H/H.W/W.D/D.Q};
7
- VSSRARNI.{B.H/H.W/W.D/D.Q};
8
- VSSRLRNI.{BU.H/HU.W/WU.D/DU.Q};
9
- VSSRARNI.{BU.H/HU.W/WU.D/DU.Q}.
10
1
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Song Gao <gaosong@loongson.cn>
13
Message-Id: <20230504122810.4094787-29-gaosong@loongson.cn>
14
---
15
target/loongarch/disas.c | 30 ++
16
target/loongarch/helper.h | 30 ++
17
target/loongarch/insn_trans/trans_lsx.c.inc | 30 ++
18
target/loongarch/insns.decode | 30 ++
19
target/loongarch/lsx_helper.c | 358 ++++++++++++++++++++
20
5 files changed, 478 insertions(+)
21
22
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/target/loongarch/disas.c
25
+++ b/target/loongarch/disas.c
26
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vssrani_bu_h, vv_i)
27
INSN_LSX(vssrani_hu_w, vv_i)
28
INSN_LSX(vssrani_wu_d, vv_i)
29
INSN_LSX(vssrani_du_q, vv_i)
30
+
31
+INSN_LSX(vssrlrn_b_h, vvv)
32
+INSN_LSX(vssrlrn_h_w, vvv)
33
+INSN_LSX(vssrlrn_w_d, vvv)
34
+INSN_LSX(vssrarn_b_h, vvv)
35
+INSN_LSX(vssrarn_h_w, vvv)
36
+INSN_LSX(vssrarn_w_d, vvv)
37
+INSN_LSX(vssrlrn_bu_h, vvv)
38
+INSN_LSX(vssrlrn_hu_w, vvv)
39
+INSN_LSX(vssrlrn_wu_d, vvv)
40
+INSN_LSX(vssrarn_bu_h, vvv)
41
+INSN_LSX(vssrarn_hu_w, vvv)
42
+INSN_LSX(vssrarn_wu_d, vvv)
43
+
44
+INSN_LSX(vssrlrni_b_h, vv_i)
45
+INSN_LSX(vssrlrni_h_w, vv_i)
46
+INSN_LSX(vssrlrni_w_d, vv_i)
47
+INSN_LSX(vssrlrni_d_q, vv_i)
48
+INSN_LSX(vssrlrni_bu_h, vv_i)
49
+INSN_LSX(vssrlrni_hu_w, vv_i)
50
+INSN_LSX(vssrlrni_wu_d, vv_i)
51
+INSN_LSX(vssrlrni_du_q, vv_i)
52
+INSN_LSX(vssrarni_b_h, vv_i)
53
+INSN_LSX(vssrarni_h_w, vv_i)
54
+INSN_LSX(vssrarni_w_d, vv_i)
55
+INSN_LSX(vssrarni_d_q, vv_i)
56
+INSN_LSX(vssrarni_bu_h, vv_i)
57
+INSN_LSX(vssrarni_hu_w, vv_i)
58
+INSN_LSX(vssrarni_wu_d, vv_i)
59
+INSN_LSX(vssrarni_du_q, vv_i)
60
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/loongarch/helper.h
63
+++ b/target/loongarch/helper.h
64
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
65
DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
66
DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
67
DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
68
+
69
+DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
70
+DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
71
+DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
72
+DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
73
+DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
74
+DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
75
+DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
76
+DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
77
+DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
78
+DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
79
+DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
80
+DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
81
+
82
+DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
83
+DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
84
+DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32)
85
+DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32)
86
+DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32)
87
+DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32)
88
+DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32)
89
+DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32)
90
+DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32)
91
+DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32)
92
+DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32)
93
+DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32)
94
+DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
95
+DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
96
+DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
97
+DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
98
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
101
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
102
@@ -XXX,XX +XXX,XX @@ TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h)
103
TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w)
104
TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d)
105
TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q)
106
+
107
+TRANS(vssrlrn_b_h, gen_vvv, gen_helper_vssrlrn_b_h)
108
+TRANS(vssrlrn_h_w, gen_vvv, gen_helper_vssrlrn_h_w)
109
+TRANS(vssrlrn_w_d, gen_vvv, gen_helper_vssrlrn_w_d)
110
+TRANS(vssrarn_b_h, gen_vvv, gen_helper_vssrarn_b_h)
111
+TRANS(vssrarn_h_w, gen_vvv, gen_helper_vssrarn_h_w)
112
+TRANS(vssrarn_w_d, gen_vvv, gen_helper_vssrarn_w_d)
113
+TRANS(vssrlrn_bu_h, gen_vvv, gen_helper_vssrlrn_bu_h)
114
+TRANS(vssrlrn_hu_w, gen_vvv, gen_helper_vssrlrn_hu_w)
115
+TRANS(vssrlrn_wu_d, gen_vvv, gen_helper_vssrlrn_wu_d)
116
+TRANS(vssrarn_bu_h, gen_vvv, gen_helper_vssrarn_bu_h)
117
+TRANS(vssrarn_hu_w, gen_vvv, gen_helper_vssrarn_hu_w)
118
+TRANS(vssrarn_wu_d, gen_vvv, gen_helper_vssrarn_wu_d)
119
+
120
+TRANS(vssrlrni_b_h, gen_vv_i, gen_helper_vssrlrni_b_h)
121
+TRANS(vssrlrni_h_w, gen_vv_i, gen_helper_vssrlrni_h_w)
122
+TRANS(vssrlrni_w_d, gen_vv_i, gen_helper_vssrlrni_w_d)
123
+TRANS(vssrlrni_d_q, gen_vv_i, gen_helper_vssrlrni_d_q)
124
+TRANS(vssrarni_b_h, gen_vv_i, gen_helper_vssrarni_b_h)
125
+TRANS(vssrarni_h_w, gen_vv_i, gen_helper_vssrarni_h_w)
126
+TRANS(vssrarni_w_d, gen_vv_i, gen_helper_vssrarni_w_d)
127
+TRANS(vssrarni_d_q, gen_vv_i, gen_helper_vssrarni_d_q)
128
+TRANS(vssrlrni_bu_h, gen_vv_i, gen_helper_vssrlrni_bu_h)
129
+TRANS(vssrlrni_hu_w, gen_vv_i, gen_helper_vssrlrni_hu_w)
130
+TRANS(vssrlrni_wu_d, gen_vv_i, gen_helper_vssrlrni_wu_d)
131
+TRANS(vssrlrni_du_q, gen_vv_i, gen_helper_vssrlrni_du_q)
132
+TRANS(vssrarni_bu_h, gen_vv_i, gen_helper_vssrarni_bu_h)
133
+TRANS(vssrarni_hu_w, gen_vv_i, gen_helper_vssrarni_hu_w)
134
+TRANS(vssrarni_wu_d, gen_vv_i, gen_helper_vssrarni_wu_d)
135
+TRANS(vssrarni_du_q, gen_vv_i, gen_helper_vssrarni_du_q)
136
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
137
index XXXXXXX..XXXXXXX 100644
138
--- a/target/loongarch/insns.decode
139
+++ b/target/loongarch/insns.decode
140
@@ -XXX,XX +XXX,XX @@ vssrani_bu_h 0111 00110110 01000 1 .... ..... ..... @vv_ui4
141
vssrani_hu_w 0111 00110110 01001 ..... ..... ..... @vv_ui5
142
vssrani_wu_d 0111 00110110 0101 ...... ..... ..... @vv_ui6
143
vssrani_du_q 0111 00110110 011 ....... ..... ..... @vv_ui7
144
+
145
+vssrlrn_b_h 0111 00010000 00001 ..... ..... ..... @vvv
146
+vssrlrn_h_w 0111 00010000 00010 ..... ..... ..... @vvv
147
+vssrlrn_w_d 0111 00010000 00011 ..... ..... ..... @vvv
148
+vssrarn_b_h 0111 00010000 00101 ..... ..... ..... @vvv
149
+vssrarn_h_w 0111 00010000 00110 ..... ..... ..... @vvv
150
+vssrarn_w_d 0111 00010000 00111 ..... ..... ..... @vvv
151
+vssrlrn_bu_h 0111 00010000 10001 ..... ..... ..... @vvv
152
+vssrlrn_hu_w 0111 00010000 10010 ..... ..... ..... @vvv
153
+vssrlrn_wu_d 0111 00010000 10011 ..... ..... ..... @vvv
154
+vssrarn_bu_h 0111 00010000 10101 ..... ..... ..... @vvv
155
+vssrarn_hu_w 0111 00010000 10110 ..... ..... ..... @vvv
156
+vssrarn_wu_d 0111 00010000 10111 ..... ..... ..... @vvv
157
+
158
+vssrlrni_b_h 0111 00110101 00000 1 .... ..... ..... @vv_ui4
159
+vssrlrni_h_w 0111 00110101 00001 ..... ..... ..... @vv_ui5
160
+vssrlrni_w_d 0111 00110101 0001 ...... ..... ..... @vv_ui6
161
+vssrlrni_d_q 0111 00110101 001 ....... ..... ..... @vv_ui7
162
+vssrarni_b_h 0111 00110110 10000 1 .... ..... ..... @vv_ui4
163
+vssrarni_h_w 0111 00110110 10001 ..... ..... ..... @vv_ui5
164
+vssrarni_w_d 0111 00110110 1001 ...... ..... ..... @vv_ui6
165
+vssrarni_d_q 0111 00110110 101 ....... ..... ..... @vv_ui7
166
+vssrlrni_bu_h 0111 00110101 01000 1 .... ..... ..... @vv_ui4
167
+vssrlrni_hu_w 0111 00110101 01001 ..... ..... ..... @vv_ui5
168
+vssrlrni_wu_d 0111 00110101 0101 ...... ..... ..... @vv_ui6
169
+vssrlrni_du_q 0111 00110101 011 ....... ..... ..... @vv_ui7
170
+vssrarni_bu_h 0111 00110110 11000 1 .... ..... ..... @vv_ui4
171
+vssrarni_hu_w 0111 00110110 11001 ..... ..... ..... @vv_ui5
172
+vssrarni_wu_d 0111 00110110 1101 ...... ..... ..... @vv_ui6
173
+vssrarni_du_q 0111 00110110 111 ....... ..... ..... @vv_ui7
174
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
175
index XXXXXXX..XXXXXXX 100644
176
--- a/target/loongarch/lsx_helper.c
177
+++ b/target/loongarch/lsx_helper.c
178
@@ -XXX,XX +XXX,XX @@ void HELPER(vssrani_du_q)(CPULoongArchState *env,
179
VSSRANUI(vssrani_bu_h, 16, B, H)
180
VSSRANUI(vssrani_hu_w, 32, H, W)
181
VSSRANUI(vssrani_wu_d, 64, W, D)
182
+
183
+#define SSRLRNS(E1, E2, T1, T2, T3) \
184
+static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
185
+{ \
186
+ T1 shft_res; \
187
+ \
188
+ shft_res = do_vsrlr_ ## E2(e2, sa); \
189
+ T1 mask; \
190
+ mask = (1ull << sh) -1; \
191
+ if (shft_res > mask) { \
192
+ return mask; \
193
+ } else { \
194
+ return shft_res; \
195
+ } \
196
+}
197
+
198
+SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
199
+SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
200
+SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
201
+
202
+#define VSSRLRN(NAME, BIT, T, E1, E2) \
203
+void HELPER(NAME)(CPULoongArchState *env, \
204
+ uint32_t vd, uint32_t vj, uint32_t vk) \
205
+{ \
206
+ int i; \
207
+ VReg *Vd = &(env->fpr[vd].vreg); \
208
+ VReg *Vj = &(env->fpr[vj].vreg); \
209
+ VReg *Vk = &(env->fpr[vk].vreg); \
210
+ \
211
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
212
+ Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
213
+ } \
214
+ Vd->D(1) = 0; \
215
+}
216
+
217
+VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
218
+VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
219
+VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
220
+
221
+#define SSRARNS(E1, E2, T1, T2) \
222
+static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
223
+{ \
224
+ T1 shft_res; \
225
+ \
226
+ shft_res = do_vsrar_ ## E2(e2, sa); \
227
+ T2 mask; \
228
+ mask = (1ll << sh) -1; \
229
+ if (shft_res > mask) { \
230
+ return mask; \
231
+ } else if (shft_res < -(mask +1)) { \
232
+ return ~mask; \
233
+ } else { \
234
+ return shft_res; \
235
+ } \
236
+}
237
+
238
+SSRARNS(B, H, int16_t, int8_t)
239
+SSRARNS(H, W, int32_t, int16_t)
240
+SSRARNS(W, D, int64_t, int32_t)
241
+
242
+#define VSSRARN(NAME, BIT, T, E1, E2) \
243
+void HELPER(NAME)(CPULoongArchState *env, \
244
+ uint32_t vd, uint32_t vj, uint32_t vk) \
245
+{ \
246
+ int i; \
247
+ VReg *Vd = &(env->fpr[vd].vreg); \
248
+ VReg *Vj = &(env->fpr[vj].vreg); \
249
+ VReg *Vk = &(env->fpr[vk].vreg); \
250
+ \
251
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
252
+ Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
253
+ } \
254
+ Vd->D(1) = 0; \
255
+}
256
+
257
+VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
258
+VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
259
+VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
260
+
261
+#define SSRLRNU(E1, E2, T1, T2, T3) \
262
+static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
263
+{ \
264
+ T1 shft_res; \
265
+ \
266
+ shft_res = do_vsrlr_ ## E2(e2, sa); \
267
+ \
268
+ T2 mask; \
269
+ mask = (1ull << sh) -1; \
270
+ if (shft_res > mask) { \
271
+ return mask; \
272
+ } else { \
273
+ return shft_res; \
274
+ } \
275
+}
276
+
277
+SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
278
+SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
279
+SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
280
+
281
+#define VSSRLRNU(NAME, BIT, T, E1, E2) \
282
+void HELPER(NAME)(CPULoongArchState *env, \
283
+ uint32_t vd, uint32_t vj, uint32_t vk) \
284
+{ \
285
+ int i; \
286
+ VReg *Vd = &(env->fpr[vd].vreg); \
287
+ VReg *Vj = &(env->fpr[vj].vreg); \
288
+ VReg *Vk = &(env->fpr[vk].vreg); \
289
+ \
290
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
291
+ Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
292
+ } \
293
+ Vd->D(1) = 0; \
294
+}
295
+
296
+VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
297
+VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
298
+VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
299
+
300
+#define SSRARNU(E1, E2, T1, T2, T3) \
301
+static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
302
+{ \
303
+ T1 shft_res; \
304
+ \
305
+ if (e2 < 0) { \
306
+ shft_res = 0; \
307
+ } else { \
308
+ shft_res = do_vsrar_ ## E2(e2, sa); \
309
+ } \
310
+ T2 mask; \
311
+ mask = (1ull << sh) -1; \
312
+ if (shft_res > mask) { \
313
+ return mask; \
314
+ } else { \
315
+ return shft_res; \
316
+ } \
317
+}
318
+
319
+SSRARNU(B, H, uint16_t, uint8_t, int16_t)
320
+SSRARNU(H, W, uint32_t, uint16_t, int32_t)
321
+SSRARNU(W, D, uint64_t, uint32_t, int64_t)
322
+
323
+#define VSSRARNU(NAME, BIT, T, E1, E2) \
324
+void HELPER(NAME)(CPULoongArchState *env, \
325
+ uint32_t vd, uint32_t vj, uint32_t vk) \
326
+{ \
327
+ int i; \
328
+ VReg *Vd = &(env->fpr[vd].vreg); \
329
+ VReg *Vj = &(env->fpr[vj].vreg); \
330
+ VReg *Vk = &(env->fpr[vk].vreg); \
331
+ \
332
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
333
+ Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
334
+ } \
335
+ Vd->D(1) = 0; \
336
+}
337
+
338
+VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
339
+VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
340
+VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
341
+
342
+#define VSSRLRNI(NAME, BIT, E1, E2) \
343
+void HELPER(NAME)(CPULoongArchState *env, \
344
+ uint32_t vd, uint32_t vj, uint32_t imm) \
345
+{ \
346
+ int i; \
347
+ VReg temp; \
348
+ VReg *Vd = &(env->fpr[vd].vreg); \
349
+ VReg *Vj = &(env->fpr[vj].vreg); \
350
+ \
351
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
352
+ temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
353
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
354
+ } \
355
+ *Vd = temp; \
356
+}
357
+
358
+#define VSSRLRNI_Q(NAME, sh) \
359
+void HELPER(NAME)(CPULoongArchState *env, \
360
+ uint32_t vd, uint32_t vj, uint32_t imm) \
361
+{ \
362
+ Int128 shft_res1, shft_res2, mask, r1, r2; \
363
+ VReg *Vd = &(env->fpr[vd].vreg); \
364
+ VReg *Vj = &(env->fpr[vj].vreg); \
365
+ \
366
+ if (imm == 0) { \
367
+ shft_res1 = Vj->Q(0); \
368
+ shft_res2 = Vd->Q(0); \
369
+ } else { \
370
+ r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \
371
+ r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \
372
+ \
373
+ shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \
374
+ shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \
375
+ } \
376
+ \
377
+ mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \
378
+ \
379
+ if (int128_ult(mask, shft_res1)) { \
380
+ Vd->D(0) = int128_getlo(mask); \
381
+ }else { \
382
+ Vd->D(0) = int128_getlo(shft_res1); \
383
+ } \
384
+ \
385
+ if (int128_ult(mask, shft_res2)) { \
386
+ Vd->D(1) = int128_getlo(mask); \
387
+ }else { \
388
+ Vd->D(1) = int128_getlo(shft_res2); \
389
+ } \
390
+}
391
+
392
+VSSRLRNI(vssrlrni_b_h, 16, B, H)
393
+VSSRLRNI(vssrlrni_h_w, 32, H, W)
394
+VSSRLRNI(vssrlrni_w_d, 64, W, D)
395
+VSSRLRNI_Q(vssrlrni_d_q, 63)
396
+
397
+#define VSSRARNI(NAME, BIT, E1, E2) \
398
+void HELPER(NAME)(CPULoongArchState *env, \
399
+ uint32_t vd, uint32_t vj, uint32_t imm) \
400
+{ \
401
+ int i; \
402
+ VReg temp; \
403
+ VReg *Vd = &(env->fpr[vd].vreg); \
404
+ VReg *Vj = &(env->fpr[vj].vreg); \
405
+ \
406
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
407
+ temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
408
+ temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
409
+ } \
410
+ *Vd = temp; \
411
+}
412
+
413
+void HELPER(vssrarni_d_q)(CPULoongArchState *env,
414
+ uint32_t vd, uint32_t vj, uint32_t imm)
415
+{
416
+ Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
417
+ VReg *Vd = &(env->fpr[vd].vreg);
418
+ VReg *Vj = &(env->fpr[vj].vreg);
419
+
420
+ if (imm == 0) {
421
+ shft_res1 = Vj->Q(0);
422
+ shft_res2 = Vd->Q(0);
423
+ } else {
424
+ r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
425
+ r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
426
+
427
+ shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
428
+ shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
429
+ }
430
+
431
+ mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
432
+ mask2 = int128_lshift(int128_one(), 63);
433
+
434
+ if (int128_gt(shft_res1, mask1)) {
435
+ Vd->D(0) = int128_getlo(mask1);
436
+ } else if (int128_lt(shft_res1, int128_neg(mask2))) {
437
+ Vd->D(0) = int128_getlo(mask2);
438
+ } else {
439
+ Vd->D(0) = int128_getlo(shft_res1);
440
+ }
441
+
442
+ if (int128_gt(shft_res2, mask1)) {
443
+ Vd->D(1) = int128_getlo(mask1);
444
+ } else if (int128_lt(shft_res2, int128_neg(mask2))) {
445
+ Vd->D(1) = int128_getlo(mask2);
446
+ } else {
447
+ Vd->D(1) = int128_getlo(shft_res2);
448
+ }
449
+}
450
+
451
+VSSRARNI(vssrarni_b_h, 16, B, H)
452
+VSSRARNI(vssrarni_h_w, 32, H, W)
453
+VSSRARNI(vssrarni_w_d, 64, W, D)
454
+
455
+#define VSSRLRNUI(NAME, BIT, E1, E2) \
456
+void HELPER(NAME)(CPULoongArchState *env, \
457
+ uint32_t vd, uint32_t vj, uint32_t imm) \
458
+{ \
459
+ int i; \
460
+ VReg temp; \
461
+ VReg *Vd = &(env->fpr[vd].vreg); \
462
+ VReg *Vj = &(env->fpr[vj].vreg); \
463
+ \
464
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
465
+ temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \
466
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
467
+ } \
468
+ *Vd = temp; \
469
+}
470
+
471
+VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
472
+VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
473
+VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
474
+VSSRLRNI_Q(vssrlrni_du_q, 64)
475
+
476
+#define VSSRARNUI(NAME, BIT, E1, E2) \
477
+void HELPER(NAME)(CPULoongArchState *env, \
478
+ uint32_t vd, uint32_t vj, uint32_t imm) \
479
+{ \
480
+ int i; \
481
+ VReg temp; \
482
+ VReg *Vd = &(env->fpr[vd].vreg); \
483
+ VReg *Vj = &(env->fpr[vj].vreg); \
484
+ \
485
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
486
+ temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \
487
+ temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \
488
+ } \
489
+ *Vd = temp; \
490
+}
491
+
492
+void HELPER(vssrarni_du_q)(CPULoongArchState *env,
493
+ uint32_t vd, uint32_t vj, uint32_t imm)
494
+{
495
+ Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
496
+ VReg *Vd = &(env->fpr[vd].vreg);
497
+ VReg *Vj = &(env->fpr[vj].vreg);
498
+
499
+ if (imm == 0) {
500
+ shft_res1 = Vj->Q(0);
501
+ shft_res2 = Vd->Q(0);
502
+ } else {
503
+ r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
504
+ r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
505
+
506
+ shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
507
+ shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
508
+ }
509
+
510
+ if (int128_lt(Vj->Q(0), int128_zero())) {
511
+ shft_res1 = int128_zero();
512
+ }
513
+ if (int128_lt(Vd->Q(0), int128_zero())) {
514
+ shft_res2 = int128_zero();
515
+ }
516
+
517
+ mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
518
+ mask2 = int128_lshift(int128_one(), 64);
519
+
520
+ if (int128_gt(shft_res1, mask1)) {
521
+ Vd->D(0) = int128_getlo(mask1);
522
+ } else if (int128_lt(shft_res1, int128_neg(mask2))) {
523
+ Vd->D(0) = int128_getlo(mask2);
524
+ } else {
525
+ Vd->D(0) = int128_getlo(shft_res1);
526
+ }
527
+
528
+ if (int128_gt(shft_res2, mask1)) {
529
+ Vd->D(1) = int128_getlo(mask1);
530
+ } else if (int128_lt(shft_res2, int128_neg(mask2))) {
531
+ Vd->D(1) = int128_getlo(mask2);
532
+ } else {
533
+ Vd->D(1) = int128_getlo(shft_res2);
534
+ }
535
+}
536
+
537
+VSSRARNUI(vssrarni_bu_h, 16, B, H)
538
+VSSRARNUI(vssrarni_hu_w, 32, H, W)
539
+VSSRARNUI(vssrarni_wu_d, 64, W, D)
540
--
541
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VCLO.{B/H/W/D};
3
- VCLZ.{B/H/W/D}.
4
1
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Song Gao <gaosong@loongson.cn>
7
Message-Id: <20230504122810.4094787-30-gaosong@loongson.cn>
8
---
9
target/loongarch/disas.c | 9 ++++++
10
target/loongarch/helper.h | 9 ++++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 9 ++++++
12
target/loongarch/insns.decode | 9 ++++++
13
target/loongarch/lsx_helper.c | 31 +++++++++++++++++++++
14
5 files changed, 67 insertions(+)
15
16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/loongarch/disas.c
19
+++ b/target/loongarch/disas.c
20
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vssrarni_bu_h, vv_i)
21
INSN_LSX(vssrarni_hu_w, vv_i)
22
INSN_LSX(vssrarni_wu_d, vv_i)
23
INSN_LSX(vssrarni_du_q, vv_i)
24
+
25
+INSN_LSX(vclo_b, vv)
26
+INSN_LSX(vclo_h, vv)
27
+INSN_LSX(vclo_w, vv)
28
+INSN_LSX(vclo_d, vv)
29
+INSN_LSX(vclz_b, vv)
30
+INSN_LSX(vclz_h, vv)
31
+INSN_LSX(vclz_w, vv)
32
+INSN_LSX(vclz_d, vv)
33
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/loongarch/helper.h
36
+++ b/target/loongarch/helper.h
37
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
38
DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
39
DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
40
DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
41
+
42
+DEF_HELPER_3(vclo_b, void, env, i32, i32)
43
+DEF_HELPER_3(vclo_h, void, env, i32, i32)
44
+DEF_HELPER_3(vclo_w, void, env, i32, i32)
45
+DEF_HELPER_3(vclo_d, void, env, i32, i32)
46
+DEF_HELPER_3(vclz_b, void, env, i32, i32)
47
+DEF_HELPER_3(vclz_h, void, env, i32, i32)
48
+DEF_HELPER_3(vclz_w, void, env, i32, i32)
49
+DEF_HELPER_3(vclz_d, void, env, i32, i32)
50
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
53
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
54
@@ -XXX,XX +XXX,XX @@ TRANS(vssrarni_bu_h, gen_vv_i, gen_helper_vssrarni_bu_h)
55
TRANS(vssrarni_hu_w, gen_vv_i, gen_helper_vssrarni_hu_w)
56
TRANS(vssrarni_wu_d, gen_vv_i, gen_helper_vssrarni_wu_d)
57
TRANS(vssrarni_du_q, gen_vv_i, gen_helper_vssrarni_du_q)
58
+
59
+TRANS(vclo_b, gen_vv, gen_helper_vclo_b)
60
+TRANS(vclo_h, gen_vv, gen_helper_vclo_h)
61
+TRANS(vclo_w, gen_vv, gen_helper_vclo_w)
62
+TRANS(vclo_d, gen_vv, gen_helper_vclo_d)
63
+TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
64
+TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
65
+TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
66
+TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
67
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/loongarch/insns.decode
70
+++ b/target/loongarch/insns.decode
71
@@ -XXX,XX +XXX,XX @@ vssrarni_bu_h 0111 00110110 11000 1 .... ..... ..... @vv_ui4
72
vssrarni_hu_w 0111 00110110 11001 ..... ..... ..... @vv_ui5
73
vssrarni_wu_d 0111 00110110 1101 ...... ..... ..... @vv_ui6
74
vssrarni_du_q 0111 00110110 111 ....... ..... ..... @vv_ui7
75
+
76
+vclo_b 0111 00101001 11000 00000 ..... ..... @vv
77
+vclo_h 0111 00101001 11000 00001 ..... ..... @vv
78
+vclo_w 0111 00101001 11000 00010 ..... ..... @vv
79
+vclo_d 0111 00101001 11000 00011 ..... ..... @vv
80
+vclz_b 0111 00101001 11000 00100 ..... ..... @vv
81
+vclz_h 0111 00101001 11000 00101 ..... ..... @vv
82
+vclz_w 0111 00101001 11000 00110 ..... ..... @vv
83
+vclz_d 0111 00101001 11000 00111 ..... ..... @vv
84
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/target/loongarch/lsx_helper.c
87
+++ b/target/loongarch/lsx_helper.c
88
@@ -XXX,XX +XXX,XX @@ void HELPER(vssrarni_du_q)(CPULoongArchState *env,
89
VSSRARNUI(vssrarni_bu_h, 16, B, H)
90
VSSRARNUI(vssrarni_hu_w, 32, H, W)
91
VSSRARNUI(vssrarni_wu_d, 64, W, D)
92
+
93
+#define DO_2OP(NAME, BIT, E, DO_OP) \
94
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
95
+{ \
96
+ int i; \
97
+ VReg *Vd = &(env->fpr[vd].vreg); \
98
+ VReg *Vj = &(env->fpr[vj].vreg); \
99
+ \
100
+ for (i = 0; i < LSX_LEN/BIT; i++) \
101
+ { \
102
+ Vd->E(i) = DO_OP(Vj->E(i)); \
103
+ } \
104
+}
105
+
106
+#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
107
+#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16)
108
+#define DO_CLO_W(N) (clz32(~N))
109
+#define DO_CLO_D(N) (clz64(~N))
110
+#define DO_CLZ_B(N) (clz32(N) - 24)
111
+#define DO_CLZ_H(N) (clz32(N) - 16)
112
+#define DO_CLZ_W(N) (clz32(N))
113
+#define DO_CLZ_D(N) (clz64(N))
114
+
115
+DO_2OP(vclo_b, 8, UB, DO_CLO_B)
116
+DO_2OP(vclo_h, 16, UH, DO_CLO_H)
117
+DO_2OP(vclo_w, 32, UW, DO_CLO_W)
118
+DO_2OP(vclo_d, 64, UD, DO_CLO_D)
119
+DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
120
+DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
121
+DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
122
+DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
123
--
124
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VPCNT.{B/H/W/D}.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-31-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 5 +++++
9
target/loongarch/helper.h | 5 +++++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++++
11
target/loongarch/insns.decode | 5 +++++
12
target/loongarch/lsx_helper.c | 18 ++++++++++++++++++
13
5 files changed, 38 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vclz_b, vv)
20
INSN_LSX(vclz_h, vv)
21
INSN_LSX(vclz_w, vv)
22
INSN_LSX(vclz_d, vv)
23
+
24
+INSN_LSX(vpcnt_b, vv)
25
+INSN_LSX(vpcnt_h, vv)
26
+INSN_LSX(vpcnt_w, vv)
27
+INSN_LSX(vpcnt_d, vv)
28
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/loongarch/helper.h
31
+++ b/target/loongarch/helper.h
32
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vclz_b, void, env, i32, i32)
33
DEF_HELPER_3(vclz_h, void, env, i32, i32)
34
DEF_HELPER_3(vclz_w, void, env, i32, i32)
35
DEF_HELPER_3(vclz_d, void, env, i32, i32)
36
+
37
+DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
38
+DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
39
+DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
40
+DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
41
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
44
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
45
@@ -XXX,XX +XXX,XX @@ TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
46
TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
47
TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
48
TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
49
+
50
+TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
51
+TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
52
+TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
53
+TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
54
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/loongarch/insns.decode
57
+++ b/target/loongarch/insns.decode
58
@@ -XXX,XX +XXX,XX @@ vclz_b 0111 00101001 11000 00100 ..... ..... @vv
59
vclz_h 0111 00101001 11000 00101 ..... ..... @vv
60
vclz_w 0111 00101001 11000 00110 ..... ..... @vv
61
vclz_d 0111 00101001 11000 00111 ..... ..... @vv
62
+
63
+vpcnt_b 0111 00101001 11000 01000 ..... ..... @vv
64
+vpcnt_h 0111 00101001 11000 01001 ..... ..... @vv
65
+vpcnt_w 0111 00101001 11000 01010 ..... ..... @vv
66
+vpcnt_d 0111 00101001 11000 01011 ..... ..... @vv
67
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/loongarch/lsx_helper.c
70
+++ b/target/loongarch/lsx_helper.c
71
@@ -XXX,XX +XXX,XX @@ DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
72
DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
73
DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
74
DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
75
+
76
+#define VPCNT(NAME, BIT, E, FN) \
77
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
78
+{ \
79
+ int i; \
80
+ VReg *Vd = &(env->fpr[vd].vreg); \
81
+ VReg *Vj = &(env->fpr[vj].vreg); \
82
+ \
83
+ for (i = 0; i < LSX_LEN/BIT; i++) \
84
+ { \
85
+ Vd->E(i) = FN(Vj->E(i)); \
86
+ } \
87
+}
88
+
89
+VPCNT(vpcnt_b, 8, UB, ctpop8)
90
+VPCNT(vpcnt_h, 16, UH, ctpop16)
91
+VPCNT(vpcnt_w, 32, UW, ctpop32)
92
+VPCNT(vpcnt_d, 64, UD, ctpop64)
93
--
94
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VBITCLR[I].{B/H/W/D};
3
- VBITSET[I].{B/H/W/D};
4
- VBITREV[I].{B/H/W/D}.
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-32-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 25 ++
11
target/loongarch/helper.h | 27 ++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 305 ++++++++++++++++++++
13
target/loongarch/insns.decode | 25 ++
14
target/loongarch/lsx_helper.c | 55 ++++
15
5 files changed, 437 insertions(+)
16
17
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/loongarch/disas.c
20
+++ b/target/loongarch/disas.c
21
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vpcnt_b, vv)
22
INSN_LSX(vpcnt_h, vv)
23
INSN_LSX(vpcnt_w, vv)
24
INSN_LSX(vpcnt_d, vv)
25
+
26
+INSN_LSX(vbitclr_b, vvv)
27
+INSN_LSX(vbitclr_h, vvv)
28
+INSN_LSX(vbitclr_w, vvv)
29
+INSN_LSX(vbitclr_d, vvv)
30
+INSN_LSX(vbitclri_b, vv_i)
31
+INSN_LSX(vbitclri_h, vv_i)
32
+INSN_LSX(vbitclri_w, vv_i)
33
+INSN_LSX(vbitclri_d, vv_i)
34
+INSN_LSX(vbitset_b, vvv)
35
+INSN_LSX(vbitset_h, vvv)
36
+INSN_LSX(vbitset_w, vvv)
37
+INSN_LSX(vbitset_d, vvv)
38
+INSN_LSX(vbitseti_b, vv_i)
39
+INSN_LSX(vbitseti_h, vv_i)
40
+INSN_LSX(vbitseti_w, vv_i)
41
+INSN_LSX(vbitseti_d, vv_i)
42
+INSN_LSX(vbitrev_b, vvv)
43
+INSN_LSX(vbitrev_h, vvv)
44
+INSN_LSX(vbitrev_w, vvv)
45
+INSN_LSX(vbitrev_d, vvv)
46
+INSN_LSX(vbitrevi_b, vv_i)
47
+INSN_LSX(vbitrevi_h, vv_i)
48
+INSN_LSX(vbitrevi_w, vv_i)
49
+INSN_LSX(vbitrevi_d, vv_i)
50
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/loongarch/helper.h
53
+++ b/target/loongarch/helper.h
54
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
55
DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
56
DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
57
DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
58
+
59
+DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
60
+DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
61
+DEF_HELPER_FLAGS_4(vbitclr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
62
+DEF_HELPER_FLAGS_4(vbitclr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
63
+DEF_HELPER_FLAGS_4(vbitclri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
64
+DEF_HELPER_FLAGS_4(vbitclri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
65
+DEF_HELPER_FLAGS_4(vbitclri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
66
+DEF_HELPER_FLAGS_4(vbitclri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
67
+
68
+DEF_HELPER_FLAGS_4(vbitset_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
69
+DEF_HELPER_FLAGS_4(vbitset_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
70
+DEF_HELPER_FLAGS_4(vbitset_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
71
+DEF_HELPER_FLAGS_4(vbitset_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
72
+DEF_HELPER_FLAGS_4(vbitseti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
73
+DEF_HELPER_FLAGS_4(vbitseti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
74
+DEF_HELPER_FLAGS_4(vbitseti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
75
+DEF_HELPER_FLAGS_4(vbitseti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
76
+
77
+DEF_HELPER_FLAGS_4(vbitrev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
78
+DEF_HELPER_FLAGS_4(vbitrev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
79
+DEF_HELPER_FLAGS_4(vbitrev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
80
+DEF_HELPER_FLAGS_4(vbitrev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
81
+DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
82
+DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
83
+DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
84
+DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
85
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
88
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
89
@@ -XXX,XX +XXX,XX @@ TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
90
TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
91
TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
92
TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
93
+
94
+static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
95
+ void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
96
+{
97
+ TCGv_vec mask, lsh, t1, one;
98
+
99
+ lsh = tcg_temp_new_vec_matching(t);
100
+ t1 = tcg_temp_new_vec_matching(t);
101
+ mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
102
+ one = tcg_constant_vec_matching(t, vece, 1);
103
+
104
+ tcg_gen_and_vec(vece, lsh, b, mask);
105
+ tcg_gen_shlv_vec(vece, t1, one, lsh);
106
+ func(vece, t, a, t1);
107
+}
108
+
109
+static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
110
+{
111
+ do_vbit(vece, t, a, b, tcg_gen_andc_vec);
112
+}
113
+
114
+static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
115
+{
116
+ do_vbit(vece, t, a, b, tcg_gen_or_vec);
117
+}
118
+
119
+static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
120
+{
121
+ do_vbit(vece, t, a, b, tcg_gen_xor_vec);
122
+}
123
+
124
+static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
125
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
126
+{
127
+ static const TCGOpcode vecop_list[] = {
128
+ INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
129
+ };
130
+ static const GVecGen3 op[4] = {
131
+ {
132
+ .fniv = gen_vbitclr,
133
+ .fno = gen_helper_vbitclr_b,
134
+ .opt_opc = vecop_list,
135
+ .vece = MO_8
136
+ },
137
+ {
138
+ .fniv = gen_vbitclr,
139
+ .fno = gen_helper_vbitclr_h,
140
+ .opt_opc = vecop_list,
141
+ .vece = MO_16
142
+ },
143
+ {
144
+ .fniv = gen_vbitclr,
145
+ .fno = gen_helper_vbitclr_w,
146
+ .opt_opc = vecop_list,
147
+ .vece = MO_32
148
+ },
149
+ {
150
+ .fniv = gen_vbitclr,
151
+ .fno = gen_helper_vbitclr_d,
152
+ .opt_opc = vecop_list,
153
+ .vece = MO_64
154
+ },
155
+ };
156
+
157
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
158
+}
159
+
160
+TRANS(vbitclr_b, gvec_vvv, MO_8, do_vbitclr)
161
+TRANS(vbitclr_h, gvec_vvv, MO_16, do_vbitclr)
162
+TRANS(vbitclr_w, gvec_vvv, MO_32, do_vbitclr)
163
+TRANS(vbitclr_d, gvec_vvv, MO_64, do_vbitclr)
164
+
165
+static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
166
+ void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
167
+{
168
+ int lsh;
169
+ TCGv_vec t1, one;
170
+
171
+ lsh = imm & ((8 << vece) -1);
172
+ t1 = tcg_temp_new_vec_matching(t);
173
+ one = tcg_constant_vec_matching(t, vece, 1);
174
+
175
+ tcg_gen_shli_vec(vece, t1, one, lsh);
176
+ func(vece, t, a, t1);
177
+}
178
+
179
+static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
180
+{
181
+ do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
182
+}
183
+
184
+static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
185
+{
186
+ do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
187
+}
188
+
189
+static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
190
+{
191
+ do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
192
+}
193
+
194
+static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
195
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
196
+{
197
+ static const TCGOpcode vecop_list[] = {
198
+ INDEX_op_shli_vec, INDEX_op_andc_vec, 0
199
+ };
200
+ static const GVecGen2i op[4] = {
201
+ {
202
+ .fniv = gen_vbitclri,
203
+ .fnoi = gen_helper_vbitclri_b,
204
+ .opt_opc = vecop_list,
205
+ .vece = MO_8
206
+ },
207
+ {
208
+ .fniv = gen_vbitclri,
209
+ .fnoi = gen_helper_vbitclri_h,
210
+ .opt_opc = vecop_list,
211
+ .vece = MO_16
212
+ },
213
+ {
214
+ .fniv = gen_vbitclri,
215
+ .fnoi = gen_helper_vbitclri_w,
216
+ .opt_opc = vecop_list,
217
+ .vece = MO_32
218
+ },
219
+ {
220
+ .fniv = gen_vbitclri,
221
+ .fnoi = gen_helper_vbitclri_d,
222
+ .opt_opc = vecop_list,
223
+ .vece = MO_64
224
+ },
225
+ };
226
+
227
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
228
+}
229
+
230
+TRANS(vbitclri_b, gvec_vv_i, MO_8, do_vbitclri)
231
+TRANS(vbitclri_h, gvec_vv_i, MO_16, do_vbitclri)
232
+TRANS(vbitclri_w, gvec_vv_i, MO_32, do_vbitclri)
233
+TRANS(vbitclri_d, gvec_vv_i, MO_64, do_vbitclri)
234
+
235
+static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
236
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
237
+{
238
+ static const TCGOpcode vecop_list[] = {
239
+ INDEX_op_shlv_vec, 0
240
+ };
241
+ static const GVecGen3 op[4] = {
242
+ {
243
+ .fniv = gen_vbitset,
244
+ .fno = gen_helper_vbitset_b,
245
+ .opt_opc = vecop_list,
246
+ .vece = MO_8
247
+ },
248
+ {
249
+ .fniv = gen_vbitset,
250
+ .fno = gen_helper_vbitset_h,
251
+ .opt_opc = vecop_list,
252
+ .vece = MO_16
253
+ },
254
+ {
255
+ .fniv = gen_vbitset,
256
+ .fno = gen_helper_vbitset_w,
257
+ .opt_opc = vecop_list,
258
+ .vece = MO_32
259
+ },
260
+ {
261
+ .fniv = gen_vbitset,
262
+ .fno = gen_helper_vbitset_d,
263
+ .opt_opc = vecop_list,
264
+ .vece = MO_64
265
+ },
266
+ };
267
+
268
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
269
+}
270
+
271
+TRANS(vbitset_b, gvec_vvv, MO_8, do_vbitset)
272
+TRANS(vbitset_h, gvec_vvv, MO_16, do_vbitset)
273
+TRANS(vbitset_w, gvec_vvv, MO_32, do_vbitset)
274
+TRANS(vbitset_d, gvec_vvv, MO_64, do_vbitset)
275
+
276
+static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
277
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
278
+{
279
+ static const TCGOpcode vecop_list[] = {
280
+ INDEX_op_shli_vec, 0
281
+ };
282
+ static const GVecGen2i op[4] = {
283
+ {
284
+ .fniv = gen_vbitseti,
285
+ .fnoi = gen_helper_vbitseti_b,
286
+ .opt_opc = vecop_list,
287
+ .vece = MO_8
288
+ },
289
+ {
290
+ .fniv = gen_vbitseti,
291
+ .fnoi = gen_helper_vbitseti_h,
292
+ .opt_opc = vecop_list,
293
+ .vece = MO_16
294
+ },
295
+ {
296
+ .fniv = gen_vbitseti,
297
+ .fnoi = gen_helper_vbitseti_w,
298
+ .opt_opc = vecop_list,
299
+ .vece = MO_32
300
+ },
301
+ {
302
+ .fniv = gen_vbitseti,
303
+ .fnoi = gen_helper_vbitseti_d,
304
+ .opt_opc = vecop_list,
305
+ .vece = MO_64
306
+ },
307
+ };
308
+
309
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
310
+}
311
+
312
+TRANS(vbitseti_b, gvec_vv_i, MO_8, do_vbitseti)
313
+TRANS(vbitseti_h, gvec_vv_i, MO_16, do_vbitseti)
314
+TRANS(vbitseti_w, gvec_vv_i, MO_32, do_vbitseti)
315
+TRANS(vbitseti_d, gvec_vv_i, MO_64, do_vbitseti)
316
+
317
+static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
318
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
319
+{
320
+ static const TCGOpcode vecop_list[] = {
321
+ INDEX_op_shlv_vec, 0
322
+ };
323
+ static const GVecGen3 op[4] = {
324
+ {
325
+ .fniv = gen_vbitrev,
326
+ .fno = gen_helper_vbitrev_b,
327
+ .opt_opc = vecop_list,
328
+ .vece = MO_8
329
+ },
330
+ {
331
+ .fniv = gen_vbitrev,
332
+ .fno = gen_helper_vbitrev_h,
333
+ .opt_opc = vecop_list,
334
+ .vece = MO_16
335
+ },
336
+ {
337
+ .fniv = gen_vbitrev,
338
+ .fno = gen_helper_vbitrev_w,
339
+ .opt_opc = vecop_list,
340
+ .vece = MO_32
341
+ },
342
+ {
343
+ .fniv = gen_vbitrev,
344
+ .fno = gen_helper_vbitrev_d,
345
+ .opt_opc = vecop_list,
346
+ .vece = MO_64
347
+ },
348
+ };
349
+
350
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
351
+}
352
+
353
+TRANS(vbitrev_b, gvec_vvv, MO_8, do_vbitrev)
354
+TRANS(vbitrev_h, gvec_vvv, MO_16, do_vbitrev)
355
+TRANS(vbitrev_w, gvec_vvv, MO_32, do_vbitrev)
356
+TRANS(vbitrev_d, gvec_vvv, MO_64, do_vbitrev)
357
+
358
+static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
359
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
360
+{
361
+ static const TCGOpcode vecop_list[] = {
362
+ INDEX_op_shli_vec, 0
363
+ };
364
+ static const GVecGen2i op[4] = {
365
+ {
366
+ .fniv = gen_vbitrevi,
367
+ .fnoi = gen_helper_vbitrevi_b,
368
+ .opt_opc = vecop_list,
369
+ .vece = MO_8
370
+ },
371
+ {
372
+ .fniv = gen_vbitrevi,
373
+ .fnoi = gen_helper_vbitrevi_h,
374
+ .opt_opc = vecop_list,
375
+ .vece = MO_16
376
+ },
377
+ {
378
+ .fniv = gen_vbitrevi,
379
+ .fnoi = gen_helper_vbitrevi_w,
380
+ .opt_opc = vecop_list,
381
+ .vece = MO_32
382
+ },
383
+ {
384
+ .fniv = gen_vbitrevi,
385
+ .fnoi = gen_helper_vbitrevi_d,
386
+ .opt_opc = vecop_list,
387
+ .vece = MO_64
388
+ },
389
+ };
390
+
391
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
392
+}
393
+
394
+TRANS(vbitrevi_b, gvec_vv_i, MO_8, do_vbitrevi)
395
+TRANS(vbitrevi_h, gvec_vv_i, MO_16, do_vbitrevi)
396
+TRANS(vbitrevi_w, gvec_vv_i, MO_32, do_vbitrevi)
397
+TRANS(vbitrevi_d, gvec_vv_i, MO_64, do_vbitrevi)
398
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
399
index XXXXXXX..XXXXXXX 100644
400
--- a/target/loongarch/insns.decode
401
+++ b/target/loongarch/insns.decode
402
@@ -XXX,XX +XXX,XX @@ vpcnt_b 0111 00101001 11000 01000 ..... ..... @vv
403
vpcnt_h 0111 00101001 11000 01001 ..... ..... @vv
404
vpcnt_w 0111 00101001 11000 01010 ..... ..... @vv
405
vpcnt_d 0111 00101001 11000 01011 ..... ..... @vv
406
+
407
+vbitclr_b 0111 00010000 11000 ..... ..... ..... @vvv
408
+vbitclr_h 0111 00010000 11001 ..... ..... ..... @vvv
409
+vbitclr_w 0111 00010000 11010 ..... ..... ..... @vvv
410
+vbitclr_d 0111 00010000 11011 ..... ..... ..... @vvv
411
+vbitclri_b 0111 00110001 00000 01 ... ..... ..... @vv_ui3
412
+vbitclri_h 0111 00110001 00000 1 .... ..... ..... @vv_ui4
413
+vbitclri_w 0111 00110001 00001 ..... ..... ..... @vv_ui5
414
+vbitclri_d 0111 00110001 0001 ...... ..... ..... @vv_ui6
415
+vbitset_b 0111 00010000 11100 ..... ..... ..... @vvv
416
+vbitset_h 0111 00010000 11101 ..... ..... ..... @vvv
417
+vbitset_w 0111 00010000 11110 ..... ..... ..... @vvv
418
+vbitset_d 0111 00010000 11111 ..... ..... ..... @vvv
419
+vbitseti_b 0111 00110001 01000 01 ... ..... ..... @vv_ui3
420
+vbitseti_h 0111 00110001 01000 1 .... ..... ..... @vv_ui4
421
+vbitseti_w 0111 00110001 01001 ..... ..... ..... @vv_ui5
422
+vbitseti_d 0111 00110001 0101 ...... ..... ..... @vv_ui6
423
+vbitrev_b 0111 00010001 00000 ..... ..... ..... @vvv
424
+vbitrev_h 0111 00010001 00001 ..... ..... ..... @vvv
425
+vbitrev_w 0111 00010001 00010 ..... ..... ..... @vvv
426
+vbitrev_d 0111 00010001 00011 ..... ..... ..... @vvv
427
+vbitrevi_b 0111 00110001 10000 01 ... ..... ..... @vv_ui3
428
+vbitrevi_h 0111 00110001 10000 1 .... ..... ..... @vv_ui4
429
+vbitrevi_w 0111 00110001 10001 ..... ..... ..... @vv_ui5
430
+vbitrevi_d 0111 00110001 1001 ...... ..... ..... @vv_ui6
431
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
432
index XXXXXXX..XXXXXXX 100644
433
--- a/target/loongarch/lsx_helper.c
434
+++ b/target/loongarch/lsx_helper.c
435
@@ -XXX,XX +XXX,XX @@ VPCNT(vpcnt_b, 8, UB, ctpop8)
436
VPCNT(vpcnt_h, 16, UH, ctpop16)
437
VPCNT(vpcnt_w, 32, UW, ctpop32)
438
VPCNT(vpcnt_d, 64, UD, ctpop64)
439
+
440
+#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
441
+#define DO_BITSET(a, bit) (a | 1ull << bit)
442
+#define DO_BITREV(a, bit) (a ^ (1ull << bit))
443
+
444
+#define DO_BIT(NAME, BIT, E, DO_OP) \
445
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
446
+{ \
447
+ int i; \
448
+ VReg *Vd = (VReg *)vd; \
449
+ VReg *Vj = (VReg *)vj; \
450
+ VReg *Vk = (VReg *)vk; \
451
+ \
452
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
453
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \
454
+ } \
455
+}
456
+
457
+DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
458
+DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
459
+DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
460
+DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
461
+DO_BIT(vbitset_b, 8, UB, DO_BITSET)
462
+DO_BIT(vbitset_h, 16, UH, DO_BITSET)
463
+DO_BIT(vbitset_w, 32, UW, DO_BITSET)
464
+DO_BIT(vbitset_d, 64, UD, DO_BITSET)
465
+DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
466
+DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
467
+DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
468
+DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
469
+
470
+#define DO_BITI(NAME, BIT, E, DO_OP) \
471
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
472
+{ \
473
+ int i; \
474
+ VReg *Vd = (VReg *)vd; \
475
+ VReg *Vj = (VReg *)vj; \
476
+ \
477
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
478
+ Vd->E(i) = DO_OP(Vj->E(i), imm); \
479
+ } \
480
+}
481
+
482
+DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
483
+DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
484
+DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
485
+DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
486
+DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
487
+DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
488
+DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
489
+DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
490
+DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
491
+DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
492
+DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
493
+DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
494
--
495
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VFRSTP[I].{B/H}.
3
1
4
Acked-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-33-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 5 +++
9
target/loongarch/helper.h | 5 +++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 5 +++
11
target/loongarch/insns.decode | 5 +++
12
target/loongarch/lsx_helper.c | 41 +++++++++++++++++++++
13
5 files changed, 61 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vbitrevi_b, vv_i)
20
INSN_LSX(vbitrevi_h, vv_i)
21
INSN_LSX(vbitrevi_w, vv_i)
22
INSN_LSX(vbitrevi_d, vv_i)
23
+
24
+INSN_LSX(vfrstp_b, vvv)
25
+INSN_LSX(vfrstp_h, vvv)
26
+INSN_LSX(vfrstpi_b, vv_i)
27
+INSN_LSX(vfrstpi_h, vv_i)
28
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/loongarch/helper.h
31
+++ b/target/loongarch/helper.h
32
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
33
DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
34
DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
35
DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
36
+
37
+DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
38
+DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
39
+DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
40
+DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
41
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
44
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
45
@@ -XXX,XX +XXX,XX @@ TRANS(vbitrevi_b, gvec_vv_i, MO_8, do_vbitrevi)
46
TRANS(vbitrevi_h, gvec_vv_i, MO_16, do_vbitrevi)
47
TRANS(vbitrevi_w, gvec_vv_i, MO_32, do_vbitrevi)
48
TRANS(vbitrevi_d, gvec_vv_i, MO_64, do_vbitrevi)
49
+
50
+TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b)
51
+TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h)
52
+TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b)
53
+TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h)
54
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/loongarch/insns.decode
57
+++ b/target/loongarch/insns.decode
58
@@ -XXX,XX +XXX,XX @@ vbitrevi_b 0111 00110001 10000 01 ... ..... ..... @vv_ui3
59
vbitrevi_h 0111 00110001 10000 1 .... ..... ..... @vv_ui4
60
vbitrevi_w 0111 00110001 10001 ..... ..... ..... @vv_ui5
61
vbitrevi_d 0111 00110001 1001 ...... ..... ..... @vv_ui6
62
+
63
+vfrstp_b 0111 00010010 10110 ..... ..... ..... @vvv
64
+vfrstp_h 0111 00010010 10111 ..... ..... ..... @vvv
65
+vfrstpi_b 0111 00101001 10100 ..... ..... ..... @vv_ui5
66
+vfrstpi_h 0111 00101001 10101 ..... ..... ..... @vv_ui5
67
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/loongarch/lsx_helper.c
70
+++ b/target/loongarch/lsx_helper.c
71
@@ -XXX,XX +XXX,XX @@ DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
72
DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
73
DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
74
DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
75
+
76
+#define VFRSTP(NAME, BIT, MASK, E) \
77
+void HELPER(NAME)(CPULoongArchState *env, \
78
+ uint32_t vd, uint32_t vj, uint32_t vk) \
79
+{ \
80
+ int i, m; \
81
+ VReg *Vd = &(env->fpr[vd].vreg); \
82
+ VReg *Vj = &(env->fpr[vj].vreg); \
83
+ VReg *Vk = &(env->fpr[vk].vreg); \
84
+ \
85
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
86
+ if (Vj->E(i) < 0) { \
87
+ break; \
88
+ } \
89
+ } \
90
+ m = Vk->E(0) & MASK; \
91
+ Vd->E(m) = i; \
92
+}
93
+
94
+VFRSTP(vfrstp_b, 8, 0xf, B)
95
+VFRSTP(vfrstp_h, 16, 0x7, H)
96
+
97
+#define VFRSTPI(NAME, BIT, E) \
98
+void HELPER(NAME)(CPULoongArchState *env, \
99
+ uint32_t vd, uint32_t vj, uint32_t imm) \
100
+{ \
101
+ int i, m; \
102
+ VReg *Vd = &(env->fpr[vd].vreg); \
103
+ VReg *Vj = &(env->fpr[vj].vreg); \
104
+ \
105
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
106
+ if (Vj->E(i) < 0) { \
107
+ break; \
108
+ } \
109
+ } \
110
+ m = imm % (LSX_LEN/BIT); \
111
+ Vd->E(m) = i; \
112
+}
113
+
114
+VFRSTPI(vfrstpi_b, 8, B)
115
+VFRSTPI(vfrstpi_h, 16, H)
116
--
117
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VFCVT{L/H}.{S.H/D.S};
3
- VFCVT.{H.S/S.D};
4
- VFRINT[{RNE/RZ/RP/RM}].{S/D};
5
- VFTINT[{RNE/RZ/RP/RM}].{W.S/L.D};
6
- VFTINT[RZ].{WU.S/LU.D};
7
- VFTINT[{RNE/RZ/RP/RM}].W.D;
8
- VFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S;
9
- VFFINT.{S.W/D.L}[U];
10
- VFFINT.S.L, VFFINT{L/H}.D.W.
11
1
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Song Gao <gaosong@loongson.cn>
14
Message-Id: <20230504122810.4094787-35-gaosong@loongson.cn>
15
---
16
target/loongarch/disas.c | 56 +++
17
target/loongarch/helper.h | 56 +++
18
target/loongarch/insn_trans/trans_lsx.c.inc | 56 +++
19
target/loongarch/insns.decode | 56 +++
20
target/loongarch/lsx_helper.c | 376 ++++++++++++++++++++
21
5 files changed, 600 insertions(+)
22
23
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/loongarch/disas.c
26
+++ b/target/loongarch/disas.c
27
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vfrecip_s, vv)
28
INSN_LSX(vfrecip_d, vv)
29
INSN_LSX(vfrsqrt_s, vv)
30
INSN_LSX(vfrsqrt_d, vv)
31
+
32
+INSN_LSX(vfcvtl_s_h, vv)
33
+INSN_LSX(vfcvth_s_h, vv)
34
+INSN_LSX(vfcvtl_d_s, vv)
35
+INSN_LSX(vfcvth_d_s, vv)
36
+INSN_LSX(vfcvt_h_s, vvv)
37
+INSN_LSX(vfcvt_s_d, vvv)
38
+
39
+INSN_LSX(vfrint_s, vv)
40
+INSN_LSX(vfrint_d, vv)
41
+INSN_LSX(vfrintrm_s, vv)
42
+INSN_LSX(vfrintrm_d, vv)
43
+INSN_LSX(vfrintrp_s, vv)
44
+INSN_LSX(vfrintrp_d, vv)
45
+INSN_LSX(vfrintrz_s, vv)
46
+INSN_LSX(vfrintrz_d, vv)
47
+INSN_LSX(vfrintrne_s, vv)
48
+INSN_LSX(vfrintrne_d, vv)
49
+
50
+INSN_LSX(vftint_w_s, vv)
51
+INSN_LSX(vftint_l_d, vv)
52
+INSN_LSX(vftintrm_w_s, vv)
53
+INSN_LSX(vftintrm_l_d, vv)
54
+INSN_LSX(vftintrp_w_s, vv)
55
+INSN_LSX(vftintrp_l_d, vv)
56
+INSN_LSX(vftintrz_w_s, vv)
57
+INSN_LSX(vftintrz_l_d, vv)
58
+INSN_LSX(vftintrne_w_s, vv)
59
+INSN_LSX(vftintrne_l_d, vv)
60
+INSN_LSX(vftint_wu_s, vv)
61
+INSN_LSX(vftint_lu_d, vv)
62
+INSN_LSX(vftintrz_wu_s, vv)
63
+INSN_LSX(vftintrz_lu_d, vv)
64
+INSN_LSX(vftint_w_d, vvv)
65
+INSN_LSX(vftintrm_w_d, vvv)
66
+INSN_LSX(vftintrp_w_d, vvv)
67
+INSN_LSX(vftintrz_w_d, vvv)
68
+INSN_LSX(vftintrne_w_d, vvv)
69
+INSN_LSX(vftintl_l_s, vv)
70
+INSN_LSX(vftinth_l_s, vv)
71
+INSN_LSX(vftintrml_l_s, vv)
72
+INSN_LSX(vftintrmh_l_s, vv)
73
+INSN_LSX(vftintrpl_l_s, vv)
74
+INSN_LSX(vftintrph_l_s, vv)
75
+INSN_LSX(vftintrzl_l_s, vv)
76
+INSN_LSX(vftintrzh_l_s, vv)
77
+INSN_LSX(vftintrnel_l_s, vv)
78
+INSN_LSX(vftintrneh_l_s, vv)
79
+
80
+INSN_LSX(vffint_s_w, vv)
81
+INSN_LSX(vffint_s_wu, vv)
82
+INSN_LSX(vffint_d_l, vv)
83
+INSN_LSX(vffint_d_lu, vv)
84
+INSN_LSX(vffintl_d_w, vv)
85
+INSN_LSX(vffinth_d_w, vv)
86
+INSN_LSX(vffint_s_l, vvv)
87
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/loongarch/helper.h
90
+++ b/target/loongarch/helper.h
91
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
92
DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
93
DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
94
DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
95
+
96
+DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
97
+DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
98
+DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
99
+DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
100
+DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
101
+DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
102
+
103
+DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
104
+DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
105
+DEF_HELPER_3(vfrintrz_s, void, env, i32, i32)
106
+DEF_HELPER_3(vfrintrz_d, void, env, i32, i32)
107
+DEF_HELPER_3(vfrintrp_s, void, env, i32, i32)
108
+DEF_HELPER_3(vfrintrp_d, void, env, i32, i32)
109
+DEF_HELPER_3(vfrintrm_s, void, env, i32, i32)
110
+DEF_HELPER_3(vfrintrm_d, void, env, i32, i32)
111
+DEF_HELPER_3(vfrint_s, void, env, i32, i32)
112
+DEF_HELPER_3(vfrint_d, void, env, i32, i32)
113
+
114
+DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32)
115
+DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32)
116
+DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32)
117
+DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32)
118
+DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32)
119
+DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32)
120
+DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32)
121
+DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32)
122
+DEF_HELPER_3(vftint_w_s, void, env, i32, i32)
123
+DEF_HELPER_3(vftint_l_d, void, env, i32, i32)
124
+DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
125
+DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
126
+DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
127
+DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
128
+DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
129
+DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
130
+DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
131
+DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
132
+DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
133
+DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
134
+DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
135
+DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
136
+DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32)
137
+DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32)
138
+DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32)
139
+DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32)
140
+DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32)
141
+DEF_HELPER_3(vftintl_l_s, void, env, i32, i32)
142
+DEF_HELPER_3(vftinth_l_s, void, env, i32, i32)
143
+
144
+DEF_HELPER_3(vffint_s_w, void, env, i32, i32)
145
+DEF_HELPER_3(vffint_d_l, void, env, i32, i32)
146
+DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
147
+DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
148
+DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
149
+DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
150
+DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
151
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
154
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
155
@@ -XXX,XX +XXX,XX @@ TRANS(vfrecip_s, gen_vv, gen_helper_vfrecip_s)
156
TRANS(vfrecip_d, gen_vv, gen_helper_vfrecip_d)
157
TRANS(vfrsqrt_s, gen_vv, gen_helper_vfrsqrt_s)
158
TRANS(vfrsqrt_d, gen_vv, gen_helper_vfrsqrt_d)
159
+
160
+TRANS(vfcvtl_s_h, gen_vv, gen_helper_vfcvtl_s_h)
161
+TRANS(vfcvth_s_h, gen_vv, gen_helper_vfcvth_s_h)
162
+TRANS(vfcvtl_d_s, gen_vv, gen_helper_vfcvtl_d_s)
163
+TRANS(vfcvth_d_s, gen_vv, gen_helper_vfcvth_d_s)
164
+TRANS(vfcvt_h_s, gen_vvv, gen_helper_vfcvt_h_s)
165
+TRANS(vfcvt_s_d, gen_vvv, gen_helper_vfcvt_s_d)
166
+
167
+TRANS(vfrintrne_s, gen_vv, gen_helper_vfrintrne_s)
168
+TRANS(vfrintrne_d, gen_vv, gen_helper_vfrintrne_d)
169
+TRANS(vfrintrz_s, gen_vv, gen_helper_vfrintrz_s)
170
+TRANS(vfrintrz_d, gen_vv, gen_helper_vfrintrz_d)
171
+TRANS(vfrintrp_s, gen_vv, gen_helper_vfrintrp_s)
172
+TRANS(vfrintrp_d, gen_vv, gen_helper_vfrintrp_d)
173
+TRANS(vfrintrm_s, gen_vv, gen_helper_vfrintrm_s)
174
+TRANS(vfrintrm_d, gen_vv, gen_helper_vfrintrm_d)
175
+TRANS(vfrint_s, gen_vv, gen_helper_vfrint_s)
176
+TRANS(vfrint_d, gen_vv, gen_helper_vfrint_d)
177
+
178
+TRANS(vftintrne_w_s, gen_vv, gen_helper_vftintrne_w_s)
179
+TRANS(vftintrne_l_d, gen_vv, gen_helper_vftintrne_l_d)
180
+TRANS(vftintrz_w_s, gen_vv, gen_helper_vftintrz_w_s)
181
+TRANS(vftintrz_l_d, gen_vv, gen_helper_vftintrz_l_d)
182
+TRANS(vftintrp_w_s, gen_vv, gen_helper_vftintrp_w_s)
183
+TRANS(vftintrp_l_d, gen_vv, gen_helper_vftintrp_l_d)
184
+TRANS(vftintrm_w_s, gen_vv, gen_helper_vftintrm_w_s)
185
+TRANS(vftintrm_l_d, gen_vv, gen_helper_vftintrm_l_d)
186
+TRANS(vftint_w_s, gen_vv, gen_helper_vftint_w_s)
187
+TRANS(vftint_l_d, gen_vv, gen_helper_vftint_l_d)
188
+TRANS(vftintrz_wu_s, gen_vv, gen_helper_vftintrz_wu_s)
189
+TRANS(vftintrz_lu_d, gen_vv, gen_helper_vftintrz_lu_d)
190
+TRANS(vftint_wu_s, gen_vv, gen_helper_vftint_wu_s)
191
+TRANS(vftint_lu_d, gen_vv, gen_helper_vftint_lu_d)
192
+TRANS(vftintrne_w_d, gen_vvv, gen_helper_vftintrne_w_d)
193
+TRANS(vftintrz_w_d, gen_vvv, gen_helper_vftintrz_w_d)
194
+TRANS(vftintrp_w_d, gen_vvv, gen_helper_vftintrp_w_d)
195
+TRANS(vftintrm_w_d, gen_vvv, gen_helper_vftintrm_w_d)
196
+TRANS(vftint_w_d, gen_vvv, gen_helper_vftint_w_d)
197
+TRANS(vftintrnel_l_s, gen_vv, gen_helper_vftintrnel_l_s)
198
+TRANS(vftintrneh_l_s, gen_vv, gen_helper_vftintrneh_l_s)
199
+TRANS(vftintrzl_l_s, gen_vv, gen_helper_vftintrzl_l_s)
200
+TRANS(vftintrzh_l_s, gen_vv, gen_helper_vftintrzh_l_s)
201
+TRANS(vftintrpl_l_s, gen_vv, gen_helper_vftintrpl_l_s)
202
+TRANS(vftintrph_l_s, gen_vv, gen_helper_vftintrph_l_s)
203
+TRANS(vftintrml_l_s, gen_vv, gen_helper_vftintrml_l_s)
204
+TRANS(vftintrmh_l_s, gen_vv, gen_helper_vftintrmh_l_s)
205
+TRANS(vftintl_l_s, gen_vv, gen_helper_vftintl_l_s)
206
+TRANS(vftinth_l_s, gen_vv, gen_helper_vftinth_l_s)
207
+
208
+TRANS(vffint_s_w, gen_vv, gen_helper_vffint_s_w)
209
+TRANS(vffint_d_l, gen_vv, gen_helper_vffint_d_l)
210
+TRANS(vffint_s_wu, gen_vv, gen_helper_vffint_s_wu)
211
+TRANS(vffint_d_lu, gen_vv, gen_helper_vffint_d_lu)
212
+TRANS(vffintl_d_w, gen_vv, gen_helper_vffintl_d_w)
213
+TRANS(vffinth_d_w, gen_vv, gen_helper_vffinth_d_w)
214
+TRANS(vffint_s_l, gen_vvv, gen_helper_vffint_s_l)
215
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
216
index XXXXXXX..XXXXXXX 100644
217
--- a/target/loongarch/insns.decode
218
+++ b/target/loongarch/insns.decode
219
@@ -XXX,XX +XXX,XX @@ vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv
220
vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv
221
vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv
222
vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv
223
+
224
+vfcvtl_s_h 0111 00101001 11011 11010 ..... ..... @vv
225
+vfcvth_s_h 0111 00101001 11011 11011 ..... ..... @vv
226
+vfcvtl_d_s 0111 00101001 11011 11100 ..... ..... @vv
227
+vfcvth_d_s 0111 00101001 11011 11101 ..... ..... @vv
228
+vfcvt_h_s 0111 00010100 01100 ..... ..... ..... @vvv
229
+vfcvt_s_d 0111 00010100 01101 ..... ..... ..... @vvv
230
+
231
+vfrint_s 0111 00101001 11010 01101 ..... ..... @vv
232
+vfrint_d 0111 00101001 11010 01110 ..... ..... @vv
233
+vfrintrm_s 0111 00101001 11010 10001 ..... ..... @vv
234
+vfrintrm_d 0111 00101001 11010 10010 ..... ..... @vv
235
+vfrintrp_s 0111 00101001 11010 10101 ..... ..... @vv
236
+vfrintrp_d 0111 00101001 11010 10110 ..... ..... @vv
237
+vfrintrz_s 0111 00101001 11010 11001 ..... ..... @vv
238
+vfrintrz_d 0111 00101001 11010 11010 ..... ..... @vv
239
+vfrintrne_s 0111 00101001 11010 11101 ..... ..... @vv
240
+vfrintrne_d 0111 00101001 11010 11110 ..... ..... @vv
241
+
242
+vftint_w_s 0111 00101001 11100 01100 ..... ..... @vv
243
+vftint_l_d 0111 00101001 11100 01101 ..... ..... @vv
244
+vftintrm_w_s 0111 00101001 11100 01110 ..... ..... @vv
245
+vftintrm_l_d 0111 00101001 11100 01111 ..... ..... @vv
246
+vftintrp_w_s 0111 00101001 11100 10000 ..... ..... @vv
247
+vftintrp_l_d 0111 00101001 11100 10001 ..... ..... @vv
248
+vftintrz_w_s 0111 00101001 11100 10010 ..... ..... @vv
249
+vftintrz_l_d 0111 00101001 11100 10011 ..... ..... @vv
250
+vftintrne_w_s 0111 00101001 11100 10100 ..... ..... @vv
251
+vftintrne_l_d 0111 00101001 11100 10101 ..... ..... @vv
252
+vftint_wu_s 0111 00101001 11100 10110 ..... ..... @vv
253
+vftint_lu_d 0111 00101001 11100 10111 ..... ..... @vv
254
+vftintrz_wu_s 0111 00101001 11100 11100 ..... ..... @vv
255
+vftintrz_lu_d 0111 00101001 11100 11101 ..... ..... @vv
256
+vftint_w_d 0111 00010100 10011 ..... ..... ..... @vvv
257
+vftintrm_w_d 0111 00010100 10100 ..... ..... ..... @vvv
258
+vftintrp_w_d 0111 00010100 10101 ..... ..... ..... @vvv
259
+vftintrz_w_d 0111 00010100 10110 ..... ..... ..... @vvv
260
+vftintrne_w_d 0111 00010100 10111 ..... ..... ..... @vvv
261
+vftintl_l_s 0111 00101001 11101 00000 ..... ..... @vv
262
+vftinth_l_s 0111 00101001 11101 00001 ..... ..... @vv
263
+vftintrml_l_s 0111 00101001 11101 00010 ..... ..... @vv
264
+vftintrmh_l_s 0111 00101001 11101 00011 ..... ..... @vv
265
+vftintrpl_l_s 0111 00101001 11101 00100 ..... ..... @vv
266
+vftintrph_l_s 0111 00101001 11101 00101 ..... ..... @vv
267
+vftintrzl_l_s 0111 00101001 11101 00110 ..... ..... @vv
268
+vftintrzh_l_s 0111 00101001 11101 00111 ..... ..... @vv
269
+vftintrnel_l_s 0111 00101001 11101 01000 ..... ..... @vv
270
+vftintrneh_l_s 0111 00101001 11101 01001 ..... ..... @vv
271
+
272
+vffint_s_w 0111 00101001 11100 00000 ..... ..... @vv
273
+vffint_s_wu 0111 00101001 11100 00001 ..... ..... @vv
274
+vffint_d_l 0111 00101001 11100 00010 ..... ..... @vv
275
+vffint_d_lu 0111 00101001 11100 00011 ..... ..... @vv
276
+vffintl_d_w 0111 00101001 11100 00100 ..... ..... @vv
277
+vffinth_d_w 0111 00101001 11100 00101 ..... ..... @vv
278
+vffint_s_l 0111 00010100 10000 ..... ..... ..... @vvv
279
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
280
index XXXXXXX..XXXXXXX 100644
281
--- a/target/loongarch/lsx_helper.c
282
+++ b/target/loongarch/lsx_helper.c
283
@@ -XXX,XX +XXX,XX @@ DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
284
DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
285
DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
286
DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
287
+
288
+static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
289
+{
290
+ return float16_to_float32(h, true, status);
291
+}
292
+static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
293
+{
294
+ return float32_to_float64(s, status);
295
+}
296
+
297
+static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
298
+{
299
+ return float32_to_float16(s, true, status);
300
+}
301
+static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
302
+{
303
+ return float64_to_float32(d, status);
304
+}
305
+
306
+void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
307
+{
308
+ int i;
309
+ VReg temp;
310
+ VReg *Vd = &(env->fpr[vd].vreg);
311
+ VReg *Vj = &(env->fpr[vj].vreg);
312
+
313
+ vec_clear_cause(env);
314
+ for (i = 0; i < LSX_LEN/32; i++) {
315
+ temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
316
+ vec_update_fcsr0(env, GETPC());
317
+ }
318
+ *Vd = temp;
319
+}
320
+
321
+void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
322
+{
323
+ int i;
324
+ VReg temp;
325
+ VReg *Vd = &(env->fpr[vd].vreg);
326
+ VReg *Vj = &(env->fpr[vj].vreg);
327
+
328
+ vec_clear_cause(env);
329
+ for (i = 0; i < LSX_LEN/64; i++) {
330
+ temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
331
+ vec_update_fcsr0(env, GETPC());
332
+ }
333
+ *Vd = temp;
334
+}
335
+
336
+void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
337
+{
338
+ int i;
339
+ VReg temp;
340
+ VReg *Vd = &(env->fpr[vd].vreg);
341
+ VReg *Vj = &(env->fpr[vj].vreg);
342
+
343
+ vec_clear_cause(env);
344
+ for (i = 0; i < LSX_LEN/32; i++) {
345
+ temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
346
+ vec_update_fcsr0(env, GETPC());
347
+ }
348
+ *Vd = temp;
349
+}
350
+
351
+void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
352
+{
353
+ int i;
354
+ VReg temp;
355
+ VReg *Vd = &(env->fpr[vd].vreg);
356
+ VReg *Vj = &(env->fpr[vj].vreg);
357
+
358
+ vec_clear_cause(env);
359
+ for (i = 0; i < LSX_LEN/64; i++) {
360
+ temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
361
+ vec_update_fcsr0(env, GETPC());
362
+ }
363
+ *Vd = temp;
364
+}
365
+
366
+void HELPER(vfcvt_h_s)(CPULoongArchState *env,
367
+ uint32_t vd, uint32_t vj, uint32_t vk)
368
+{
369
+ int i;
370
+ VReg temp;
371
+ VReg *Vd = &(env->fpr[vd].vreg);
372
+ VReg *Vj = &(env->fpr[vj].vreg);
373
+ VReg *Vk = &(env->fpr[vk].vreg);
374
+
375
+ vec_clear_cause(env);
376
+ for(i = 0; i < LSX_LEN/32; i++) {
377
+ temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
378
+ temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status);
379
+ vec_update_fcsr0(env, GETPC());
380
+ }
381
+ *Vd = temp;
382
+}
383
+
384
+void HELPER(vfcvt_s_d)(CPULoongArchState *env,
385
+ uint32_t vd, uint32_t vj, uint32_t vk)
386
+{
387
+ int i;
388
+ VReg temp;
389
+ VReg *Vd = &(env->fpr[vd].vreg);
390
+ VReg *Vj = &(env->fpr[vj].vreg);
391
+ VReg *Vk = &(env->fpr[vk].vreg);
392
+
393
+ vec_clear_cause(env);
394
+ for(i = 0; i < LSX_LEN/64; i++) {
395
+ temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
396
+ temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status);
397
+ vec_update_fcsr0(env, GETPC());
398
+ }
399
+ *Vd = temp;
400
+}
401
+
402
+void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
403
+{
404
+ int i;
405
+ VReg *Vd = &(env->fpr[vd].vreg);
406
+ VReg *Vj = &(env->fpr[vj].vreg);
407
+
408
+ vec_clear_cause(env);
409
+ for (i = 0; i < 4; i++) {
410
+ Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
411
+ vec_update_fcsr0(env, GETPC());
412
+ }
413
+}
414
+
415
+void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
416
+{
417
+ int i;
418
+ VReg *Vd = &(env->fpr[vd].vreg);
419
+ VReg *Vj = &(env->fpr[vj].vreg);
420
+
421
+ vec_clear_cause(env);
422
+ for (i = 0; i < 2; i++) {
423
+ Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
424
+ vec_update_fcsr0(env, GETPC());
425
+ }
426
+}
427
+
428
+#define FCVT_2OP(NAME, BIT, E, MODE) \
429
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
430
+{ \
431
+ int i; \
432
+ VReg *Vd = &(env->fpr[vd].vreg); \
433
+ VReg *Vj = &(env->fpr[vj].vreg); \
434
+ \
435
+ vec_clear_cause(env); \
436
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
437
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
438
+ set_float_rounding_mode(MODE, &env->fp_status); \
439
+ Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
440
+ set_float_rounding_mode(old_mode, &env->fp_status); \
441
+ vec_update_fcsr0(env, GETPC()); \
442
+ } \
443
+}
444
+
445
+FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
446
+FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
447
+FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
448
+FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
449
+FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
450
+FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
451
+FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
452
+FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
453
+
454
+#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \
455
+static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \
456
+{ \
457
+ T2 fd; \
458
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
459
+ \
460
+ set_float_rounding_mode(MODE, &env->fp_status); \
461
+ fd = do_## FMT1 ##_to_## FMT2(env, fj); \
462
+ set_float_rounding_mode(old_mode, &env->fp_status); \
463
+ return fd; \
464
+}
465
+
466
+#define DO_FTINT(FMT1, FMT2, T1, T2) \
467
+static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \
468
+{ \
469
+ T2 fd; \
470
+ \
471
+ fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
472
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
473
+ if (FMT1 ##_is_any_nan(fj)) { \
474
+ fd = 0; \
475
+ } \
476
+ } \
477
+ vec_update_fcsr0(env, GETPC()); \
478
+ return fd; \
479
+}
480
+
481
+DO_FTINT(float32, int32, uint32_t, uint32_t)
482
+DO_FTINT(float64, int64, uint64_t, uint64_t)
483
+DO_FTINT(float32, uint32, uint32_t, uint32_t)
484
+DO_FTINT(float64, uint64, uint64_t, uint64_t)
485
+DO_FTINT(float64, int32, uint64_t, uint32_t)
486
+DO_FTINT(float32, int64, uint32_t, uint64_t)
487
+
488
+FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
489
+FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
490
+FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
491
+FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
492
+FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
493
+FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
494
+FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
495
+FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
496
+
497
+DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
498
+DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
499
+DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
500
+DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
501
+DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
502
+DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
503
+DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
504
+DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
505
+DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
506
+DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
507
+
508
+FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
509
+FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
510
+
511
+DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
512
+DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
513
+DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
514
+DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
515
+
516
+FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
517
+FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
518
+FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
519
+FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
520
+
521
+#define FTINT_W_D(NAME, FN) \
522
+void HELPER(NAME)(CPULoongArchState *env, \
523
+ uint32_t vd, uint32_t vj, uint32_t vk) \
524
+{ \
525
+ int i; \
526
+ VReg temp; \
527
+ VReg *Vd = &(env->fpr[vd].vreg); \
528
+ VReg *Vj = &(env->fpr[vj].vreg); \
529
+ VReg *Vk = &(env->fpr[vk].vreg); \
530
+ \
531
+ vec_clear_cause(env); \
532
+ for (i = 0; i < 2; i++) { \
533
+ temp.W(i + 2) = FN(env, Vj->UD(i)); \
534
+ temp.W(i) = FN(env, Vk->UD(i)); \
535
+ } \
536
+ *Vd = temp; \
537
+}
538
+
539
+FTINT_W_D(vftint_w_d, do_float64_to_int32)
540
+FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
541
+FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
542
+FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
543
+FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
544
+
545
+FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
546
+FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
547
+FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
548
+FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
549
+FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
550
+FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
551
+FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
552
+FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
553
+
554
+#define FTINTL_L_S(NAME, FN) \
555
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
556
+{ \
557
+ int i; \
558
+ VReg temp; \
559
+ VReg *Vd = &(env->fpr[vd].vreg); \
560
+ VReg *Vj = &(env->fpr[vj].vreg); \
561
+ \
562
+ vec_clear_cause(env); \
563
+ for (i = 0; i < 2; i++) { \
564
+ temp.D(i) = FN(env, Vj->UW(i)); \
565
+ } \
566
+ *Vd = temp; \
567
+}
568
+
569
+FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
570
+FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
571
+FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
572
+FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
573
+FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
574
+
575
+#define FTINTH_L_S(NAME, FN) \
576
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
577
+{ \
578
+ int i; \
579
+ VReg temp; \
580
+ VReg *Vd = &(env->fpr[vd].vreg); \
581
+ VReg *Vj = &(env->fpr[vj].vreg); \
582
+ \
583
+ vec_clear_cause(env); \
584
+ for (i = 0; i < 2; i++) { \
585
+ temp.D(i) = FN(env, Vj->UW(i + 2)); \
586
+ } \
587
+ *Vd = temp; \
588
+}
589
+
590
+FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
591
+FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
592
+FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
593
+FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
594
+FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
595
+
596
+#define FFINT(NAME, FMT1, FMT2, T1, T2) \
597
+static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
598
+{ \
599
+ T2 fd; \
600
+ \
601
+ fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
602
+ vec_update_fcsr0(env, GETPC()); \
603
+ return fd; \
604
+}
605
+
606
+FFINT(s_w, int32, float32, int32_t, uint32_t)
607
+FFINT(d_l, int64, float64, int64_t, uint64_t)
608
+FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
609
+FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
610
+
611
+DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
612
+DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
613
+DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
614
+DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
615
+
616
+void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
617
+{
618
+ int i;
619
+ VReg temp;
620
+ VReg *Vd = &(env->fpr[vd].vreg);
621
+ VReg *Vj = &(env->fpr[vj].vreg);
622
+
623
+ vec_clear_cause(env);
624
+ for (i = 0; i < 2; i++) {
625
+ temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
626
+ vec_update_fcsr0(env, GETPC());
627
+ }
628
+ *Vd = temp;
629
+}
630
+
631
+void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
632
+{
633
+ int i;
634
+ VReg temp;
635
+ VReg *Vd = &(env->fpr[vd].vreg);
636
+ VReg *Vj = &(env->fpr[vj].vreg);
637
+
638
+ vec_clear_cause(env);
639
+ for (i = 0; i < 2; i++) {
640
+ temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
641
+ vec_update_fcsr0(env, GETPC());
642
+ }
643
+ *Vd = temp;
644
+}
645
+
646
+void HELPER(vffint_s_l)(CPULoongArchState *env,
647
+ uint32_t vd, uint32_t vj, uint32_t vk)
648
+{
649
+ int i;
650
+ VReg temp;
651
+ VReg *Vd = &(env->fpr[vd].vreg);
652
+ VReg *Vj = &(env->fpr[vj].vreg);
653
+ VReg *Vk = &(env->fpr[vk].vreg);
654
+
655
+ vec_clear_cause(env);
656
+ for (i = 0; i < 2; i++) {
657
+ temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
658
+ temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
659
+ vec_update_fcsr0(env, GETPC());
660
+ }
661
+ *Vd = temp;
662
+}
663
--
664
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VSEQ[I].{B/H/W/D};
3
- VSLE[I].{B/H/W/D}[U];
4
- VSLT[I].{B/H/W/D}[U].
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-36-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 43 +++++
11
target/loongarch/helper.h | 23 +++
12
target/loongarch/insn_trans/trans_lsx.c.inc | 185 ++++++++++++++++++++
13
target/loongarch/insns.decode | 43 +++++
14
target/loongarch/lsx_helper.c | 38 ++++
15
5 files changed, 332 insertions(+)
16
17
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/loongarch/disas.c
20
+++ b/target/loongarch/disas.c
21
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vffint_d_lu, vv)
22
INSN_LSX(vffintl_d_w, vv)
23
INSN_LSX(vffinth_d_w, vv)
24
INSN_LSX(vffint_s_l, vvv)
25
+
26
+INSN_LSX(vseq_b, vvv)
27
+INSN_LSX(vseq_h, vvv)
28
+INSN_LSX(vseq_w, vvv)
29
+INSN_LSX(vseq_d, vvv)
30
+INSN_LSX(vseqi_b, vv_i)
31
+INSN_LSX(vseqi_h, vv_i)
32
+INSN_LSX(vseqi_w, vv_i)
33
+INSN_LSX(vseqi_d, vv_i)
34
+
35
+INSN_LSX(vsle_b, vvv)
36
+INSN_LSX(vsle_h, vvv)
37
+INSN_LSX(vsle_w, vvv)
38
+INSN_LSX(vsle_d, vvv)
39
+INSN_LSX(vslei_b, vv_i)
40
+INSN_LSX(vslei_h, vv_i)
41
+INSN_LSX(vslei_w, vv_i)
42
+INSN_LSX(vslei_d, vv_i)
43
+INSN_LSX(vsle_bu, vvv)
44
+INSN_LSX(vsle_hu, vvv)
45
+INSN_LSX(vsle_wu, vvv)
46
+INSN_LSX(vsle_du, vvv)
47
+INSN_LSX(vslei_bu, vv_i)
48
+INSN_LSX(vslei_hu, vv_i)
49
+INSN_LSX(vslei_wu, vv_i)
50
+INSN_LSX(vslei_du, vv_i)
51
+
52
+INSN_LSX(vslt_b, vvv)
53
+INSN_LSX(vslt_h, vvv)
54
+INSN_LSX(vslt_w, vvv)
55
+INSN_LSX(vslt_d, vvv)
56
+INSN_LSX(vslti_b, vv_i)
57
+INSN_LSX(vslti_h, vv_i)
58
+INSN_LSX(vslti_w, vv_i)
59
+INSN_LSX(vslti_d, vv_i)
60
+INSN_LSX(vslt_bu, vvv)
61
+INSN_LSX(vslt_hu, vvv)
62
+INSN_LSX(vslt_wu, vvv)
63
+INSN_LSX(vslt_du, vvv)
64
+INSN_LSX(vslti_bu, vv_i)
65
+INSN_LSX(vslti_hu, vv_i)
66
+INSN_LSX(vslti_wu, vv_i)
67
+INSN_LSX(vslti_du, vv_i)
68
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
69
index XXXXXXX..XXXXXXX 100644
70
--- a/target/loongarch/helper.h
71
+++ b/target/loongarch/helper.h
72
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
73
DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
74
DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
75
DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
76
+
77
+DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
78
+DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
79
+DEF_HELPER_FLAGS_4(vseqi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
80
+DEF_HELPER_FLAGS_4(vseqi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
81
+
82
+DEF_HELPER_FLAGS_4(vslei_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
83
+DEF_HELPER_FLAGS_4(vslei_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
84
+DEF_HELPER_FLAGS_4(vslei_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
85
+DEF_HELPER_FLAGS_4(vslei_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
86
+DEF_HELPER_FLAGS_4(vslei_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
87
+DEF_HELPER_FLAGS_4(vslei_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
88
+DEF_HELPER_FLAGS_4(vslei_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
89
+DEF_HELPER_FLAGS_4(vslei_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
90
+
91
+DEF_HELPER_FLAGS_4(vslti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
92
+DEF_HELPER_FLAGS_4(vslti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
93
+DEF_HELPER_FLAGS_4(vslti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
94
+DEF_HELPER_FLAGS_4(vslti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
95
+DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
96
+DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
97
+DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
98
+DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
99
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
102
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
103
@@ -XXX,XX +XXX,XX @@ TRANS(vffint_d_lu, gen_vv, gen_helper_vffint_d_lu)
104
TRANS(vffintl_d_w, gen_vv, gen_helper_vffintl_d_w)
105
TRANS(vffinth_d_w, gen_vv, gen_helper_vffinth_d_w)
106
TRANS(vffint_s_l, gen_vvv, gen_helper_vffint_s_l)
107
+
108
+static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
109
+{
110
+ uint32_t vd_ofs, vj_ofs, vk_ofs;
111
+
112
+ CHECK_SXE;
113
+
114
+ vd_ofs = vec_full_offset(a->vd);
115
+ vj_ofs = vec_full_offset(a->vj);
116
+ vk_ofs = vec_full_offset(a->vk);
117
+
118
+ tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
119
+ return true;
120
+}
121
+
122
+static void do_cmpi_vec(TCGCond cond,
123
+ unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
124
+{
125
+ tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm));
126
+}
127
+
128
+static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
129
+{
130
+ do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm);
131
+}
132
+
133
+static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
134
+{
135
+ do_cmpi_vec(TCG_COND_LE, vece, t, a, imm);
136
+}
137
+
138
+static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
139
+{
140
+ do_cmpi_vec(TCG_COND_LT, vece, t, a, imm);
141
+}
142
+
143
+static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
144
+{
145
+ do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm);
146
+}
147
+
148
+static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
149
+{
150
+ do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
151
+}
152
+
153
+#define DO_CMPI_S(NAME) \
154
+static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
155
+{ \
156
+ uint32_t vd_ofs, vj_ofs; \
157
+ \
158
+ CHECK_SXE; \
159
+ \
160
+ static const TCGOpcode vecop_list[] = { \
161
+ INDEX_op_cmp_vec, 0 \
162
+ }; \
163
+ static const GVecGen2i op[4] = { \
164
+ { \
165
+ .fniv = gen_## NAME ##_s_vec, \
166
+ .fnoi = gen_helper_## NAME ##_b, \
167
+ .opt_opc = vecop_list, \
168
+ .vece = MO_8 \
169
+ }, \
170
+ { \
171
+ .fniv = gen_## NAME ##_s_vec, \
172
+ .fnoi = gen_helper_## NAME ##_h, \
173
+ .opt_opc = vecop_list, \
174
+ .vece = MO_16 \
175
+ }, \
176
+ { \
177
+ .fniv = gen_## NAME ##_s_vec, \
178
+ .fnoi = gen_helper_## NAME ##_w, \
179
+ .opt_opc = vecop_list, \
180
+ .vece = MO_32 \
181
+ }, \
182
+ { \
183
+ .fniv = gen_## NAME ##_s_vec, \
184
+ .fnoi = gen_helper_## NAME ##_d, \
185
+ .opt_opc = vecop_list, \
186
+ .vece = MO_64 \
187
+ } \
188
+ }; \
189
+ \
190
+ vd_ofs = vec_full_offset(a->vd); \
191
+ vj_ofs = vec_full_offset(a->vj); \
192
+ \
193
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
194
+ \
195
+ return true; \
196
+}
197
+
198
+DO_CMPI_S(vseqi)
199
+DO_CMPI_S(vslei)
200
+DO_CMPI_S(vslti)
201
+
202
+#define DO_CMPI_U(NAME) \
203
+static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
204
+{ \
205
+ uint32_t vd_ofs, vj_ofs; \
206
+ \
207
+ CHECK_SXE; \
208
+ \
209
+ static const TCGOpcode vecop_list[] = { \
210
+ INDEX_op_cmp_vec, 0 \
211
+ }; \
212
+ static const GVecGen2i op[4] = { \
213
+ { \
214
+ .fniv = gen_## NAME ##_u_vec, \
215
+ .fnoi = gen_helper_## NAME ##_bu, \
216
+ .opt_opc = vecop_list, \
217
+ .vece = MO_8 \
218
+ }, \
219
+ { \
220
+ .fniv = gen_## NAME ##_u_vec, \
221
+ .fnoi = gen_helper_## NAME ##_hu, \
222
+ .opt_opc = vecop_list, \
223
+ .vece = MO_16 \
224
+ }, \
225
+ { \
226
+ .fniv = gen_## NAME ##_u_vec, \
227
+ .fnoi = gen_helper_## NAME ##_wu, \
228
+ .opt_opc = vecop_list, \
229
+ .vece = MO_32 \
230
+ }, \
231
+ { \
232
+ .fniv = gen_## NAME ##_u_vec, \
233
+ .fnoi = gen_helper_## NAME ##_du, \
234
+ .opt_opc = vecop_list, \
235
+ .vece = MO_64 \
236
+ } \
237
+ }; \
238
+ \
239
+ vd_ofs = vec_full_offset(a->vd); \
240
+ vj_ofs = vec_full_offset(a->vj); \
241
+ \
242
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
243
+ \
244
+ return true; \
245
+}
246
+
247
+DO_CMPI_U(vslei)
248
+DO_CMPI_U(vslti)
249
+
250
+TRANS(vseq_b, do_cmp, MO_8, TCG_COND_EQ)
251
+TRANS(vseq_h, do_cmp, MO_16, TCG_COND_EQ)
252
+TRANS(vseq_w, do_cmp, MO_32, TCG_COND_EQ)
253
+TRANS(vseq_d, do_cmp, MO_64, TCG_COND_EQ)
254
+TRANS(vseqi_b, do_vseqi_s, MO_8)
255
+TRANS(vseqi_h, do_vseqi_s, MO_16)
256
+TRANS(vseqi_w, do_vseqi_s, MO_32)
257
+TRANS(vseqi_d, do_vseqi_s, MO_64)
258
+
259
+TRANS(vsle_b, do_cmp, MO_8, TCG_COND_LE)
260
+TRANS(vsle_h, do_cmp, MO_16, TCG_COND_LE)
261
+TRANS(vsle_w, do_cmp, MO_32, TCG_COND_LE)
262
+TRANS(vsle_d, do_cmp, MO_64, TCG_COND_LE)
263
+TRANS(vslei_b, do_vslei_s, MO_8)
264
+TRANS(vslei_h, do_vslei_s, MO_16)
265
+TRANS(vslei_w, do_vslei_s, MO_32)
266
+TRANS(vslei_d, do_vslei_s, MO_64)
267
+TRANS(vsle_bu, do_cmp, MO_8, TCG_COND_LEU)
268
+TRANS(vsle_hu, do_cmp, MO_16, TCG_COND_LEU)
269
+TRANS(vsle_wu, do_cmp, MO_32, TCG_COND_LEU)
270
+TRANS(vsle_du, do_cmp, MO_64, TCG_COND_LEU)
271
+TRANS(vslei_bu, do_vslei_u, MO_8)
272
+TRANS(vslei_hu, do_vslei_u, MO_16)
273
+TRANS(vslei_wu, do_vslei_u, MO_32)
274
+TRANS(vslei_du, do_vslei_u, MO_64)
275
+
276
+TRANS(vslt_b, do_cmp, MO_8, TCG_COND_LT)
277
+TRANS(vslt_h, do_cmp, MO_16, TCG_COND_LT)
278
+TRANS(vslt_w, do_cmp, MO_32, TCG_COND_LT)
279
+TRANS(vslt_d, do_cmp, MO_64, TCG_COND_LT)
280
+TRANS(vslti_b, do_vslti_s, MO_8)
281
+TRANS(vslti_h, do_vslti_s, MO_16)
282
+TRANS(vslti_w, do_vslti_s, MO_32)
283
+TRANS(vslti_d, do_vslti_s, MO_64)
284
+TRANS(vslt_bu, do_cmp, MO_8, TCG_COND_LTU)
285
+TRANS(vslt_hu, do_cmp, MO_16, TCG_COND_LTU)
286
+TRANS(vslt_wu, do_cmp, MO_32, TCG_COND_LTU)
287
+TRANS(vslt_du, do_cmp, MO_64, TCG_COND_LTU)
288
+TRANS(vslti_bu, do_vslti_u, MO_8)
289
+TRANS(vslti_hu, do_vslti_u, MO_16)
290
+TRANS(vslti_wu, do_vslti_u, MO_32)
291
+TRANS(vslti_du, do_vslti_u, MO_64)
292
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
293
index XXXXXXX..XXXXXXX 100644
294
--- a/target/loongarch/insns.decode
295
+++ b/target/loongarch/insns.decode
296
@@ -XXX,XX +XXX,XX @@ vffint_d_lu 0111 00101001 11100 00011 ..... ..... @vv
297
vffintl_d_w 0111 00101001 11100 00100 ..... ..... @vv
298
vffinth_d_w 0111 00101001 11100 00101 ..... ..... @vv
299
vffint_s_l 0111 00010100 10000 ..... ..... ..... @vvv
300
+
301
+vseq_b 0111 00000000 00000 ..... ..... ..... @vvv
302
+vseq_h 0111 00000000 00001 ..... ..... ..... @vvv
303
+vseq_w 0111 00000000 00010 ..... ..... ..... @vvv
304
+vseq_d 0111 00000000 00011 ..... ..... ..... @vvv
305
+vseqi_b 0111 00101000 00000 ..... ..... ..... @vv_i5
306
+vseqi_h 0111 00101000 00001 ..... ..... ..... @vv_i5
307
+vseqi_w 0111 00101000 00010 ..... ..... ..... @vv_i5
308
+vseqi_d 0111 00101000 00011 ..... ..... ..... @vv_i5
309
+
310
+vsle_b 0111 00000000 00100 ..... ..... ..... @vvv
311
+vsle_h 0111 00000000 00101 ..... ..... ..... @vvv
312
+vsle_w 0111 00000000 00110 ..... ..... ..... @vvv
313
+vsle_d 0111 00000000 00111 ..... ..... ..... @vvv
314
+vslei_b 0111 00101000 00100 ..... ..... ..... @vv_i5
315
+vslei_h 0111 00101000 00101 ..... ..... ..... @vv_i5
316
+vslei_w 0111 00101000 00110 ..... ..... ..... @vv_i5
317
+vslei_d 0111 00101000 00111 ..... ..... ..... @vv_i5
318
+vsle_bu 0111 00000000 01000 ..... ..... ..... @vvv
319
+vsle_hu 0111 00000000 01001 ..... ..... ..... @vvv
320
+vsle_wu 0111 00000000 01010 ..... ..... ..... @vvv
321
+vsle_du 0111 00000000 01011 ..... ..... ..... @vvv
322
+vslei_bu 0111 00101000 01000 ..... ..... ..... @vv_ui5
323
+vslei_hu 0111 00101000 01001 ..... ..... ..... @vv_ui5
324
+vslei_wu 0111 00101000 01010 ..... ..... ..... @vv_ui5
325
+vslei_du 0111 00101000 01011 ..... ..... ..... @vv_ui5
326
+
327
+vslt_b 0111 00000000 01100 ..... ..... ..... @vvv
328
+vslt_h 0111 00000000 01101 ..... ..... ..... @vvv
329
+vslt_w 0111 00000000 01110 ..... ..... ..... @vvv
330
+vslt_d 0111 00000000 01111 ..... ..... ..... @vvv
331
+vslti_b 0111 00101000 01100 ..... ..... ..... @vv_i5
332
+vslti_h 0111 00101000 01101 ..... ..... ..... @vv_i5
333
+vslti_w 0111 00101000 01110 ..... ..... ..... @vv_i5
334
+vslti_d 0111 00101000 01111 ..... ..... ..... @vv_i5
335
+vslt_bu 0111 00000000 10000 ..... ..... ..... @vvv
336
+vslt_hu 0111 00000000 10001 ..... ..... ..... @vvv
337
+vslt_wu 0111 00000000 10010 ..... ..... ..... @vvv
338
+vslt_du 0111 00000000 10011 ..... ..... ..... @vvv
339
+vslti_bu 0111 00101000 10000 ..... ..... ..... @vv_ui5
340
+vslti_hu 0111 00101000 10001 ..... ..... ..... @vv_ui5
341
+vslti_wu 0111 00101000 10010 ..... ..... ..... @vv_ui5
342
+vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5
343
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/loongarch/lsx_helper.c
346
+++ b/target/loongarch/lsx_helper.c
347
@@ -XXX,XX +XXX,XX @@ void HELPER(vffint_s_l)(CPULoongArchState *env,
348
}
349
*Vd = temp;
350
}
351
+
352
+#define VSEQ(a, b) (a == b ? -1 : 0)
353
+#define VSLE(a, b) (a <= b ? -1 : 0)
354
+#define VSLT(a, b) (a < b ? -1 : 0)
355
+
356
+#define VCMPI(NAME, BIT, E, DO_OP) \
357
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
358
+{ \
359
+ int i; \
360
+ VReg *Vd = (VReg *)vd; \
361
+ VReg *Vj = (VReg *)vj; \
362
+ typedef __typeof(Vd->E(0)) TD; \
363
+ \
364
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
365
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
366
+ } \
367
+}
368
+
369
+VCMPI(vseqi_b, 8, B, VSEQ)
370
+VCMPI(vseqi_h, 16, H, VSEQ)
371
+VCMPI(vseqi_w, 32, W, VSEQ)
372
+VCMPI(vseqi_d, 64, D, VSEQ)
373
+VCMPI(vslei_b, 8, B, VSLE)
374
+VCMPI(vslei_h, 16, H, VSLE)
375
+VCMPI(vslei_w, 32, W, VSLE)
376
+VCMPI(vslei_d, 64, D, VSLE)
377
+VCMPI(vslei_bu, 8, UB, VSLE)
378
+VCMPI(vslei_hu, 16, UH, VSLE)
379
+VCMPI(vslei_wu, 32, UW, VSLE)
380
+VCMPI(vslei_du, 64, UD, VSLE)
381
+VCMPI(vslti_b, 8, B, VSLT)
382
+VCMPI(vslti_h, 16, H, VSLT)
383
+VCMPI(vslti_w, 32, W, VSLT)
384
+VCMPI(vslti_d, 64, D, VSLT)
385
+VCMPI(vslti_bu, 8, UB, VSLT)
386
+VCMPI(vslti_hu, 16, UH, VSLT)
387
+VCMPI(vslti_wu, 32, UW, VSLT)
388
+VCMPI(vslti_du, 64, UD, VSLT)
389
--
390
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VFCMP.cond.{S/D}.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-37-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 94 +++++++++++++++++++++
9
target/loongarch/helper.h | 5 ++
10
target/loongarch/insn_trans/trans_lsx.c.inc | 32 +++++++
11
target/loongarch/insns.decode | 5 ++
12
target/loongarch/lsx_helper.c | 54 ++++++++++++
13
5 files changed, 190 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vslti_bu, vv_i)
20
INSN_LSX(vslti_hu, vv_i)
21
INSN_LSX(vslti_wu, vv_i)
22
INSN_LSX(vslti_du, vv_i)
23
+
24
+#define output_vfcmp(C, PREFIX, SUFFIX) \
25
+{ \
26
+ (C)->info->fprintf_func((C)->info->stream, "%08x %s%s\t%d, f%d, f%d", \
27
+ (C)->insn, PREFIX, SUFFIX, a->vd, \
28
+ a->vj, a->vk); \
29
+}
30
+
31
+static bool output_vvv_fcond(DisasContext *ctx, arg_vvv_fcond * a,
32
+ const char *suffix)
33
+{
34
+ bool ret = true;
35
+ switch (a->fcond) {
36
+ case 0x0:
37
+ output_vfcmp(ctx, "vfcmp_caf_", suffix);
38
+ break;
39
+ case 0x1:
40
+ output_vfcmp(ctx, "vfcmp_saf_", suffix);
41
+ break;
42
+ case 0x2:
43
+ output_vfcmp(ctx, "vfcmp_clt_", suffix);
44
+ break;
45
+ case 0x3:
46
+ output_vfcmp(ctx, "vfcmp_slt_", suffix);
47
+ break;
48
+ case 0x4:
49
+ output_vfcmp(ctx, "vfcmp_ceq_", suffix);
50
+ break;
51
+ case 0x5:
52
+ output_vfcmp(ctx, "vfcmp_seq_", suffix);
53
+ break;
54
+ case 0x6:
55
+ output_vfcmp(ctx, "vfcmp_cle_", suffix);
56
+ break;
57
+ case 0x7:
58
+ output_vfcmp(ctx, "vfcmp_sle_", suffix);
59
+ break;
60
+ case 0x8:
61
+ output_vfcmp(ctx, "vfcmp_cun_", suffix);
62
+ break;
63
+ case 0x9:
64
+ output_vfcmp(ctx, "vfcmp_sun_", suffix);
65
+ break;
66
+ case 0xA:
67
+ output_vfcmp(ctx, "vfcmp_cult_", suffix);
68
+ break;
69
+ case 0xB:
70
+ output_vfcmp(ctx, "vfcmp_sult_", suffix);
71
+ break;
72
+ case 0xC:
73
+ output_vfcmp(ctx, "vfcmp_cueq_", suffix);
74
+ break;
75
+ case 0xD:
76
+ output_vfcmp(ctx, "vfcmp_sueq_", suffix);
77
+ break;
78
+ case 0xE:
79
+ output_vfcmp(ctx, "vfcmp_cule_", suffix);
80
+ break;
81
+ case 0xF:
82
+ output_vfcmp(ctx, "vfcmp_sule_", suffix);
83
+ break;
84
+ case 0x10:
85
+ output_vfcmp(ctx, "vfcmp_cne_", suffix);
86
+ break;
87
+ case 0x11:
88
+ output_vfcmp(ctx, "vfcmp_sne_", suffix);
89
+ break;
90
+ case 0x14:
91
+ output_vfcmp(ctx, "vfcmp_cor_", suffix);
92
+ break;
93
+ case 0x15:
94
+ output_vfcmp(ctx, "vfcmp_sor_", suffix);
95
+ break;
96
+ case 0x18:
97
+ output_vfcmp(ctx, "vfcmp_cune_", suffix);
98
+ break;
99
+ case 0x19:
100
+ output_vfcmp(ctx, "vfcmp_sune_", suffix);
101
+ break;
102
+ default:
103
+ ret = false;
104
+ }
105
+ return ret;
106
+}
107
+
108
+#define LSX_FCMP_INSN(suffix) \
109
+static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, \
110
+ arg_vvv_fcond * a) \
111
+{ \
112
+ return output_vvv_fcond(ctx, a, #suffix); \
113
+}
114
+
115
+LSX_FCMP_INSN(s)
116
+LSX_FCMP_INSN(d)
117
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
118
index XXXXXXX..XXXXXXX 100644
119
--- a/target/loongarch/helper.h
120
+++ b/target/loongarch/helper.h
121
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
122
DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
123
DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
124
DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
125
+
126
+DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
127
+DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
128
+DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
129
+DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
130
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
131
index XXXXXXX..XXXXXXX 100644
132
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
133
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
134
@@ -XXX,XX +XXX,XX @@ TRANS(vslti_bu, do_vslti_u, MO_8)
135
TRANS(vslti_hu, do_vslti_u, MO_16)
136
TRANS(vslti_wu, do_vslti_u, MO_32)
137
TRANS(vslti_du, do_vslti_u, MO_64)
138
+
139
+static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
140
+{
141
+ uint32_t flags;
142
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
143
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
144
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
145
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
146
+
147
+ CHECK_SXE;
148
+
149
+ fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
150
+ flags = get_fcmp_flags(a->fcond >> 1);
151
+ fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
152
+
153
+ return true;
154
+}
155
+
156
+static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
157
+{
158
+ uint32_t flags;
159
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
160
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
161
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
162
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
163
+
164
+ fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
165
+ flags = get_fcmp_flags(a->fcond >> 1);
166
+ fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
167
+
168
+ return true;
169
+}
170
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
171
index XXXXXXX..XXXXXXX 100644
172
--- a/target/loongarch/insns.decode
173
+++ b/target/loongarch/insns.decode
174
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
175
&vvv vd vj vk
176
&vv_i vd vj imm
177
&vvvv vd vj vk va
178
+&vvv_fcond vd vj vk fcond
179
180
#
181
# LSX Formats
182
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
183
@vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i
184
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
185
@vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv
186
+@vvv_fcond .... ........ fcond:5 vk:5 vj:5 vd:5 &vvv_fcond
187
188
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
189
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
190
@@ -XXX,XX +XXX,XX @@ vslti_bu 0111 00101000 10000 ..... ..... ..... @vv_ui5
191
vslti_hu 0111 00101000 10001 ..... ..... ..... @vv_ui5
192
vslti_wu 0111 00101000 10010 ..... ..... ..... @vv_ui5
193
vslti_du 0111 00101000 10011 ..... ..... ..... @vv_ui5
194
+
195
+vfcmp_cond_s 0000 11000101 ..... ..... ..... ..... @vvv_fcond
196
+vfcmp_cond_d 0000 11000110 ..... ..... ..... ..... @vvv_fcond
197
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
198
index XXXXXXX..XXXXXXX 100644
199
--- a/target/loongarch/lsx_helper.c
200
+++ b/target/loongarch/lsx_helper.c
201
@@ -XXX,XX +XXX,XX @@ VCMPI(vslti_bu, 8, UB, VSLT)
202
VCMPI(vslti_hu, 16, UH, VSLT)
203
VCMPI(vslti_wu, 32, UW, VSLT)
204
VCMPI(vslti_du, 64, UD, VSLT)
205
+
206
+static uint64_t vfcmp_common(CPULoongArchState *env,
207
+ FloatRelation cmp, uint32_t flags)
208
+{
209
+ uint64_t ret = 0;
210
+
211
+ switch (cmp) {
212
+ case float_relation_less:
213
+ ret = (flags & FCMP_LT);
214
+ break;
215
+ case float_relation_equal:
216
+ ret = (flags & FCMP_EQ);
217
+ break;
218
+ case float_relation_greater:
219
+ ret = (flags & FCMP_GT);
220
+ break;
221
+ case float_relation_unordered:
222
+ ret = (flags & FCMP_UN);
223
+ break;
224
+ default:
225
+ g_assert_not_reached();
226
+ }
227
+
228
+ if (ret) {
229
+ ret = -1;
230
+ }
231
+
232
+ return ret;
233
+}
234
+
235
+#define VFCMP(NAME, BIT, E, FN) \
236
+void HELPER(NAME)(CPULoongArchState *env, \
237
+ uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
238
+{ \
239
+ int i; \
240
+ VReg t; \
241
+ VReg *Vd = &(env->fpr[vd].vreg); \
242
+ VReg *Vj = &(env->fpr[vj].vreg); \
243
+ VReg *Vk = &(env->fpr[vk].vreg); \
244
+ \
245
+ vec_clear_cause(env); \
246
+ for (i = 0; i < LSX_LEN/BIT ; i++) { \
247
+ FloatRelation cmp; \
248
+ cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
249
+ t.E(i) = vfcmp_common(env, cmp, flags); \
250
+ vec_update_fcsr0(env, GETPC()); \
251
+ } \
252
+ *Vd = t; \
253
+}
254
+
255
+VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
256
+VFCMP(vfcmp_s_s, 32, UW, float32_compare)
257
+VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
258
+VFCMP(vfcmp_s_d, 64, UD, float64_compare)
259
--
260
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VINSGR2VR.{B/H/W/D};
3
- VPICKVE2GR.{B/H/W/D}[U];
4
- VREPLGR2VR.{B/H/W/D}.
5
1
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Song Gao <gaosong@loongson.cn>
8
Message-Id: <20230504122810.4094787-39-gaosong@loongson.cn>
9
---
10
target/loongarch/disas.c | 33 ++++++
11
target/loongarch/insn_trans/trans_lsx.c.inc | 110 ++++++++++++++++++++
12
target/loongarch/insns.decode | 30 ++++++
13
3 files changed, 173 insertions(+)
14
15
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/loongarch/disas.c
18
+++ b/target/loongarch/disas.c
19
@@ -XXX,XX +XXX,XX @@ static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
20
output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va);
21
}
22
23
+static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
24
+{
25
+ output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm);
26
+}
27
+
28
+static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
29
+{
30
+ output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj, a->imm);
31
+}
32
+
33
+static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic)
34
+{
35
+ output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj);
36
+}
37
+
38
INSN_LSX(vadd_b, vvv)
39
INSN_LSX(vadd_h, vvv)
40
INSN_LSX(vadd_w, vvv)
41
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vsetallnez_b, cv)
42
INSN_LSX(vsetallnez_h, cv)
43
INSN_LSX(vsetallnez_w, cv)
44
INSN_LSX(vsetallnez_d, cv)
45
+
46
+INSN_LSX(vinsgr2vr_b, vr_i)
47
+INSN_LSX(vinsgr2vr_h, vr_i)
48
+INSN_LSX(vinsgr2vr_w, vr_i)
49
+INSN_LSX(vinsgr2vr_d, vr_i)
50
+INSN_LSX(vpickve2gr_b, rv_i)
51
+INSN_LSX(vpickve2gr_h, rv_i)
52
+INSN_LSX(vpickve2gr_w, rv_i)
53
+INSN_LSX(vpickve2gr_d, rv_i)
54
+INSN_LSX(vpickve2gr_bu, rv_i)
55
+INSN_LSX(vpickve2gr_hu, rv_i)
56
+INSN_LSX(vpickve2gr_wu, rv_i)
57
+INSN_LSX(vpickve2gr_du, rv_i)
58
+
59
+INSN_LSX(vreplgr2vr_b, vr)
60
+INSN_LSX(vreplgr2vr_h, vr)
61
+INSN_LSX(vreplgr2vr_w, vr)
62
+INSN_LSX(vreplgr2vr_d, vr)
63
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
66
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
67
@@ -XXX,XX +XXX,XX @@ TRANS(vsetallnez_b, gen_cv, gen_helper_vsetallnez_b)
68
TRANS(vsetallnez_h, gen_cv, gen_helper_vsetallnez_h)
69
TRANS(vsetallnez_w, gen_cv, gen_helper_vsetallnez_w)
70
TRANS(vsetallnez_d, gen_cv, gen_helper_vsetallnez_d)
71
+
72
+static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
73
+{
74
+ CHECK_SXE;
75
+ tcg_gen_st8_i64(cpu_gpr[a->rj], cpu_env,
76
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
77
+ return true;
78
+}
79
+
80
+static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
81
+{
82
+ CHECK_SXE;
83
+ tcg_gen_st16_i64(cpu_gpr[a->rj], cpu_env,
84
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
85
+ return true;
86
+}
87
+
88
+static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
89
+{
90
+ CHECK_SXE;
91
+ tcg_gen_st32_i64(cpu_gpr[a->rj], cpu_env,
92
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
93
+ return true;
94
+}
95
+
96
+static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
97
+{
98
+ CHECK_SXE;
99
+ tcg_gen_st_i64(cpu_gpr[a->rj], cpu_env,
100
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
101
+ return true;
102
+}
103
+
104
+static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
105
+{
106
+ CHECK_SXE;
107
+ tcg_gen_ld8s_i64(cpu_gpr[a->rd], cpu_env,
108
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
109
+ return true;
110
+}
111
+
112
+static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
113
+{
114
+ CHECK_SXE;
115
+ tcg_gen_ld16s_i64(cpu_gpr[a->rd], cpu_env,
116
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
117
+ return true;
118
+}
119
+
120
+static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
121
+{
122
+ CHECK_SXE;
123
+ tcg_gen_ld32s_i64(cpu_gpr[a->rd], cpu_env,
124
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
125
+ return true;
126
+}
127
+
128
+static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
129
+{
130
+ CHECK_SXE;
131
+ tcg_gen_ld_i64(cpu_gpr[a->rd], cpu_env,
132
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
133
+ return true;
134
+}
135
+
136
+static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
137
+{
138
+ CHECK_SXE;
139
+ tcg_gen_ld8u_i64(cpu_gpr[a->rd], cpu_env,
140
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
141
+ return true;
142
+}
143
+
144
+static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
145
+{
146
+ CHECK_SXE;
147
+ tcg_gen_ld16u_i64(cpu_gpr[a->rd], cpu_env,
148
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
149
+ return true;
150
+}
151
+
152
+static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
153
+{
154
+ CHECK_SXE;
155
+ tcg_gen_ld32u_i64(cpu_gpr[a->rd], cpu_env,
156
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
157
+ return true;
158
+}
159
+
160
+static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
161
+{
162
+ CHECK_SXE;
163
+ tcg_gen_ld_i64(cpu_gpr[a->rd], cpu_env,
164
+ offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
165
+ return true;
166
+}
167
+
168
+static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
169
+{
170
+ CHECK_SXE;
171
+
172
+ tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
173
+ 16, ctx->vl/8, cpu_gpr[a->rj]);
174
+ return true;
175
+}
176
+
177
+TRANS(vreplgr2vr_b, gvec_dup, MO_8)
178
+TRANS(vreplgr2vr_h, gvec_dup, MO_16)
179
+TRANS(vreplgr2vr_w, gvec_dup, MO_32)
180
+TRANS(vreplgr2vr_d, gvec_dup, MO_64)
181
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
182
index XXXXXXX..XXXXXXX 100644
183
--- a/target/loongarch/insns.decode
184
+++ b/target/loongarch/insns.decode
185
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
186
&vv_i vd vj imm
187
&vvvv vd vj vk va
188
&vvv_fcond vd vj vk fcond
189
+&vr_i vd rj imm
190
+&rv_i rd vj imm
191
+&vr vd rj
192
193
#
194
# LSX Formats
195
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
196
@vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i
197
@vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv
198
@vvv_fcond .... ........ fcond:5 vk:5 vj:5 vd:5 &vvv_fcond
199
+@vr_ui4 .... ........ ..... . imm:4 rj:5 vd:5 &vr_i
200
+@vr_ui3 .... ........ ..... .. imm:3 rj:5 vd:5 &vr_i
201
+@vr_ui2 .... ........ ..... ... imm:2 rj:5 vd:5 &vr_i
202
+@vr_ui1 .... ........ ..... .... imm:1 rj:5 vd:5 &vr_i
203
+@rv_ui4 .... ........ ..... . imm:4 vj:5 rd:5 &rv_i
204
+@rv_ui3 .... ........ ..... .. imm:3 vj:5 rd:5 &rv_i
205
+@rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i
206
+@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
207
+@vr .... ........ ..... ..... rj:5 vd:5 &vr
208
209
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
210
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
211
@@ -XXX,XX +XXX,XX @@ vsetallnez_b 0111 00101001 11001 01100 ..... 00 ... @cv
212
vsetallnez_h 0111 00101001 11001 01101 ..... 00 ... @cv
213
vsetallnez_w 0111 00101001 11001 01110 ..... 00 ... @cv
214
vsetallnez_d 0111 00101001 11001 01111 ..... 00 ... @cv
215
+
216
+vinsgr2vr_b 0111 00101110 10111 0 .... ..... ..... @vr_ui4
217
+vinsgr2vr_h 0111 00101110 10111 10 ... ..... ..... @vr_ui3
218
+vinsgr2vr_w 0111 00101110 10111 110 .. ..... ..... @vr_ui2
219
+vinsgr2vr_d 0111 00101110 10111 1110 . ..... ..... @vr_ui1
220
+vpickve2gr_b 0111 00101110 11111 0 .... ..... ..... @rv_ui4
221
+vpickve2gr_h 0111 00101110 11111 10 ... ..... ..... @rv_ui3
222
+vpickve2gr_w 0111 00101110 11111 110 .. ..... ..... @rv_ui2
223
+vpickve2gr_d 0111 00101110 11111 1110 . ..... ..... @rv_ui1
224
+vpickve2gr_bu 0111 00101111 00111 0 .... ..... ..... @rv_ui4
225
+vpickve2gr_hu 0111 00101111 00111 10 ... ..... ..... @rv_ui3
226
+vpickve2gr_wu 0111 00101111 00111 110 .. ..... ..... @rv_ui2
227
+vpickve2gr_du 0111 00101111 00111 1110 . ..... ..... @rv_ui1
228
+
229
+vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr
230
+vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr
231
+vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr
232
+vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr
233
--
234
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VILV{L/H}.{B/H/W/D};
3
- VSHUF.{B/H/W/D};
4
- VSHUF4I.{B/H/W/D};
5
- VPERMI.W;
6
- VEXTRINS.{B/H/W/D}.
7
1
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Song Gao <gaosong@loongson.cn>
10
Message-Id: <20230504122810.4094787-41-gaosong@loongson.cn>
11
---
12
target/loongarch/disas.c | 25 ++++
13
target/loongarch/helper.h | 25 ++++
14
target/loongarch/insn_trans/trans_lsx.c.inc | 25 ++++
15
target/loongarch/insns.decode | 25 ++++
16
target/loongarch/lsx_helper.c | 148 ++++++++++++++++++++
17
5 files changed, 248 insertions(+)
18
19
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/loongarch/disas.c
22
+++ b/target/loongarch/disas.c
23
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vpickod_b, vvv)
24
INSN_LSX(vpickod_h, vvv)
25
INSN_LSX(vpickod_w, vvv)
26
INSN_LSX(vpickod_d, vvv)
27
+
28
+INSN_LSX(vilvl_b, vvv)
29
+INSN_LSX(vilvl_h, vvv)
30
+INSN_LSX(vilvl_w, vvv)
31
+INSN_LSX(vilvl_d, vvv)
32
+INSN_LSX(vilvh_b, vvv)
33
+INSN_LSX(vilvh_h, vvv)
34
+INSN_LSX(vilvh_w, vvv)
35
+INSN_LSX(vilvh_d, vvv)
36
+
37
+INSN_LSX(vshuf_b, vvvv)
38
+INSN_LSX(vshuf_h, vvv)
39
+INSN_LSX(vshuf_w, vvv)
40
+INSN_LSX(vshuf_d, vvv)
41
+INSN_LSX(vshuf4i_b, vv_i)
42
+INSN_LSX(vshuf4i_h, vv_i)
43
+INSN_LSX(vshuf4i_w, vv_i)
44
+INSN_LSX(vshuf4i_d, vv_i)
45
+
46
+INSN_LSX(vpermi_w, vv_i)
47
+
48
+INSN_LSX(vextrins_d, vv_i)
49
+INSN_LSX(vextrins_w, vv_i)
50
+INSN_LSX(vextrins_h, vv_i)
51
+INSN_LSX(vextrins_b, vv_i)
52
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/target/loongarch/helper.h
55
+++ b/target/loongarch/helper.h
56
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
57
DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
58
DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
59
DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
60
+
61
+DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
62
+DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
63
+DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
64
+DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
65
+DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
66
+DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
67
+DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
68
+DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
69
+
70
+DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
71
+DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
72
+DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
73
+DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
74
+DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
75
+DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
76
+DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
77
+DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32)
78
+
79
+DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32)
80
+
81
+DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32)
82
+DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32)
83
+DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32)
84
+DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32)
85
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
88
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
89
@@ -XXX,XX +XXX,XX @@ TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b)
90
TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h)
91
TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w)
92
TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d)
93
+
94
+TRANS(vilvl_b, gen_vvv, gen_helper_vilvl_b)
95
+TRANS(vilvl_h, gen_vvv, gen_helper_vilvl_h)
96
+TRANS(vilvl_w, gen_vvv, gen_helper_vilvl_w)
97
+TRANS(vilvl_d, gen_vvv, gen_helper_vilvl_d)
98
+TRANS(vilvh_b, gen_vvv, gen_helper_vilvh_b)
99
+TRANS(vilvh_h, gen_vvv, gen_helper_vilvh_h)
100
+TRANS(vilvh_w, gen_vvv, gen_helper_vilvh_w)
101
+TRANS(vilvh_d, gen_vvv, gen_helper_vilvh_d)
102
+
103
+TRANS(vshuf_b, gen_vvvv, gen_helper_vshuf_b)
104
+TRANS(vshuf_h, gen_vvv, gen_helper_vshuf_h)
105
+TRANS(vshuf_w, gen_vvv, gen_helper_vshuf_w)
106
+TRANS(vshuf_d, gen_vvv, gen_helper_vshuf_d)
107
+TRANS(vshuf4i_b, gen_vv_i, gen_helper_vshuf4i_b)
108
+TRANS(vshuf4i_h, gen_vv_i, gen_helper_vshuf4i_h)
109
+TRANS(vshuf4i_w, gen_vv_i, gen_helper_vshuf4i_w)
110
+TRANS(vshuf4i_d, gen_vv_i, gen_helper_vshuf4i_d)
111
+
112
+TRANS(vpermi_w, gen_vv_i, gen_helper_vpermi_w)
113
+
114
+TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b)
115
+TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h)
116
+TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w)
117
+TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d)
118
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
119
index XXXXXXX..XXXXXXX 100644
120
--- a/target/loongarch/insns.decode
121
+++ b/target/loongarch/insns.decode
122
@@ -XXX,XX +XXX,XX @@ vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv
123
vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv
124
vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv
125
vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv
126
+
127
+vilvl_b 0111 00010001 10100 ..... ..... ..... @vvv
128
+vilvl_h 0111 00010001 10101 ..... ..... ..... @vvv
129
+vilvl_w 0111 00010001 10110 ..... ..... ..... @vvv
130
+vilvl_d 0111 00010001 10111 ..... ..... ..... @vvv
131
+vilvh_b 0111 00010001 11000 ..... ..... ..... @vvv
132
+vilvh_h 0111 00010001 11001 ..... ..... ..... @vvv
133
+vilvh_w 0111 00010001 11010 ..... ..... ..... @vvv
134
+vilvh_d 0111 00010001 11011 ..... ..... ..... @vvv
135
+
136
+vshuf_b 0000 11010101 ..... ..... ..... ..... @vvvv
137
+vshuf_h 0111 00010111 10101 ..... ..... ..... @vvv
138
+vshuf_w 0111 00010111 10110 ..... ..... ..... @vvv
139
+vshuf_d 0111 00010111 10111 ..... ..... ..... @vvv
140
+vshuf4i_b 0111 00111001 00 ........ ..... ..... @vv_ui8
141
+vshuf4i_h 0111 00111001 01 ........ ..... ..... @vv_ui8
142
+vshuf4i_w 0111 00111001 10 ........ ..... ..... @vv_ui8
143
+vshuf4i_d 0111 00111001 11 ........ ..... ..... @vv_ui8
144
+
145
+vpermi_w 0111 00111110 01 ........ ..... ..... @vv_ui8
146
+
147
+vextrins_d 0111 00111000 00 ........ ..... ..... @vv_ui8
148
+vextrins_w 0111 00111000 01 ........ ..... ..... @vv_ui8
149
+vextrins_h 0111 00111000 10 ........ ..... ..... @vv_ui8
150
+vextrins_b 0111 00111000 11 ........ ..... ..... @vv_ui8
151
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/target/loongarch/lsx_helper.c
154
+++ b/target/loongarch/lsx_helper.c
155
@@ -XXX,XX +XXX,XX @@ VPICKOD(vpickod_b, 16, B)
156
VPICKOD(vpickod_h, 32, H)
157
VPICKOD(vpickod_w, 64, W)
158
VPICKOD(vpickod_d, 128, D)
159
+
160
+#define VILVL(NAME, BIT, E) \
161
+void HELPER(NAME)(CPULoongArchState *env, \
162
+ uint32_t vd, uint32_t vj, uint32_t vk) \
163
+{ \
164
+ int i; \
165
+ VReg temp; \
166
+ VReg *Vd = &(env->fpr[vd].vreg); \
167
+ VReg *Vj = &(env->fpr[vj].vreg); \
168
+ VReg *Vk = &(env->fpr[vk].vreg); \
169
+ \
170
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
171
+ temp.E(2 * i + 1) = Vj->E(i); \
172
+ temp.E(2 * i) = Vk->E(i); \
173
+ } \
174
+ *Vd = temp; \
175
+}
176
+
177
+VILVL(vilvl_b, 16, B)
178
+VILVL(vilvl_h, 32, H)
179
+VILVL(vilvl_w, 64, W)
180
+VILVL(vilvl_d, 128, D)
181
+
182
+#define VILVH(NAME, BIT, E) \
183
+void HELPER(NAME)(CPULoongArchState *env, \
184
+ uint32_t vd, uint32_t vj, uint32_t vk) \
185
+{ \
186
+ int i; \
187
+ VReg temp; \
188
+ VReg *Vd = &(env->fpr[vd].vreg); \
189
+ VReg *Vj = &(env->fpr[vj].vreg); \
190
+ VReg *Vk = &(env->fpr[vk].vreg); \
191
+ \
192
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
193
+ temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
194
+ temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
195
+ } \
196
+ *Vd = temp; \
197
+}
198
+
199
+VILVH(vilvh_b, 16, B)
200
+VILVH(vilvh_h, 32, H)
201
+VILVH(vilvh_w, 64, W)
202
+VILVH(vilvh_d, 128, D)
203
+
204
+void HELPER(vshuf_b)(CPULoongArchState *env,
205
+ uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
206
+{
207
+ int i, m;
208
+ VReg temp;
209
+ VReg *Vd = &(env->fpr[vd].vreg);
210
+ VReg *Vj = &(env->fpr[vj].vreg);
211
+ VReg *Vk = &(env->fpr[vk].vreg);
212
+ VReg *Va = &(env->fpr[va].vreg);
213
+
214
+ m = LSX_LEN/8;
215
+ for (i = 0; i < m ; i++) {
216
+ uint64_t k = (uint8_t)Va->B(i) % (2 * m);
217
+ temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
218
+ }
219
+ *Vd = temp;
220
+}
221
+
222
+#define VSHUF(NAME, BIT, E) \
223
+void HELPER(NAME)(CPULoongArchState *env, \
224
+ uint32_t vd, uint32_t vj, uint32_t vk) \
225
+{ \
226
+ int i, m; \
227
+ VReg temp; \
228
+ VReg *Vd = &(env->fpr[vd].vreg); \
229
+ VReg *Vj = &(env->fpr[vj].vreg); \
230
+ VReg *Vk = &(env->fpr[vk].vreg); \
231
+ \
232
+ m = LSX_LEN/BIT; \
233
+ for (i = 0; i < m; i++) { \
234
+ uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
235
+ temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
236
+ } \
237
+ *Vd = temp; \
238
+}
239
+
240
+VSHUF(vshuf_h, 16, H)
241
+VSHUF(vshuf_w, 32, W)
242
+VSHUF(vshuf_d, 64, D)
243
+
244
+#define VSHUF4I(NAME, BIT, E) \
245
+void HELPER(NAME)(CPULoongArchState *env, \
246
+ uint32_t vd, uint32_t vj, uint32_t imm) \
247
+{ \
248
+ int i; \
249
+ VReg temp; \
250
+ VReg *Vd = &(env->fpr[vd].vreg); \
251
+ VReg *Vj = &(env->fpr[vj].vreg); \
252
+ \
253
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
254
+ temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
255
+ (2 * ((i) & 0x03))) & 0x03)); \
256
+ } \
257
+ *Vd = temp; \
258
+}
259
+
260
+VSHUF4I(vshuf4i_b, 8, B)
261
+VSHUF4I(vshuf4i_h, 16, H)
262
+VSHUF4I(vshuf4i_w, 32, W)
263
+
264
+void HELPER(vshuf4i_d)(CPULoongArchState *env,
265
+ uint32_t vd, uint32_t vj, uint32_t imm)
266
+{
267
+ VReg *Vd = &(env->fpr[vd].vreg);
268
+ VReg *Vj = &(env->fpr[vj].vreg);
269
+
270
+ VReg temp;
271
+ temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
272
+ temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
273
+ *Vd = temp;
274
+}
275
+
276
+void HELPER(vpermi_w)(CPULoongArchState *env,
277
+ uint32_t vd, uint32_t vj, uint32_t imm)
278
+{
279
+ VReg temp;
280
+ VReg *Vd = &(env->fpr[vd].vreg);
281
+ VReg *Vj = &(env->fpr[vj].vreg);
282
+
283
+ temp.W(0) = Vj->W(imm & 0x3);
284
+ temp.W(1) = Vj->W((imm >> 2) & 0x3);
285
+ temp.W(2) = Vd->W((imm >> 4) & 0x3);
286
+ temp.W(3) = Vd->W((imm >> 6) & 0x3);
287
+ *Vd = temp;
288
+}
289
+
290
+#define VEXTRINS(NAME, BIT, E, MASK) \
291
+void HELPER(NAME)(CPULoongArchState *env, \
292
+ uint32_t vd, uint32_t vj, uint32_t imm) \
293
+{ \
294
+ int ins, extr; \
295
+ VReg *Vd = &(env->fpr[vd].vreg); \
296
+ VReg *Vj = &(env->fpr[vj].vreg); \
297
+ \
298
+ ins = (imm >> 4) & MASK; \
299
+ extr = imm & MASK; \
300
+ Vd->E(ins) = Vj->E(extr); \
301
+}
302
+
303
+VEXTRINS(vextrins_b, 8, B, 0xf)
304
+VEXTRINS(vextrins_h, 16, H, 0x7)
305
+VEXTRINS(vextrins_w, 32, W, 0x3)
306
+VEXTRINS(vextrins_d, 64, D, 0x1)
307
--
308
2.31.1
diff view generated by jsdifflib
Deleted patch
1
This patch includes:
2
- VLDI.
3
1
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Song Gao <gaosong@loongson.cn>
6
Message-Id: <20230504122810.4094787-43-gaosong@loongson.cn>
7
---
8
target/loongarch/disas.c | 7 +
9
target/loongarch/insn_trans/trans_lsx.c.inc | 137 ++++++++++++++++++++
10
target/loongarch/insns.decode | 4 +
11
3 files changed, 148 insertions(+)
12
13
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/loongarch/disas.c
16
+++ b/target/loongarch/disas.c
17
@@ -XXX,XX +XXX,XX @@ static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
18
output(ctx, mnemonic, "v%d, r%d, r%d", a->vd, a->rj, a->rk);
19
}
20
21
+static void output_v_i(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
22
+{
23
+ output(ctx, mnemonic, "v%d, 0x%x", a->vd, a->imm);
24
+}
25
+
26
INSN_LSX(vadd_b, vvv)
27
INSN_LSX(vadd_h, vvv)
28
INSN_LSX(vadd_w, vvv)
29
@@ -XXX,XX +XXX,XX @@ INSN_LSX(vmskltz_d, vv)
30
INSN_LSX(vmskgez_b, vv)
31
INSN_LSX(vmsknz_b, vv)
32
33
+INSN_LSX(vldi, v_i)
34
+
35
INSN_LSX(vand_v, vvv)
36
INSN_LSX(vor_v, vvv)
37
INSN_LSX(vxor_v, vvv)
38
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
41
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
42
@@ -XXX,XX +XXX,XX @@ TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
43
TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
44
TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
45
46
+#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
47
+
48
+static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
49
+{
50
+ int mode;
51
+ uint64_t data, t;
52
+
53
+ /*
54
+ * imm bits [11:8] encode the mode; valid mode values are 0-12.
55
+ * Other values are invalid.
56
+ */
57
+ mode = (imm >> 8) & 0xf;
58
+ t = imm & 0xff;
59
+ switch (mode) {
60
+ case 0:
61
+ /* data: {2{24'0, imm[7:0]}} */
62
+ data = (t << 32) | t ;
63
+ break;
64
+ case 1:
65
+ /* data: {2{16'0, imm[7:0], 8'0}} */
66
+ data = (t << 24) | (t << 8);
67
+ break;
68
+ case 2:
69
+ /* data: {2{8'0, imm[7:0], 16'0}} */
70
+ data = (t << 48) | (t << 16);
71
+ break;
72
+ case 3:
73
+ /* data: {2{imm[7:0], 24'0}} */
74
+ data = (t << 56) | (t << 24);
75
+ break;
76
+ case 4:
77
+ /* data: {4{8'0, imm[7:0]}} */
78
+ data = (t << 48) | (t << 32) | (t << 16) | t;
79
+ break;
80
+ case 5:
81
+ /* data: {4{imm[7:0], 8'0}} */
82
+ data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
83
+ break;
84
+ case 6:
85
+ /* data: {2{16'0, imm[7:0], 8'1}} */
86
+ data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
87
+ break;
88
+ case 7:
89
+ /* data: {2{8'0, imm[7:0], 16'1}} */
90
+ data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
91
+ break;
92
+ case 8:
93
+ /* data: {8{imm[7:0]}} */
94
+ data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
95
+ (t << 24) | (t << 16) | (t << 8) | t;
96
+ break;
97
+ case 9:
98
+ /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
99
+ {
100
+ uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
101
+ b0 = t& 0x1;
102
+ b1 = (t & 0x2) >> 1;
103
+ b2 = (t & 0x4) >> 2;
104
+ b3 = (t & 0x8) >> 3;
105
+ b4 = (t & 0x10) >> 4;
106
+ b5 = (t & 0x20) >> 5;
107
+ b6 = (t & 0x40) >> 6;
108
+ b7 = (t & 0x80) >> 7;
109
+ data = (EXPAND_BYTE(b7) << 56) |
110
+ (EXPAND_BYTE(b6) << 48) |
111
+ (EXPAND_BYTE(b5) << 40) |
112
+ (EXPAND_BYTE(b4) << 32) |
113
+ (EXPAND_BYTE(b3) << 24) |
114
+ (EXPAND_BYTE(b2) << 16) |
115
+ (EXPAND_BYTE(b1) << 8) |
116
+ EXPAND_BYTE(b0);
117
+ }
118
+ break;
119
+ case 10:
120
+ /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
121
+ {
122
+ uint64_t b6, b7;
123
+ uint64_t t0, t1;
124
+ b6 = (imm & 0x40) >> 6;
125
+ b7 = (imm & 0x80) >> 7;
126
+ t0 = (imm & 0x3f);
127
+ t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
128
+ data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
129
+ }
130
+ break;
131
+ case 11:
132
+ /* data: {32'0, imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0} */
133
+ {
134
+ uint64_t b6,b7;
135
+ uint64_t t0, t1;
136
+ b6 = (imm & 0x40) >> 6;
137
+ b7 = (imm & 0x80) >> 7;
138
+ t0 = (imm & 0x3f);
139
+ t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
140
+ data = (t1 << 25) | (t0 << 19);
141
+ }
142
+ break;
143
+ case 12:
144
+ /* data: {imm[7], ~imm[6], {8{imm[6]}}, imm[5:0], 48'0} */
145
+ {
146
+ uint64_t b6,b7;
147
+ uint64_t t0, t1;
148
+ b6 = (imm & 0x40) >> 6;
149
+ b7 = (imm & 0x80) >> 7;
150
+ t0 = (imm & 0x3f);
151
+ t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
152
+ data = (t1 << 54) | (t0 << 48);
153
+ }
154
+ break;
155
+ default:
156
+ generate_exception(ctx, EXCCODE_INE);
157
+ g_assert_not_reached();
158
+ }
159
+ return data;
160
+}
161
+
162
+static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
163
+{
164
+ int sel, vece;
165
+ uint64_t value;
166
+ CHECK_SXE;
167
+
168
+ sel = (a->imm >> 12) & 0x1;
169
+
170
+ if (sel) {
171
+ value = vldi_get_value(ctx, a->imm);
172
+ vece = MO_64;
173
+ } else {
174
+ value = ((int32_t)(a->imm << 22)) >> 22;
175
+ vece = (a->imm >> 10) & 0x3;
176
+ }
177
+
178
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
179
+ tcg_constant_i64(value));
180
+ return true;
181
+}
182
+
183
TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and)
184
TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or)
185
TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor)
186
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/loongarch/insns.decode
189
+++ b/target/loongarch/insns.decode
190
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
191
&vvr vd vj rk
192
&vrr vd rj rk
193
&vr_ii vd rj imm imm2
194
+&v_i vd imm
195
196
#
197
# LSX Formats
198
@@ -XXX,XX +XXX,XX @@ dbcl 0000 00000010 10101 ............... @i15
199
@vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1
200
@vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii
201
@vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr
202
+@v_i13 .... ........ .. imm:13 vd:5 &v_i
203
204
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
205
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
206
@@ -XXX,XX +XXX,XX @@ vmskltz_d 0111 00101001 11000 10011 ..... ..... @vv
207
vmskgez_b 0111 00101001 11000 10100 ..... ..... @vv
208
vmsknz_b 0111 00101001 11000 11000 ..... ..... @vv
209
210
+vldi 0111 00111110 00 ............. ..... @v_i13
211
+
212
vand_v 0111 00010010 01100 ..... ..... ..... @vvv
213
vor_v 0111 00010010 01101 ..... ..... ..... @vvv
214
vxor_v 0111 00010010 01110 ..... ..... ..... @vvv
215
--
216
2.31.1
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Song Gao <gaosong@loongson.cn>
3
Message-Id: <20230504122810.4094787-45-gaosong@loongson.cn>
4
---
5
target/loongarch/cpu.c | 1 +
6
1 file changed, 1 insertion(+)
7
1
8
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/target/loongarch/cpu.c
11
+++ b/target/loongarch/cpu.c
12
@@ -XXX,XX +XXX,XX @@ static void loongarch_la464_initfn(Object *obj)
13
data = FIELD_DP32(data, CPUCFG2, FP_SP, 1);
14
data = FIELD_DP32(data, CPUCFG2, FP_DP, 1);
15
data = FIELD_DP32(data, CPUCFG2, FP_VER, 1);
16
+ data = FIELD_DP32(data, CPUCFG2, LSX, 1),
17
data = FIELD_DP32(data, CPUCFG2, LLFTP, 1);
18
data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1);
19
data = FIELD_DP32(data, CPUCFG2, LAM, 1);
20
--
21
2.31.1
diff view generated by jsdifflib