Mostly my decodetree stuff, but also some patches for various
smaller bugs/features from others.

thanks
-- PMM

The following changes since commit 53550e81e2cafe7c03a39526b95cd21b5194d9b1:

  Merge remote-tracking branch 'remotes/berrange/tags/qcrypto-next-pull-request' into staging (2020-06-15 16:36:34 +0100)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20200616

for you to fetch changes up to 64b397417a26509bcdff44ab94356a35c7901c79:

  hw: arm: Set vendor property for IMX SDHCI emulations (2020-06-16 10:32:29 +0100)

----------------------------------------------------------------
target-arm queue:
 * hw: arm: Set vendor property for IMX SDHCI emulations
 * sd: sdhci: Implement basic vendor specific register support
 * hw/net/imx_fec: Convert debug fprintf() to trace events
 * target/arm/cpu: adjust virtual time for all KVM arm cpus
 * Implement configurable descriptor size in ftgmac100
 * hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
 * target/arm: More Neon decodetree conversion work

----------------------------------------------------------------
Erik Smit (1):
      Implement configurable descriptor size in ftgmac100

Guenter Roeck (2):
      sd: sdhci: Implement basic vendor specific register support
      hw: arm: Set vendor property for IMX SDHCI emulations

Jean-Christophe Dubois (2):
      hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
      hw/net/imx_fec: Convert debug fprintf() to trace events

Peter Maydell (17):
      target/arm: Fix missing temp frees in do_vshll_2sh
      target/arm: Convert Neon 3-reg-diff prewidening ops to decodetree
      target/arm: Convert Neon 3-reg-diff narrowing ops to decodetree
      target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree
      target/arm: Convert Neon 3-reg-diff long multiplies
      target/arm: Convert Neon 3-reg-diff saturating doubling multiplies
      target/arm: Convert Neon 3-reg-diff polynomial VMULL
      target/arm: Add 'static' and 'const' annotations to VSHLL function arrays
      target/arm: Add missing TCG temp free in do_2shift_env_64()
      target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree
      target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree
      target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree
      target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree
      target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree
      target/arm: Convert Neon VEXT to decodetree
      target/arm: Convert Neon VTBL, VTBX to decodetree
      target/arm: Convert Neon VDUP (scalar) to decodetree

fangying (1):
      target/arm/cpu: adjust virtual time for all KVM arm cpus

 hw/sd/sdhci-internal.h          |    5 +
 include/hw/sd/sdhci.h           |    5 +
 target/arm/translate.h          |    1 +
 target/arm/neon-dp.decode       |  130 +++++
 hw/arm/fsl-imx25.c              |    6 +
 hw/arm/fsl-imx6.c               |    6 +
 hw/arm/fsl-imx6ul.c             |    2 +
 hw/arm/fsl-imx7.c               |    2 +
 hw/misc/imx6ul_ccm.c            |   76 ++-
 hw/net/ftgmac100.c              |   26 +-
 hw/net/imx_fec.c                |  106 ++--
 hw/sd/sdhci.c                   |   18 +-
 target/arm/cpu.c                |    6 +-
 target/arm/cpu64.c              |    1 -
 target/arm/kvm.c                |   21 +-
 target/arm/translate-neon.inc.c | 1148 ++++++++++++++++++++++++++++++++++++++-
 target/arm/translate.c          |  684 +----------------------
 hw/net/trace-events             |   18 +
 18 files changed, 1495 insertions(+), 766 deletions(-)
New patch
The widenfn() in do_vshll_2sh() does not free the input 32-bit
TCGv, so we need to do this in the calling code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
 target/arm/translate-neon.inc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
     tmp = tcg_temp_new_i64();

     widenfn(tmp, rm0);
+    tcg_temp_free_i32(rm0);
     if (a->shift != 0) {
         tcg_gen_shli_i64(tmp, tmp, a->shift);
         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
     neon_store_reg64(tmp, a->vd);

     widenfn(tmp, rm1);
+    tcg_temp_free_i32(rm1);
     if (a->shift != 0) {
         tcg_gen_shli_i64(tmp, tmp, a->shift);
         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
--
2.20.1
34
New patch
Convert the "pre-widening" insns VADDL, VSUBL, VADDW and VSUBW
in the Neon 3-registers-different-lengths group to decodetree.
These insns work by widening one or both inputs to double their
size, performing an add or subtract at the doubled size and
then storing the double-size result.

As usual, rather than copying the loop of the original decoder
(which needs awkward code to avoid problems when source and
destination registers overlap) we just unroll the two passes.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  43 +++++++++++++
 target/arm/translate-neon.inc.c | 104 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  16 ++---
 3 files changed, 151 insertions(+), 12 deletions(-)
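
For concreteness, here is a minimal standalone C model of the per-lane
arithmetic a prewidening op performs; the signed 16-bit lane size is an
arbitrary choice for illustration, and this is not the QEMU
implementation (which works on TCG values, as in the diff below):

/* Illustrative reference model only -- not QEMU code. */
#include <assert.h>
#include <stdint.h>

/* VADDL.S16: widen both 16-bit inputs to 32 bits, then add at 32 bits. */
static int32_t vaddl_s16(int16_t n, int16_t m)
{
    return (int32_t)n + (int32_t)m;   /* cannot wrap at 16 bits */
}

/* VADDW.S16: the first operand is already the wide (32-bit) type. */
static int32_t vaddw_s16(int32_t n_wide, int16_t m)
{
    return n_wide + (int32_t)m;
}

int main(void)
{
    /* 0x7fff + 0x7fff would overflow int16_t but not the widened result */
    assert(vaddl_s16(INT16_MAX, INT16_MAX) == 0xfffe);
    assert(vaddw_s16(100000, -1) == 99999);
    return 0;
}
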
18
19
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/neon-dp.decode
22
+++ b/target/arm/neon-dp.decode
23
@@ -XXX,XX +XXX,XX @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
24
# So we have a single decode line and check the cmode/op in the
25
# trans function.
26
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
27
+
28
+######################################################################
29
+# Within the "two registers, or three registers of different lengths"
30
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
31
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
32
+# or they are a size field for the three-reg-different-lengths and
33
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
34
+# is slightly awkward for decodetree: we handle it with this
35
+# non-exclusive group which contains within it two exclusive groups:
36
+# one for the size=0b11 patterns, and one for the size-not-0b11
37
+# patterns. This allows us to check that none of the insns within
38
+# each subgroup accidentally overlap each other. Note that all the
39
+# trans functions for the size-not-0b11 patterns must check and
40
+# return false for size==3.
41
+######################################################################
42
+{
43
+ # 0b11 subgroup will go here
44
+
45
+ # Subgroup for size != 0b11
46
+ [
47
+ ##################################################################
48
+ # 3-reg-different-length grouping:
49
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
50
+ ##################################################################
51
+
52
+ &3diff vm vn vd size
53
+
54
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
55
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
56
+
57
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
58
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
59
+
60
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
61
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
62
+
63
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
64
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
65
+
66
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
67
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
68
+ ]
69
+}
70
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-neon.inc.c
73
+++ b/target/arm/translate-neon.inc.c
74
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
75
}
76
return do_1reg_imm(s, a, fn);
77
}
78
+
79
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
80
+ NeonGenWidenFn *widenfn,
81
+ NeonGenTwo64OpFn *opfn,
82
+ bool src1_wide)
83
+{
84
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
85
+ TCGv_i64 rn0_64, rn1_64, rm_64;
86
+ TCGv_i32 rm;
87
+
88
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
89
+ return false;
90
+ }
91
+
92
+ /* UNDEF accesses to D16-D31 if they don't exist. */
93
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
94
+ ((a->vd | a->vn | a->vm) & 0x10)) {
95
+ return false;
96
+ }
97
+
98
+ if (!widenfn || !opfn) {
99
+ /* size == 3 case, which is an entirely different insn group */
100
+ return false;
101
+ }
102
+
103
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
104
+ return false;
105
+ }
106
+
107
+ if (!vfp_access_check(s)) {
108
+ return true;
109
+ }
110
+
111
+ rn0_64 = tcg_temp_new_i64();
112
+ rn1_64 = tcg_temp_new_i64();
113
+ rm_64 = tcg_temp_new_i64();
114
+
115
+ if (src1_wide) {
116
+ neon_load_reg64(rn0_64, a->vn);
117
+ } else {
118
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
119
+ widenfn(rn0_64, tmp);
120
+ tcg_temp_free_i32(tmp);
121
+ }
122
+ rm = neon_load_reg(a->vm, 0);
123
+
124
+ widenfn(rm_64, rm);
125
+ tcg_temp_free_i32(rm);
126
+ opfn(rn0_64, rn0_64, rm_64);
127
+
128
+ /*
129
+ * Load second pass inputs before storing the first pass result, to
130
+ * avoid incorrect results if a narrow input overlaps with the result.
131
+ */
132
+ if (src1_wide) {
133
+ neon_load_reg64(rn1_64, a->vn + 1);
134
+ } else {
135
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
136
+ widenfn(rn1_64, tmp);
137
+ tcg_temp_free_i32(tmp);
138
+ }
139
+ rm = neon_load_reg(a->vm, 1);
140
+
141
+ neon_store_reg64(rn0_64, a->vd);
142
+
143
+ widenfn(rm_64, rm);
144
+ tcg_temp_free_i32(rm);
145
+ opfn(rn1_64, rn1_64, rm_64);
146
+ neon_store_reg64(rn1_64, a->vd + 1);
147
+
148
+ tcg_temp_free_i64(rn0_64);
149
+ tcg_temp_free_i64(rn1_64);
150
+ tcg_temp_free_i64(rm_64);
151
+
152
+ return true;
153
+}
154
+
155
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
156
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
157
+ { \
158
+ static NeonGenWidenFn * const widenfn[] = { \
159
+ gen_helper_neon_widen_##S##8, \
160
+ gen_helper_neon_widen_##S##16, \
161
+ tcg_gen_##EXT##_i32_i64, \
162
+ NULL, \
163
+ }; \
164
+ static NeonGenTwo64OpFn * const addfn[] = { \
165
+ gen_helper_neon_##OP##l_u16, \
166
+ gen_helper_neon_##OP##l_u32, \
167
+ tcg_gen_##OP##_i64, \
168
+ NULL, \
169
+ }; \
170
+ return do_prewiden_3d(s, a, widenfn[a->size], \
171
+ addfn[a->size], SRC1WIDE); \
172
+ }
173
+
174
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
175
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
176
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
177
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
178
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
179
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
180
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
181
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
182
diff --git a/target/arm/translate.c b/target/arm/translate.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/target/arm/translate.c
185
+++ b/target/arm/translate.c
186
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
187
/* Three registers of different lengths. */
188
int src1_wide;
189
int src2_wide;
190
- int prewiden;
191
/* undefreq: bit 0 : UNDEF if size == 0
192
* bit 1 : UNDEF if size == 1
193
* bit 2 : UNDEF if size == 2
194
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
195
int undefreq;
196
/* prewiden, src1_wide, src2_wide, undefreq */
197
static const int neon_3reg_wide[16][4] = {
198
- {1, 0, 0, 0}, /* VADDL */
199
- {1, 1, 0, 0}, /* VADDW */
200
- {1, 0, 0, 0}, /* VSUBL */
201
- {1, 1, 0, 0}, /* VSUBW */
202
+ {0, 0, 0, 7}, /* VADDL: handled by decodetree */
203
+ {0, 0, 0, 7}, /* VADDW: handled by decodetree */
204
+ {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
205
+ {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
206
{0, 1, 1, 0}, /* VADDHN */
207
{0, 0, 0, 0}, /* VABAL */
208
{0, 1, 1, 0}, /* VSUBHN */
209
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
210
{0, 0, 0, 7}, /* Reserved: always UNDEF */
211
};
212
213
- prewiden = neon_3reg_wide[op][0];
214
src1_wide = neon_3reg_wide[op][1];
215
src2_wide = neon_3reg_wide[op][2];
216
undefreq = neon_3reg_wide[op][3];
217
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
218
} else {
219
tmp = neon_load_reg(rn, pass);
220
}
221
- if (prewiden) {
222
- gen_neon_widen(cpu_V0, tmp, size, u);
223
- }
224
}
225
if (src2_wide) {
226
neon_load_reg64(cpu_V1, rm + pass);
227
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
228
} else {
229
tmp2 = neon_load_reg(rm, pass);
230
}
231
- if (prewiden) {
232
- gen_neon_widen(cpu_V1, tmp2, size, u);
233
- }
234
}
235
switch (op) {
236
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
237
--
238
2.20.1
239
240
New patch
Convert the narrow-to-high-half insns VADDHN, VSUBHN, VRADDHN,
VRSUBHN in the Neon 3-registers-different-lengths group to
decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  6 +++
 target/arm/translate-neon.inc.c | 87 +++++++++++++++++++++++++++++++
 target/arm/translate.c          | 91 ++++-----------------------------
 3 files changed, 104 insertions(+), 80 deletions(-)
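
A minimal C sketch of the narrowing arithmetic, mirroring the 64-bit
source-lane (size==2) case handled in the diff below; this is
illustrative only, not the QEMU helpers. VRADDHN differs from VADDHN
only in the rounding bias added before the high half is taken:

/* Illustrative reference model only -- not QEMU code. */
#include <assert.h>
#include <stdint.h>

/* VADDHN: add at 64 bits, keep only the high 32 bits of the sum. */
static uint32_t vaddhn_high32(uint64_t n, uint64_t m)
{
    return (uint32_t)((n + m) >> 32);
}

/* VRADDHN: as above, but round by adding half of the discarded bits. */
static uint32_t vraddhn_high32(uint64_t n, uint64_t m)
{
    return (uint32_t)((n + m + (1ULL << 31)) >> 32);
}

int main(void)
{
    assert(vaddhn_high32(0x100000000ULL, 0x2FFFFFFFFULL) == 3);  /* truncates */
    assert(vraddhn_high32(0x100000000ULL, 0x2FFFFFFFFULL) == 4); /* rounds up */
    return 0;
}
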
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
18
19
VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
20
VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
21
+
22
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
23
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
24
+
25
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
26
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
27
]
28
}
29
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-neon.inc.c
32
+++ b/target/arm/translate-neon.inc.c
33
@@ -XXX,XX +XXX,XX @@ DO_PREWIDEN(VADDW_S, s, ext, add, true)
34
DO_PREWIDEN(VADDW_U, u, extu, add, true)
35
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
36
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
37
+
38
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
39
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
40
+{
41
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
42
+ TCGv_i64 rn_64, rm_64;
43
+ TCGv_i32 rd0, rd1;
44
+
45
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
46
+ return false;
47
+ }
48
+
49
+ /* UNDEF accesses to D16-D31 if they don't exist. */
50
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
51
+ ((a->vd | a->vn | a->vm) & 0x10)) {
52
+ return false;
53
+ }
54
+
55
+ if (!opfn || !narrowfn) {
56
+ /* size == 3 case, which is an entirely different insn group */
57
+ return false;
58
+ }
59
+
60
+ if ((a->vn | a->vm) & 1) {
61
+ return false;
62
+ }
63
+
64
+ if (!vfp_access_check(s)) {
65
+ return true;
66
+ }
67
+
68
+ rn_64 = tcg_temp_new_i64();
69
+ rm_64 = tcg_temp_new_i64();
70
+ rd0 = tcg_temp_new_i32();
71
+ rd1 = tcg_temp_new_i32();
72
+
73
+ neon_load_reg64(rn_64, a->vn);
74
+ neon_load_reg64(rm_64, a->vm);
75
+
76
+ opfn(rn_64, rn_64, rm_64);
77
+
78
+ narrowfn(rd0, rn_64);
79
+
80
+ neon_load_reg64(rn_64, a->vn + 1);
81
+ neon_load_reg64(rm_64, a->vm + 1);
82
+
83
+ opfn(rn_64, rn_64, rm_64);
84
+
85
+ narrowfn(rd1, rn_64);
86
+
87
+ neon_store_reg(a->vd, 0, rd0);
88
+ neon_store_reg(a->vd, 1, rd1);
89
+
90
+ tcg_temp_free_i64(rn_64);
91
+ tcg_temp_free_i64(rm_64);
92
+
93
+ return true;
94
+}
95
+
96
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
97
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
98
+ { \
99
+ static NeonGenTwo64OpFn * const addfn[] = { \
100
+ gen_helper_neon_##OP##l_u16, \
101
+ gen_helper_neon_##OP##l_u32, \
102
+ tcg_gen_##OP##_i64, \
103
+ NULL, \
104
+ }; \
105
+ static NeonGenNarrowFn * const narrowfn[] = { \
106
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
107
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
108
+ EXTOP, \
109
+ NULL, \
110
+ }; \
111
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
112
+ }
113
+
114
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
115
+{
116
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
117
+ tcg_gen_extrh_i64_i32(rd, rn);
118
+}
119
+
120
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
121
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
122
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
123
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
124
diff --git a/target/arm/translate.c b/target/arm/translate.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/target/arm/translate.c
127
+++ b/target/arm/translate.c
128
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
129
}
130
}
131
132
-static inline void gen_neon_subl(int size)
133
-{
134
- switch (size) {
135
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
136
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
137
- case 2: tcg_gen_sub_i64(CPU_V001); break;
138
- default: abort();
139
- }
140
-}
141
-
142
static inline void gen_neon_negl(TCGv_i64 var, int size)
143
{
144
switch (size) {
145
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
146
op = (insn >> 8) & 0xf;
147
if ((insn & (1 << 6)) == 0) {
148
/* Three registers of different lengths. */
149
- int src1_wide;
150
- int src2_wide;
151
/* undefreq: bit 0 : UNDEF if size == 0
152
* bit 1 : UNDEF if size == 1
153
* bit 2 : UNDEF if size == 2
154
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
155
{0, 0, 0, 7}, /* VADDW: handled by decodetree */
156
{0, 0, 0, 7}, /* VSUBL: handled by decodetree */
157
{0, 0, 0, 7}, /* VSUBW: handled by decodetree */
158
- {0, 1, 1, 0}, /* VADDHN */
159
+ {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
160
{0, 0, 0, 0}, /* VABAL */
161
- {0, 1, 1, 0}, /* VSUBHN */
162
+ {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
163
{0, 0, 0, 0}, /* VABDL */
164
{0, 0, 0, 0}, /* VMLAL */
165
{0, 0, 0, 9}, /* VQDMLAL */
166
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
167
{0, 0, 0, 7}, /* Reserved: always UNDEF */
168
};
169
170
- src1_wide = neon_3reg_wide[op][1];
171
- src2_wide = neon_3reg_wide[op][2];
172
undefreq = neon_3reg_wide[op][3];
173
174
if ((undefreq & (1 << size)) ||
175
((undefreq & 8) && u)) {
176
return 1;
177
}
178
- if ((src1_wide && (rn & 1)) ||
179
- (src2_wide && (rm & 1)) ||
180
- (!src2_wide && (rd & 1))) {
181
+ if (rd & 1) {
182
return 1;
183
}
184
185
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
186
/* Avoid overlapping operands. Wide source operands are
187
always aligned so will never overlap with wide
188
destinations in problematic ways. */
189
- if (rd == rm && !src2_wide) {
190
+ if (rd == rm) {
191
tmp = neon_load_reg(rm, 1);
192
neon_store_scratch(2, tmp);
193
- } else if (rd == rn && !src1_wide) {
194
+ } else if (rd == rn) {
195
tmp = neon_load_reg(rn, 1);
196
neon_store_scratch(2, tmp);
197
}
198
tmp3 = NULL;
199
for (pass = 0; pass < 2; pass++) {
200
- if (src1_wide) {
201
- neon_load_reg64(cpu_V0, rn + pass);
202
- tmp = NULL;
203
+ if (pass == 1 && rd == rn) {
204
+ tmp = neon_load_scratch(2);
205
} else {
206
- if (pass == 1 && rd == rn) {
207
- tmp = neon_load_scratch(2);
208
- } else {
209
- tmp = neon_load_reg(rn, pass);
210
- }
211
+ tmp = neon_load_reg(rn, pass);
212
}
213
- if (src2_wide) {
214
- neon_load_reg64(cpu_V1, rm + pass);
215
- tmp2 = NULL;
216
+ if (pass == 1 && rd == rm) {
217
+ tmp2 = neon_load_scratch(2);
218
} else {
219
- if (pass == 1 && rd == rm) {
220
- tmp2 = neon_load_scratch(2);
221
- } else {
222
- tmp2 = neon_load_reg(rm, pass);
223
- }
224
+ tmp2 = neon_load_reg(rm, pass);
225
}
226
switch (op) {
227
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
228
- gen_neon_addl(size);
229
- break;
230
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
231
- gen_neon_subl(size);
232
- break;
233
case 5: case 7: /* VABAL, VABDL */
234
switch ((size << 1) | u) {
235
case 0:
236
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
237
abort();
238
}
239
neon_store_reg64(cpu_V0, rd + pass);
240
- } else if (op == 4 || op == 6) {
241
- /* Narrowing operation. */
242
- tmp = tcg_temp_new_i32();
243
- if (!u) {
244
- switch (size) {
245
- case 0:
246
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
247
- break;
248
- case 1:
249
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
250
- break;
251
- case 2:
252
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
253
- break;
254
- default: abort();
255
- }
256
- } else {
257
- switch (size) {
258
- case 0:
259
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
260
- break;
261
- case 1:
262
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
263
- break;
264
- case 2:
265
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
266
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
267
- break;
268
- default: abort();
269
- }
270
- }
271
- if (pass == 0) {
272
- tmp3 = tmp;
273
- } else {
274
- neon_store_reg(rd, 0, tmp3);
275
- neon_store_reg(rd, 1, tmp);
276
- }
277
} else {
278
/* Write back the result. */
279
neon_store_reg64(cpu_V0, rd + pass);
280
--
281
2.20.1
282
283
New patch
Convert the Neon 3-reg-diff insns VABAL and VABDL to decodetree.
Like almost all the remaining insns in this group, these are
a combination of a two-input operation which returns a double width
result and then a possible accumulation of that double width
result into the destination.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate.h          |   1 +
 target/arm/neon-dp.decode       |   6 ++
 target/arm/translate-neon.inc.c | 132 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  31 +-------
 4 files changed, 142 insertions(+), 28 deletions(-)
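
A rough standalone C model of the two-step structure described above,
using a signed 16-bit lane purely for illustration (not the QEMU code):
VABDL is the widening absolute-difference op on its own, and VABAL is
the same op followed by an accumulate into the wide destination:

/* Illustrative reference model only -- not QEMU code. */
#include <assert.h>
#include <stdint.h>

/* VABDL.S16: absolute difference, widened to 32 bits. */
static int32_t vabdl_s16(int16_t n, int16_t m)
{
    int32_t d = (int32_t)n - (int32_t)m;
    return d < 0 ? -d : d;
}

/* VABAL.S16: the same op, then accumulated into the wide destination. */
static int32_t vabal_s16(int32_t acc, int16_t n, int16_t m)
{
    return acc + vabdl_s16(n, m);
}

int main(void)
{
    assert(vabdl_s16(-30000, 30000) == 60000);  /* would overflow at 16 bits */
    assert(vabal_s16(5, 10, 25) == 20);
    return 0;
}
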
15
16
diff --git a/target/arm/translate.h b/target/arm/translate.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate.h
19
+++ b/target/arm/translate.h
20
@@ -XXX,XX +XXX,XX @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
21
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
22
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
23
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
24
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
25
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
26
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
27
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
28
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/neon-dp.decode
31
+++ b/target/arm/neon-dp.decode
32
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
33
VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
34
VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
35
36
+ VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
37
+ VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
38
+
39
VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
40
VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
41
+
42
+ VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
43
+ VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
44
]
45
}
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
51
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
52
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
53
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
54
+
55
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
56
+ NeonGenTwoOpWidenFn *opfn,
57
+ NeonGenTwo64OpFn *accfn)
58
+{
59
+ /*
60
+ * 3-regs different lengths, long operations.
61
+ * These perform an operation on two inputs that returns a double-width
62
+ * result, and then possibly perform an accumulation operation of
63
+ * that result into the double-width destination.
64
+ */
65
+ TCGv_i64 rd0, rd1, tmp;
66
+ TCGv_i32 rn, rm;
67
+
68
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
69
+ return false;
70
+ }
71
+
72
+ /* UNDEF accesses to D16-D31 if they don't exist. */
73
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
74
+ ((a->vd | a->vn | a->vm) & 0x10)) {
75
+ return false;
76
+ }
77
+
78
+ if (!opfn) {
79
+ /* size == 3 case, which is an entirely different insn group */
80
+ return false;
81
+ }
82
+
83
+ if (a->vd & 1) {
84
+ return false;
85
+ }
86
+
87
+ if (!vfp_access_check(s)) {
88
+ return true;
89
+ }
90
+
91
+ rd0 = tcg_temp_new_i64();
92
+ rd1 = tcg_temp_new_i64();
93
+
94
+ rn = neon_load_reg(a->vn, 0);
95
+ rm = neon_load_reg(a->vm, 0);
96
+ opfn(rd0, rn, rm);
97
+ tcg_temp_free_i32(rn);
98
+ tcg_temp_free_i32(rm);
99
+
100
+ rn = neon_load_reg(a->vn, 1);
101
+ rm = neon_load_reg(a->vm, 1);
102
+ opfn(rd1, rn, rm);
103
+ tcg_temp_free_i32(rn);
104
+ tcg_temp_free_i32(rm);
105
+
106
+ /* Don't store results until after all loads: they might overlap */
107
+ if (accfn) {
108
+ tmp = tcg_temp_new_i64();
109
+ neon_load_reg64(tmp, a->vd);
110
+ accfn(tmp, tmp, rd0);
111
+ neon_store_reg64(tmp, a->vd);
112
+ neon_load_reg64(tmp, a->vd + 1);
113
+ accfn(tmp, tmp, rd1);
114
+ neon_store_reg64(tmp, a->vd + 1);
115
+ tcg_temp_free_i64(tmp);
116
+ } else {
117
+ neon_store_reg64(rd0, a->vd);
118
+ neon_store_reg64(rd1, a->vd + 1);
119
+ }
120
+
121
+ tcg_temp_free_i64(rd0);
122
+ tcg_temp_free_i64(rd1);
123
+
124
+ return true;
125
+}
126
+
127
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
128
+{
129
+ static NeonGenTwoOpWidenFn * const opfn[] = {
130
+ gen_helper_neon_abdl_s16,
131
+ gen_helper_neon_abdl_s32,
132
+ gen_helper_neon_abdl_s64,
133
+ NULL,
134
+ };
135
+
136
+ return do_long_3d(s, a, opfn[a->size], NULL);
137
+}
138
+
139
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
140
+{
141
+ static NeonGenTwoOpWidenFn * const opfn[] = {
142
+ gen_helper_neon_abdl_u16,
143
+ gen_helper_neon_abdl_u32,
144
+ gen_helper_neon_abdl_u64,
145
+ NULL,
146
+ };
147
+
148
+ return do_long_3d(s, a, opfn[a->size], NULL);
149
+}
150
+
151
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
152
+{
153
+ static NeonGenTwoOpWidenFn * const opfn[] = {
154
+ gen_helper_neon_abdl_s16,
155
+ gen_helper_neon_abdl_s32,
156
+ gen_helper_neon_abdl_s64,
157
+ NULL,
158
+ };
159
+ static NeonGenTwo64OpFn * const addfn[] = {
160
+ gen_helper_neon_addl_u16,
161
+ gen_helper_neon_addl_u32,
162
+ tcg_gen_add_i64,
163
+ NULL,
164
+ };
165
+
166
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
167
+}
168
+
169
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
170
+{
171
+ static NeonGenTwoOpWidenFn * const opfn[] = {
172
+ gen_helper_neon_abdl_u16,
173
+ gen_helper_neon_abdl_u32,
174
+ gen_helper_neon_abdl_u64,
175
+ NULL,
176
+ };
177
+ static NeonGenTwo64OpFn * const addfn[] = {
178
+ gen_helper_neon_addl_u16,
179
+ gen_helper_neon_addl_u32,
180
+ tcg_gen_add_i64,
181
+ NULL,
182
+ };
183
+
184
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
185
+}
186
diff --git a/target/arm/translate.c b/target/arm/translate.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/arm/translate.c
189
+++ b/target/arm/translate.c
190
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
191
{0, 0, 0, 7}, /* VSUBL: handled by decodetree */
192
{0, 0, 0, 7}, /* VSUBW: handled by decodetree */
193
{0, 0, 0, 7}, /* VADDHN: handled by decodetree */
194
- {0, 0, 0, 0}, /* VABAL */
195
+ {0, 0, 0, 7}, /* VABAL */
196
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
197
- {0, 0, 0, 0}, /* VABDL */
198
+ {0, 0, 0, 7}, /* VABDL */
199
{0, 0, 0, 0}, /* VMLAL */
200
{0, 0, 0, 9}, /* VQDMLAL */
201
{0, 0, 0, 0}, /* VMLSL */
202
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
203
tmp2 = neon_load_reg(rm, pass);
204
}
205
switch (op) {
206
- case 5: case 7: /* VABAL, VABDL */
207
- switch ((size << 1) | u) {
208
- case 0:
209
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
210
- break;
211
- case 1:
212
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
213
- break;
214
- case 2:
215
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
216
- break;
217
- case 3:
218
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
219
- break;
220
- case 4:
221
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
222
- break;
223
- case 5:
224
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
225
- break;
226
- default: abort();
227
- }
228
- tcg_temp_free_i32(tmp2);
229
- tcg_temp_free_i32(tmp);
230
- break;
231
case 8: case 9: case 10: case 11: case 12: case 13:
232
/* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
233
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
234
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
235
case 10: /* VMLSL */
236
gen_neon_negl(cpu_V0, size);
237
/* Fall through */
238
- case 5: case 8: /* VABAL, VMLAL */
239
+ case 8: /* VABAL, VMLAL */
240
gen_neon_addl(size);
241
break;
242
case 9: case 11: /* VQDMLAL, VQDMLSL */
243
--
244
2.20.1
245
246
Convert the Neon 3-reg-diff insns VMULL, VMLAL and VMLSL; these perform
a 32x32->64 multiply with possible accumulate.

Note that for VMLSL we do the accumulate directly with a subtraction
rather than doing a negate-then-add as the old code did.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  9 +++++
 target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++
 target/arm/translate.c          | 21 +++-------
 3 files changed, 86 insertions(+), 15 deletions(-)
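
A minimal C model of the three operations for the 32-bit lane case,
for illustration only (not the QEMU helpers); it also shows that
accumulating VMLSL with a plain subtraction gives the same result as
the old negate-then-add sequence:

/* Illustrative reference model only -- not QEMU code. */
#include <assert.h>
#include <stdint.h>

/* VMULL.S32: 32x32->64 multiply, no truncation. */
static int64_t vmull_s32(int32_t n, int32_t m)
{
    return (int64_t)n * (int64_t)m;
}

/* VMLAL.S32: multiply long, then add into the 64-bit accumulator. */
static int64_t vmlal_s32(int64_t acc, int32_t n, int32_t m)
{
    return acc + vmull_s32(n, m);
}

/* VMLSL.S32: accumulate with a direct subtraction, as the patch notes;
 * acc - p is the same value that negate-then-add (acc + (-p)) produced. */
static int64_t vmlsl_s32(int64_t acc, int32_t n, int32_t m)
{
    return acc - vmull_s32(n, m);
}

int main(void)
{
    assert(vmull_s32(INT32_MAX, 2) == 4294967294LL);
    assert(vmlal_s32(10, 3, 4) == 22);
    assert(vmlsl_s32(10, 3, 4) == -2);
    return 0;
}
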
14
14
15
diff --git a/target/arm/helper.h b/target/arm/helper.h
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper.h
17
--- a/target/arm/neon-dp.decode
18
+++ b/target/arm/helper.h
18
+++ b/target/arm/neon-dp.decode
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
19
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
20
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
20
21
DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
21
VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
22
DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
22
VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
23
+DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
23
+
24
+DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
24
+ VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
25
+DEF_HELPER_3(vfp_touqh, i64, f16, i32, ptr)
25
+ VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
26
+DEF_HELPER_3(vfp_tosqh, i64, f16, i32, ptr)
26
+
27
DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
27
+ VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
28
DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
28
+ VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
29
DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
29
+
30
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
30
+ VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
31
DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
31
+ VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
32
DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
32
]
33
DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
33
}
34
+DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr)
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
35
+DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)
36
37
DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
38
DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
39
diff --git a/target/arm/helper.c b/target/arm/helper.c
40
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/helper.c
36
--- a/target/arm/translate-neon.inc.c
42
+++ b/target/arm/helper.c
37
+++ b/target/arm/translate-neon.inc.c
43
@@ -XXX,XX +XXX,XX @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
44
#undef VFP_CONV_FIX_A64
39
45
40
return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
46
/* Conversion to/from f16 can overflow to infinity before/after scaling.
47
- * Therefore we convert to f64 (which does not round), scale,
48
- * and then convert f64 to f16 (which may round).
49
+ * Therefore we convert to f64, scale, and then convert f64 to f16; or
50
+ * vice versa for conversion to integer.
51
+ *
52
+ * For 16- and 32-bit integers, the conversion to f64 never rounds.
53
+ * For 64-bit integers, any integer that would cause rounding will also
54
+ * overflow to f16 infinity, so there is no double rounding problem.
55
*/
56
57
static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
58
@@ -XXX,XX +XXX,XX @@ float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
59
return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
60
}
41
}
61
42
+
62
+float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
43
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
63
+{
44
+{
64
+ return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
45
+ TCGv_i32 lo = tcg_temp_new_i32();
46
+ TCGv_i32 hi = tcg_temp_new_i32();
47
+
48
+ tcg_gen_muls2_i32(lo, hi, rn, rm);
49
+ tcg_gen_concat_i32_i64(rd, lo, hi);
50
+
51
+ tcg_temp_free_i32(lo);
52
+ tcg_temp_free_i32(hi);
65
+}
53
+}
66
+
54
+
67
+float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
55
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
68
+{
56
+{
69
+ return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
57
+ TCGv_i32 lo = tcg_temp_new_i32();
58
+ TCGv_i32 hi = tcg_temp_new_i32();
59
+
60
+ tcg_gen_mulu2_i32(lo, hi, rn, rm);
61
+ tcg_gen_concat_i32_i64(rd, lo, hi);
62
+
63
+ tcg_temp_free_i32(lo);
64
+ tcg_temp_free_i32(hi);
70
+}
65
+}
71
+
66
+
72
static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
67
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
73
{
74
if (unlikely(float16_is_any_nan(f))) {
75
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
76
return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
77
}
78
79
+uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst)
80
+{
68
+{
81
+ return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
69
+ static NeonGenTwoOpWidenFn * const opfn[] = {
70
+ gen_helper_neon_mull_s8,
71
+ gen_helper_neon_mull_s16,
72
+ gen_mull_s32,
73
+ NULL,
74
+ };
75
+
76
+ return do_long_3d(s, a, opfn[a->size], NULL);
82
+}
77
+}
83
+
78
+
84
+uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst)
79
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
85
+{
80
+{
86
+ return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
81
+ static NeonGenTwoOpWidenFn * const opfn[] = {
82
+ gen_helper_neon_mull_u8,
83
+ gen_helper_neon_mull_u16,
84
+ gen_mull_u32,
85
+ NULL,
86
+ };
87
+
88
+ return do_long_3d(s, a, opfn[a->size], NULL);
87
+}
89
+}
88
+
90
+
89
+uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst)
91
+#define DO_VMLAL(INSN,MULL,ACC) \
90
+{
92
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
91
+ return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
93
+ { \
92
+}
94
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
95
+ gen_helper_neon_##MULL##8, \
96
+ gen_helper_neon_##MULL##16, \
97
+ gen_##MULL##32, \
98
+ NULL, \
99
+ }; \
100
+ static NeonGenTwo64OpFn * const accfn[] = { \
101
+ gen_helper_neon_##ACC##l_u16, \
102
+ gen_helper_neon_##ACC##l_u32, \
103
+ tcg_gen_##ACC##_i64, \
104
+ NULL, \
105
+ }; \
106
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
107
+ }
93
+
108
+
94
+uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst)
109
+DO_VMLAL(VMLAL_S,mull_s,add)
95
+{
110
+DO_VMLAL(VMLAL_U,mull_u,add)
96
+ return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
111
+DO_VMLAL(VMLSL_S,mull_s,sub)
97
+}
112
+DO_VMLAL(VMLSL_U,mull_u,sub)
98
+
113
diff --git a/target/arm/translate.c b/target/arm/translate.c
99
/* Set the current fp rounding mode and return the old one.
100
* The argument is a softfloat float_round_ value.
101
*/
102
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
103
index XXXXXXX..XXXXXXX 100644
114
index XXXXXXX..XXXXXXX 100644
104
--- a/target/arm/translate-a64.c
115
--- a/target/arm/translate.c
105
+++ b/target/arm/translate-a64.c
116
+++ b/target/arm/translate.c
106
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
117
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
107
bool itof, int rmode, int scale, int sf, int type)
118
{0, 0, 0, 7}, /* VABAL */
108
{
119
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
109
bool is_signed = !(opcode & 1);
120
{0, 0, 0, 7}, /* VABDL */
110
- bool is_double = type;
121
- {0, 0, 0, 0}, /* VMLAL */
111
TCGv_ptr tcg_fpstatus;
122
+ {0, 0, 0, 7}, /* VMLAL */
112
- TCGv_i32 tcg_shift;
123
{0, 0, 0, 9}, /* VQDMLAL */
113
+ TCGv_i32 tcg_shift, tcg_single;
124
- {0, 0, 0, 0}, /* VMLSL */
114
+ TCGv_i64 tcg_double;
125
+ {0, 0, 0, 7}, /* VMLSL */
115
126
{0, 0, 0, 9}, /* VQDMLSL */
116
- tcg_fpstatus = get_fpstatus_ptr(false);
127
- {0, 0, 0, 0}, /* Integer VMULL */
117
+ tcg_fpstatus = get_fpstatus_ptr(type == 3);
128
+ {0, 0, 0, 7}, /* Integer VMULL */
118
129
{0, 0, 0, 9}, /* VQDMULL */
119
tcg_shift = tcg_const_i32(64 - scale);
130
{0, 0, 0, 0xa}, /* Polynomial VMULL */
120
131
{0, 0, 0, 7}, /* Reserved: always UNDEF */
121
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
132
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
122
tcg_int = tcg_extend;
133
tmp2 = neon_load_reg(rm, pass);
123
}
134
}
124
135
switch (op) {
125
- if (is_double) {
136
- case 8: case 9: case 10: case 11: case 12: case 13:
126
- TCGv_i64 tcg_double = tcg_temp_new_i64();
137
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
127
+ switch (type) {
138
+ case 9: case 11: case 13:
128
+ case 1: /* float64 */
139
+ /* VQDMLAL, VQDMLSL, VQDMULL */
129
+ tcg_double = tcg_temp_new_i64();
140
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
130
if (is_signed) {
141
break;
131
gen_helper_vfp_sqtod(tcg_double, tcg_int,
142
default: /* 15 is RESERVED: caught earlier */
132
tcg_shift, tcg_fpstatus);
143
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
133
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
144
/* VQDMULL */
134
}
145
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
135
write_fp_dreg(s, rd, tcg_double);
146
neon_store_reg64(cpu_V0, rd + pass);
136
tcg_temp_free_i64(tcg_double);
147
- } else if (op == 5 || (op >= 8 && op <= 11)) {
137
- } else {
148
+ } else {
138
- TCGv_i32 tcg_single = tcg_temp_new_i32();
149
/* Accumulate. */
139
+ break;
150
neon_load_reg64(cpu_V1, rd + pass);
140
+
151
switch (op) {
141
+ case 0: /* float32 */
152
- case 10: /* VMLSL */
142
+ tcg_single = tcg_temp_new_i32();
153
- gen_neon_negl(cpu_V0, size);
143
if (is_signed) {
154
- /* Fall through */
144
gen_helper_vfp_sqtos(tcg_single, tcg_int,
155
- case 8: /* VABAL, VMLAL */
145
tcg_shift, tcg_fpstatus);
156
- gen_neon_addl(size);
146
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
157
- break;
147
}
158
case 9: case 11: /* VQDMLAL, VQDMLSL */
148
write_fp_sreg(s, rd, tcg_single);
159
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
149
tcg_temp_free_i32(tcg_single);
160
if (op == 11) {
150
+ break;
161
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
151
+
162
abort();
152
+ case 3: /* float16 */
163
}
153
+ tcg_single = tcg_temp_new_i32();
164
neon_store_reg64(cpu_V0, rd + pass);
154
+ if (is_signed) {
165
- } else {
155
+ gen_helper_vfp_sqtoh(tcg_single, tcg_int,
166
- /* Write back the result. */
156
+ tcg_shift, tcg_fpstatus);
167
- neon_store_reg64(cpu_V0, rd + pass);
157
+ } else {
168
}
158
+ gen_helper_vfp_uqtoh(tcg_single, tcg_int,
159
+ tcg_shift, tcg_fpstatus);
160
+ }
161
+ write_fp_sreg(s, rd, tcg_single);
162
+ tcg_temp_free_i32(tcg_single);
163
+ break;
164
+
165
+ default:
166
+ g_assert_not_reached();
167
}
168
} else {
169
TCGv_i64 tcg_int = cpu_reg(s, rd);
170
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
171
172
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
173
174
- if (is_double) {
175
- TCGv_i64 tcg_double = read_fp_dreg(s, rn);
176
+ switch (type) {
177
+ case 1: /* float64 */
178
+ tcg_double = read_fp_dreg(s, rn);
179
if (is_signed) {
180
if (!sf) {
181
gen_helper_vfp_tosld(tcg_int, tcg_double,
182
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
183
tcg_shift, tcg_fpstatus);
184
}
169
}
185
}
170
} else {
186
+ if (!sf) {
187
+ tcg_gen_ext32u_i64(tcg_int, tcg_int);
188
+ }
189
tcg_temp_free_i64(tcg_double);
190
- } else {
191
- TCGv_i32 tcg_single = read_fp_sreg(s, rn);
192
+ break;
193
+
194
+ case 0: /* float32 */
195
+ tcg_single = read_fp_sreg(s, rn);
196
if (sf) {
197
if (is_signed) {
198
gen_helper_vfp_tosqs(tcg_int, tcg_single,
199
@@ -XXX,XX +XXX,XX @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
200
tcg_temp_free_i32(tcg_dest);
201
}
202
tcg_temp_free_i32(tcg_single);
203
+ break;
204
+
205
+ case 3: /* float16 */
206
+ tcg_single = read_fp_sreg(s, rn);
207
+ if (sf) {
208
+ if (is_signed) {
209
+ gen_helper_vfp_tosqh(tcg_int, tcg_single,
210
+ tcg_shift, tcg_fpstatus);
211
+ } else {
212
+ gen_helper_vfp_touqh(tcg_int, tcg_single,
213
+ tcg_shift, tcg_fpstatus);
214
+ }
215
+ } else {
216
+ TCGv_i32 tcg_dest = tcg_temp_new_i32();
217
+ if (is_signed) {
218
+ gen_helper_vfp_toslh(tcg_dest, tcg_single,
219
+ tcg_shift, tcg_fpstatus);
220
+ } else {
221
+ gen_helper_vfp_toulh(tcg_dest, tcg_single,
222
+ tcg_shift, tcg_fpstatus);
223
+ }
224
+ tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
225
+ tcg_temp_free_i32(tcg_dest);
226
+ }
227
+ tcg_temp_free_i32(tcg_single);
228
+ break;
229
+
230
+ default:
231
+ g_assert_not_reached();
232
}
233
234
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
235
tcg_temp_free_i32(tcg_rmode);
236
-
237
- if (!sf) {
238
- tcg_gen_ext32u_i64(tcg_int, tcg_int);
239
- }
240
}
241
242
tcg_temp_free_ptr(tcg_fpstatus);
243
@@ -XXX,XX +XXX,XX @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
244
/* actual FP conversions */
245
bool itof = extract32(opcode, 1, 1);
246
247
- if (type > 1 || (rmode != 0 && opcode > 1)) {
248
+ if (rmode != 0 && opcode > 1) {
249
+ unallocated_encoding(s);
250
+ return;
251
+ }
252
+ switch (type) {
253
+ case 0: /* float32 */
254
+ case 1: /* float64 */
255
+ break;
256
+ case 3: /* float16 */
257
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
258
+ break;
259
+ }
260
+ /* fallthru */
261
+ default:
262
unallocated_encoding(s);
263
return;
264
}
265
--
171
--
266
2.17.0
172
2.20.1
267
173
268
174
New patch
Convert the Neon 3-reg-diff insns VQDMULL, VQDMLAL and VQDMLSL:
these are all saturating doubling long multiplies with a possible
accumulate step.

These are the last insns in the group which use the pass-over-each
elements loop, so we can delete that code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/neon-dp.decode       |  6 +++
 target/arm/translate-neon.inc.c | 82 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 59 ++----------------------
 3 files changed, 92 insertions(+), 55 deletions(-)
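
A standalone C sketch of what "saturating doubling long multiply with
accumulate" means for the 16-bit lane case, for illustration only (not
the QEMU helpers): the doubled product can only overflow for
-32768 * -32768, and the accumulate step itself also saturates:

/* Illustrative reference model only -- not QEMU code. */
#include <assert.h>
#include <stdint.h>

/* VQDMULL.S16: 16x16->32 multiply, doubled, saturating. */
static int32_t vqdmull_s16(int16_t n, int16_t m)
{
    int32_t p = (int32_t)n * (int32_t)m;
    if (p == 0x40000000) {       /* only -32768 * -32768; 2*p would overflow */
        return INT32_MAX;
    }
    return 2 * p;
}

/* Saturating 32-bit add, used for the accumulate step. */
static int32_t sat_add_s32(int32_t a, int32_t b)
{
    int64_t r = (int64_t)a + b;
    if (r > INT32_MAX) {
        return INT32_MAX;
    }
    if (r < INT32_MIN) {
        return INT32_MIN;
    }
    return (int32_t)r;
}

/* VQDMLAL.S16: the doubling multiply, then a saturating accumulate. */
static int32_t vqdmlal_s16(int32_t acc, int16_t n, int16_t m)
{
    return sat_add_s32(acc, vqdmull_s16(n, m));
}

int main(void)
{
    assert(vqdmull_s16(INT16_MIN, INT16_MIN) == INT32_MAX);  /* saturates */
    assert(vqdmull_s16(1000, 1000) == 2000000);
    assert(vqdmlal_s16(INT32_MAX, 1, 1) == INT32_MAX);       /* saturating accumulate */
    return 0;
}
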
15
16
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/neon-dp.decode
19
+++ b/target/arm/neon-dp.decode
20
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
21
VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
22
VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
23
24
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
25
+
26
VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
27
VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
28
29
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
30
+
31
VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
32
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
33
+
34
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
35
]
36
}
37
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/translate-neon.inc.c
40
+++ b/target/arm/translate-neon.inc.c
41
@@ -XXX,XX +XXX,XX @@ DO_VMLAL(VMLAL_S,mull_s,add)
42
DO_VMLAL(VMLAL_U,mull_u,add)
43
DO_VMLAL(VMLSL_S,mull_s,sub)
44
DO_VMLAL(VMLSL_U,mull_u,sub)
45
+
46
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
47
+{
48
+ gen_helper_neon_mull_s16(rd, rn, rm);
49
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
50
+}
51
+
52
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
53
+{
54
+ gen_mull_s32(rd, rn, rm);
55
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
56
+}
57
+
58
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
59
+{
60
+ static NeonGenTwoOpWidenFn * const opfn[] = {
61
+ NULL,
62
+ gen_VQDMULL_16,
63
+ gen_VQDMULL_32,
64
+ NULL,
65
+ };
66
+
67
+ return do_long_3d(s, a, opfn[a->size], NULL);
68
+}
69
+
70
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
71
+{
72
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
73
+}
74
+
75
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
76
+{
77
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
78
+}
79
+
80
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
81
+{
82
+ static NeonGenTwoOpWidenFn * const opfn[] = {
83
+ NULL,
84
+ gen_VQDMULL_16,
85
+ gen_VQDMULL_32,
86
+ NULL,
87
+ };
88
+ static NeonGenTwo64OpFn * const accfn[] = {
89
+ NULL,
90
+ gen_VQDMLAL_acc_16,
91
+ gen_VQDMLAL_acc_32,
92
+ NULL,
93
+ };
94
+
95
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
96
+}
97
+
98
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
99
+{
100
+ gen_helper_neon_negl_u32(rm, rm);
101
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
102
+}
103
+
104
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
105
+{
106
+ tcg_gen_neg_i64(rm, rm);
107
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
108
+}
109
+
110
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
111
+{
112
+ static NeonGenTwoOpWidenFn * const opfn[] = {
113
+ NULL,
114
+ gen_VQDMULL_16,
115
+ gen_VQDMULL_32,
116
+ NULL,
117
+ };
118
+ static NeonGenTwo64OpFn * const accfn[] = {
119
+ NULL,
120
+ gen_VQDMLSL_acc_16,
121
+ gen_VQDMLSL_acc_32,
122
+ NULL,
123
+ };
124
+
125
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
126
+}
127
diff --git a/target/arm/translate.c b/target/arm/translate.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/target/arm/translate.c
130
+++ b/target/arm/translate.c
131
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
132
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
133
{0, 0, 0, 7}, /* VABDL */
134
{0, 0, 0, 7}, /* VMLAL */
135
- {0, 0, 0, 9}, /* VQDMLAL */
136
+ {0, 0, 0, 7}, /* VQDMLAL */
137
{0, 0, 0, 7}, /* VMLSL */
138
- {0, 0, 0, 9}, /* VQDMLSL */
139
+ {0, 0, 0, 7}, /* VQDMLSL */
140
{0, 0, 0, 7}, /* Integer VMULL */
141
- {0, 0, 0, 9}, /* VQDMULL */
142
+ {0, 0, 0, 7}, /* VQDMULL */
143
{0, 0, 0, 0xa}, /* Polynomial VMULL */
144
{0, 0, 0, 7}, /* Reserved: always UNDEF */
145
};
146
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
147
}
148
return 0;
149
}
150
-
151
- /* Avoid overlapping operands. Wide source operands are
152
- always aligned so will never overlap with wide
153
- destinations in problematic ways. */
154
- if (rd == rm) {
155
- tmp = neon_load_reg(rm, 1);
156
- neon_store_scratch(2, tmp);
157
- } else if (rd == rn) {
158
- tmp = neon_load_reg(rn, 1);
159
- neon_store_scratch(2, tmp);
160
- }
161
- tmp3 = NULL;
162
- for (pass = 0; pass < 2; pass++) {
163
- if (pass == 1 && rd == rn) {
164
- tmp = neon_load_scratch(2);
165
- } else {
166
- tmp = neon_load_reg(rn, pass);
167
- }
168
- if (pass == 1 && rd == rm) {
169
- tmp2 = neon_load_scratch(2);
170
- } else {
171
- tmp2 = neon_load_reg(rm, pass);
172
- }
173
- switch (op) {
174
- case 9: case 11: case 13:
175
- /* VQDMLAL, VQDMLSL, VQDMULL */
176
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
177
- break;
178
- default: /* 15 is RESERVED: caught earlier */
179
- abort();
180
- }
181
- if (op == 13) {
182
- /* VQDMULL */
183
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
184
- neon_store_reg64(cpu_V0, rd + pass);
185
- } else {
186
- /* Accumulate. */
187
- neon_load_reg64(cpu_V1, rd + pass);
188
- switch (op) {
189
- case 9: case 11: /* VQDMLAL, VQDMLSL */
190
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
191
- if (op == 11) {
192
- gen_neon_negl(cpu_V0, size);
193
- }
194
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
195
- break;
196
- default:
197
- abort();
198
- }
199
- neon_store_reg64(cpu_V0, rd + pass);
200
- }
201
- }
202
+ abort(); /* all others handled by decodetree */
203
} else {
204
/* Two registers and a scalar. NB that for ops of this form
205
* the ARM ARM labels bit 24 as Q, but it is in our variable
206
--
207
2.20.1
208
209
1
Usually the logging of the CPU state produced by -d cpu is sufficient
1
Convert the Neon 3-reg-diff insn polynomial VMULL. This is the last
2
to diagnose problems, but sometimes you want to see the state of
2
insn in this group to be converted.
3
the floating point registers as well. We don't want to enable that
4
by default as it adds a lot of extra data to the log; instead,
5
allow it to be optionally enabled via -d fpu.
6
3
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20180510130024.31678-1-peter.maydell@linaro.org
10
---
6
---
11
include/qemu/log.h | 1 +
7
target/arm/neon-dp.decode | 2 ++
12
accel/tcg/cpu-exec.c | 9 ++++++---
8
target/arm/translate-neon.inc.c | 43 +++++++++++++++++++++++
13
util/log.c | 2 ++
9
target/arm/translate.c | 60 ++-------------------------------
14
3 files changed, 9 insertions(+), 3 deletions(-)
10
3 files changed, 48 insertions(+), 57 deletions(-)
15
11
16
diff --git a/include/qemu/log.h b/include/qemu/log.h
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
17
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
18
--- a/include/qemu/log.h
14
--- a/target/arm/neon-dp.decode
19
+++ b/include/qemu/log.h
15
+++ b/target/arm/neon-dp.decode
20
@@ -XXX,XX +XXX,XX @@ static inline bool qemu_log_separate(void)
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
21
#define CPU_LOG_PAGE (1 << 14)
17
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
22
/* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
18
23
#define CPU_LOG_TB_OP_IND (1 << 16)
19
VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
24
+#define CPU_LOG_TB_FPU (1 << 17)
20
+
25
21
+ VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
26
/* Lock output for a series of related logs. Since this is not needed
22
]
27
* for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
23
}
28
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
24
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
29
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
30
--- a/accel/tcg/cpu-exec.c
26
--- a/target/arm/translate-neon.inc.c
31
+++ b/accel/tcg/cpu-exec.c
27
+++ b/target/arm/translate-neon.inc.c
32
@@ -XXX,XX +XXX,XX @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
28
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
33
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
29
34
&& qemu_log_in_addr_range(itb->pc)) {
30
return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
35
qemu_log_lock();
31
}
36
+ int flags = 0;
32
+
37
+ if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
33
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
38
+ flags |= CPU_DUMP_FPU;
34
+{
35
+ gen_helper_gvec_3 *fn_gvec;
36
+
37
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
38
+ return false;
39
+ }
40
+
41
+ /* UNDEF accesses to D16-D31 if they don't exist. */
42
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
43
+ ((a->vd | a->vn | a->vm) & 0x10)) {
44
+ return false;
45
+ }
46
+
47
+ if (a->vd & 1) {
48
+ return false;
49
+ }
50
+
51
+ switch (a->size) {
52
+ case 0:
53
+ fn_gvec = gen_helper_neon_pmull_h;
54
+ break;
55
+ case 2:
56
+ if (!dc_isar_feature(aa32_pmull, s)) {
57
+ return false;
39
+ }
58
+ }
40
#if defined(TARGET_I386)
59
+ fn_gvec = gen_helper_gvec_pmull_q;
41
- log_cpu_state(cpu, CPU_DUMP_CCOP);
60
+ break;
42
-#else
61
+ default:
43
- log_cpu_state(cpu, 0);
62
+ return false;
44
+ flags |= CPU_DUMP_CCOP;
63
+ }
45
#endif
64
+
46
+ log_cpu_state(cpu, flags);
65
+ if (!vfp_access_check(s)) {
47
qemu_log_unlock();
66
+ return true;
48
}
67
+ }
49
#endif /* DEBUG_DISAS */
68
+
50
diff --git a/util/log.c b/util/log.c
69
+ tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
70
+ neon_reg_offset(a->vn, 0),
71
+ neon_reg_offset(a->vm, 0),
72
+ 16, 16, 0, fn_gvec);
73
+ return true;
74
+}
75
diff --git a/target/arm/translate.c b/target/arm/translate.c
51
index XXXXXXX..XXXXXXX 100644
76
index XXXXXXX..XXXXXXX 100644
52
--- a/util/log.c
77
--- a/target/arm/translate.c
53
+++ b/util/log.c
78
+++ b/target/arm/translate.c
54
@@ -XXX,XX +XXX,XX @@ const QEMULogItem qemu_log_items[] = {
79
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
55
"show trace before each executed TB (lots of logs)" },
80
{
56
{ CPU_LOG_TB_CPU, "cpu",
81
int op;
57
"show CPU registers before entering a TB (lots of logs)" },
82
int q;
58
+ { CPU_LOG_TB_FPU, "fpu",
83
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
59
+ "include FPU registers in the 'cpu' logging" },
84
+ int rd, rn, rm, rd_ofs, rm_ofs;
60
{ CPU_LOG_MMU, "mmu",
85
int size;
61
"log MMU-related activities" },
86
int pass;
62
{ CPU_LOG_PCALL, "pcall",
87
int u;
88
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
89
size = (insn >> 20) & 3;
90
vec_size = q ? 16 : 8;
91
rd_ofs = neon_reg_offset(rd, 0);
92
- rn_ofs = neon_reg_offset(rn, 0);
93
rm_ofs = neon_reg_offset(rm, 0);
94
95
if ((insn & (1 << 23)) == 0) {
96
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
97
if (size != 3) {
98
op = (insn >> 8) & 0xf;
99
if ((insn & (1 << 6)) == 0) {
100
- /* Three registers of different lengths. */
101
- /* undefreq: bit 0 : UNDEF if size == 0
102
- * bit 1 : UNDEF if size == 1
103
- * bit 2 : UNDEF if size == 2
104
- * bit 3 : UNDEF if U == 1
105
- * Note that [2:0] set implies 'always UNDEF'
106
- */
107
- int undefreq;
108
- /* prewiden, src1_wide, src2_wide, undefreq */
109
- static const int neon_3reg_wide[16][4] = {
110
- {0, 0, 0, 7}, /* VADDL: handled by decodetree */
111
- {0, 0, 0, 7}, /* VADDW: handled by decodetree */
112
- {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
113
- {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
114
- {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
115
- {0, 0, 0, 7}, /* VABAL */
116
- {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
117
- {0, 0, 0, 7}, /* VABDL */
118
- {0, 0, 0, 7}, /* VMLAL */
119
- {0, 0, 0, 7}, /* VQDMLAL */
120
- {0, 0, 0, 7}, /* VMLSL */
121
- {0, 0, 0, 7}, /* VQDMLSL */
122
- {0, 0, 0, 7}, /* Integer VMULL */
123
- {0, 0, 0, 7}, /* VQDMULL */
124
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
125
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
126
- };
127
-
128
- undefreq = neon_3reg_wide[op][3];
129
-
130
- if ((undefreq & (1 << size)) ||
131
- ((undefreq & 8) && u)) {
132
- return 1;
133
- }
134
- if (rd & 1) {
135
- return 1;
136
- }
137
-
138
- /* Handle polynomial VMULL in a single pass. */
139
- if (op == 14) {
140
- if (size == 0) {
141
- /* VMULL.P8 */
142
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
143
- 0, gen_helper_neon_pmull_h);
144
- } else {
145
- /* VMULL.P64 */
146
- if (!dc_isar_feature(aa32_pmull, s)) {
147
- return 1;
148
- }
149
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
150
- 0, gen_helper_gvec_pmull_q);
151
- }
152
- return 0;
153
- }
154
- abort(); /* all others handled by decodetree */
155
+ /* Three registers of different lengths: handled by decodetree */
156
+ return 1;
157
} else {
158
/* Two registers and a scalar. NB that for ops of this form
159
* the ARM ARM labels bit 24 as Q, but it is in our variable
63
--
160
--
64
2.17.0
161
2.20.1
65
162
66
163
diff view generated by jsdifflib
1
In float-to-integer conversion, if the floating point input
1
Mark the arrays of function pointers in trans_VSHLL_S_2sh() and
2
converts exactly to the largest or smallest integer that
2
trans_VSHLL_U_2sh() as both 'static' and 'const'.
3
fits into the result type, this is not an overflow.
4
In this situation we were producing the correct result value,
5
but were incorrectly setting the Invalid flag.
6
For example for Arm A64, "FCVTAS w0, d0" on an input of
7
0x41dfffffffc00000 should produce 0x7fffffff and set no flags.
8
3
9
Fix the boundary case to take the right half of the if()
10
statements.
11
12
This fixes a regression from 2.11 introduced by the softfloat
13
refactoring.
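(As a quick arithmetic check of the example above: 0x41dfffffffc00000 has
exponent 30 and significand 2 - 2^-30, so its value is
2^30 * (2 - 2^-30) = 2^31 - 1 = 2147483647, i.e. exactly INT32_MAX; the
conversion is exact and must saturate to 0x7fffffff without raising
Invalid.)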
14
15
Cc: qemu-stable@nongnu.org
16
Fixes: ab52f973a50
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
Message-id: 20180510140141.12120-1-peter.maydell@linaro.org
20
---
6
---
21
fpu/softfloat.c | 4 ++--
7
target/arm/translate-neon.inc.c | 4 ++--
22
1 file changed, 2 insertions(+), 2 deletions(-)
8
1 file changed, 2 insertions(+), 2 deletions(-)
23
9
24
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
10
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
25
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
26
--- a/fpu/softfloat.c
12
--- a/target/arm/translate-neon.inc.c
27
+++ b/fpu/softfloat.c
13
+++ b/target/arm/translate-neon.inc.c
28
@@ -XXX,XX +XXX,XX @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode,
14
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
29
r = UINT64_MAX;
15
30
}
16
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
31
if (p.sign) {
17
{
32
- if (r < -(uint64_t) min) {
18
- NeonGenWidenFn *widenfn[] = {
33
+ if (r <= -(uint64_t) min) {
19
+ static NeonGenWidenFn * const widenfn[] = {
34
return -r;
20
gen_helper_neon_widen_s8,
35
} else {
21
gen_helper_neon_widen_s16,
36
s->float_exception_flags = orig_flags | float_flag_invalid;
22
tcg_gen_ext_i32_i64,
37
return min;
23
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
38
}
24
39
} else {
25
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
40
- if (r < max) {
26
{
41
+ if (r <= max) {
27
- NeonGenWidenFn *widenfn[] = {
42
return r;
28
+ static NeonGenWidenFn * const widenfn[] = {
43
} else {
29
gen_helper_neon_widen_u8,
44
s->float_exception_flags = orig_flags | float_flag_invalid;
30
gen_helper_neon_widen_u16,
31
tcg_gen_extu_i32_i64,
45
--
32
--
46
2.17.0
33
2.20.1
47
34
48
35
diff view generated by jsdifflib
1
From: Alex Bennée <alex.bennee@linaro.org>
1
In commit 37bfce81b10450071 we accidentally introduced a leak of a TCG
2
temporary in do_2shift_env_64(); free it.
2
3
3
We are meant to explicitly pass fpst, not cpu_env.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/translate-neon.inc.c | 1 +
8
1 file changed, 1 insertion(+)
4
9
5
Cc: qemu-stable@nongnu.org
10
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
6
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Tested-by: Alex Bennée <alex.bennee@linaro.org>
10
Message-id: 20180512003217.9105-12-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
target/arm/translate-a64.c | 3 ++-
14
1 file changed, 2 insertions(+), 1 deletion(-)
15
16
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a64.c
12
--- a/target/arm/translate-neon.inc.c
19
+++ b/target/arm/translate-a64.c
13
+++ b/target/arm/translate-neon.inc.c
20
@@ -XXX,XX +XXX,XX @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
14
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
21
tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
15
neon_load_reg64(tmp, a->vm + pass);
22
break;
16
fn(tmp, cpu_env, tmp, constimm);
23
case 0x3: /* FSQRT */
17
neon_store_reg64(tmp, a->vd + pass);
24
- gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
18
+ tcg_temp_free_i64(tmp);
25
+ fpst = get_fpstatus_ptr(true);
19
}
26
+ gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
20
tcg_temp_free_i64(constimm);
27
break;
21
return true;
28
case 0x8: /* FRINTN */
29
case 0x9: /* FRINTP */
30
--
22
--
31
2.17.0
23
2.20.1
32
24
33
25
diff view generated by jsdifflib
New patch
1
1
Convert the VMLA, VMLS and VMUL insns in the Neon "2 registers and a
2
scalar" group to decodetree. These are 32x32->32 operations where
3
one of the inputs is the scalar, followed by a possible accumulate
4
operation of the 32-bit result.
5
6
The refactoring removes some of the oddities of the old decoder:
7
* operands to the operation and accumulation were often
8
reversed (taking advantage of the fact that most of these ops
9
are commutative); the new code follows the pseudocode order
10
* the Q bit in the insn was in a local variable 'u'; in the
11
new code it is decoded into a->q
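To make the first point concrete, here is a minimal lane-level model
(illustrative only, not QEMU code; the hypothetical helper name is mine)
of what do_2scalar() generates for the integer VMLS case:

    #include <stdint.h>

    /* One 32-bit lane of integer VMLS (by scalar): Vd = Vd - Vn * Vm[x] */
    static uint32_t vmls_lane(uint32_t vd, uint32_t vn, uint32_t scalar)
    {
        uint32_t tmp = vn * scalar;   /* opfn step */
        return vd - tmp;              /* accfn step: old Vd operand first */
    }

i.e. the accumulate step now takes the old Vd value as its first operand,
in the same order as the Arm ARM pseudocode.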
12
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
16
target/arm/neon-dp.decode | 15 ++++
17
target/arm/translate-neon.inc.c | 133 ++++++++++++++++++++++++++++++++
18
target/arm/translate.c | 77 ++----------------
19
3 files changed, 154 insertions(+), 71 deletions(-)
20
21
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
22
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/neon-dp.decode
24
+++ b/target/arm/neon-dp.decode
25
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
26
VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
27
28
VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
29
+
30
+ ##################################################################
31
+ # 2-regs-plus-scalar grouping:
32
+ # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
33
+ ##################################################################
34
+ &2scalar vm vn vd size q
35
+
36
+ @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
37
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
38
+
39
+ VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
40
+
41
+ VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
42
+
43
+ VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
44
]
45
}
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
51
16, 16, 0, fn_gvec);
52
return true;
53
}
54
+
55
+static void gen_neon_dup_low16(TCGv_i32 var)
56
+{
57
+ TCGv_i32 tmp = tcg_temp_new_i32();
58
+ tcg_gen_ext16u_i32(var, var);
59
+ tcg_gen_shli_i32(tmp, var, 16);
60
+ tcg_gen_or_i32(var, var, tmp);
61
+ tcg_temp_free_i32(tmp);
62
+}
63
+
64
+static void gen_neon_dup_high16(TCGv_i32 var)
65
+{
66
+ TCGv_i32 tmp = tcg_temp_new_i32();
67
+ tcg_gen_andi_i32(var, var, 0xffff0000);
68
+ tcg_gen_shri_i32(tmp, var, 16);
69
+ tcg_gen_or_i32(var, var, tmp);
70
+ tcg_temp_free_i32(tmp);
71
+}
72
+
73
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
74
+{
75
+ TCGv_i32 tmp;
76
+ if (size == 1) {
77
+ tmp = neon_load_reg(reg & 7, reg >> 4);
78
+ if (reg & 8) {
79
+ gen_neon_dup_high16(tmp);
80
+ } else {
81
+ gen_neon_dup_low16(tmp);
82
+ }
83
+ } else {
84
+ tmp = neon_load_reg(reg & 15, reg >> 4);
85
+ }
86
+ return tmp;
87
+}
88
+
89
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
90
+ NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
91
+{
92
+ /*
93
+ * Two registers and a scalar: perform an operation between
94
+ * the input elements and the scalar, and then possibly
95
+ * perform an accumulation operation of that result into the
96
+ * destination.
97
+ */
98
+ TCGv_i32 scalar;
99
+ int pass;
100
+
101
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
102
+ return false;
103
+ }
104
+
105
+ /* UNDEF accesses to D16-D31 if they don't exist. */
106
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
107
+ ((a->vd | a->vn | a->vm) & 0x10)) {
108
+ return false;
109
+ }
110
+
111
+ if (!opfn) {
112
+ /* Bad size (including size == 3, which is a different insn group) */
113
+ return false;
114
+ }
115
+
116
+ if (a->q && ((a->vd | a->vn) & 1)) {
117
+ return false;
118
+ }
119
+
120
+ if (!vfp_access_check(s)) {
121
+ return true;
122
+ }
123
+
124
+ scalar = neon_get_scalar(a->size, a->vm);
125
+
126
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
127
+ TCGv_i32 tmp = neon_load_reg(a->vn, pass);
128
+ opfn(tmp, tmp, scalar);
129
+ if (accfn) {
130
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
131
+ accfn(tmp, rd, tmp);
132
+ tcg_temp_free_i32(rd);
133
+ }
134
+ neon_store_reg(a->vd, pass, tmp);
135
+ }
136
+ tcg_temp_free_i32(scalar);
137
+ return true;
138
+}
139
+
140
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
141
+{
142
+ static NeonGenTwoOpFn * const opfn[] = {
143
+ NULL,
144
+ gen_helper_neon_mul_u16,
145
+ tcg_gen_mul_i32,
146
+ NULL,
147
+ };
148
+
149
+ return do_2scalar(s, a, opfn[a->size], NULL);
150
+}
151
+
152
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
153
+{
154
+ static NeonGenTwoOpFn * const opfn[] = {
155
+ NULL,
156
+ gen_helper_neon_mul_u16,
157
+ tcg_gen_mul_i32,
158
+ NULL,
159
+ };
160
+ static NeonGenTwoOpFn * const accfn[] = {
161
+ NULL,
162
+ gen_helper_neon_add_u16,
163
+ tcg_gen_add_i32,
164
+ NULL,
165
+ };
166
+
167
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
168
+}
169
+
170
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
171
+{
172
+ static NeonGenTwoOpFn * const opfn[] = {
173
+ NULL,
174
+ gen_helper_neon_mul_u16,
175
+ tcg_gen_mul_i32,
176
+ NULL,
177
+ };
178
+ static NeonGenTwoOpFn * const accfn[] = {
179
+ NULL,
180
+ gen_helper_neon_sub_u16,
181
+ tcg_gen_sub_i32,
182
+ NULL,
183
+ };
184
+
185
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
186
+}
187
diff --git a/target/arm/translate.c b/target/arm/translate.c
188
index XXXXXXX..XXXXXXX 100644
189
--- a/target/arm/translate.c
190
+++ b/target/arm/translate.c
191
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
192
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
193
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
194
195
-static void gen_neon_dup_low16(TCGv_i32 var)
196
-{
197
- TCGv_i32 tmp = tcg_temp_new_i32();
198
- tcg_gen_ext16u_i32(var, var);
199
- tcg_gen_shli_i32(tmp, var, 16);
200
- tcg_gen_or_i32(var, var, tmp);
201
- tcg_temp_free_i32(tmp);
202
-}
203
-
204
-static void gen_neon_dup_high16(TCGv_i32 var)
205
-{
206
- TCGv_i32 tmp = tcg_temp_new_i32();
207
- tcg_gen_andi_i32(var, var, 0xffff0000);
208
- tcg_gen_shri_i32(tmp, var, 16);
209
- tcg_gen_or_i32(var, var, tmp);
210
- tcg_temp_free_i32(tmp);
211
-}
212
-
213
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
214
{
215
#ifndef CONFIG_USER_ONLY
216
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
217
218
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
219
220
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
221
-{
222
- switch (size) {
223
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
224
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
225
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
226
- default: abort();
227
- }
228
-}
229
-
230
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
231
-{
232
- switch (size) {
233
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
234
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
235
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
236
- default: return;
237
- }
238
-}
239
-
240
static TCGv_i32 neon_load_scratch(int scratch)
241
{
242
TCGv_i32 tmp = tcg_temp_new_i32();
243
@@ -XXX,XX +XXX,XX @@ static void neon_store_scratch(int scratch, TCGv_i32 var)
244
tcg_temp_free_i32(var);
245
}
246
247
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
248
-{
249
- TCGv_i32 tmp;
250
- if (size == 1) {
251
- tmp = neon_load_reg(reg & 7, reg >> 4);
252
- if (reg & 8) {
253
- gen_neon_dup_high16(tmp);
254
- } else {
255
- gen_neon_dup_low16(tmp);
256
- }
257
- } else {
258
- tmp = neon_load_reg(reg & 15, reg >> 4);
259
- }
260
- return tmp;
261
-}
262
-
263
static int gen_neon_unzip(int rd, int rm, int size, int q)
264
{
265
TCGv_ptr pd, pm;
266
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
267
return 1;
268
}
269
switch (op) {
270
+ case 0: /* Integer VMLA scalar */
271
+ case 4: /* Integer VMLS scalar */
272
+ case 8: /* Integer VMUL scalar */
273
+ return 1; /* handled by decodetree */
274
+
275
case 1: /* Float VMLA scalar */
276
case 5: /* Floating point VMLS scalar */
277
case 9: /* Floating point VMUL scalar */
278
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
279
return 1;
280
}
281
/* fall through */
282
- case 0: /* Integer VMLA scalar */
283
- case 4: /* Integer VMLS scalar */
284
- case 8: /* Integer VMUL scalar */
285
case 12: /* VQDMULH scalar */
286
case 13: /* VQRDMULH scalar */
287
if (u && ((rd | rn) & 1)) {
288
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
289
} else {
290
gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
291
}
292
- } else if (op & 1) {
293
+ } else {
294
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
295
gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
296
tcg_temp_free_ptr(fpstatus);
297
- } else {
298
- switch (size) {
299
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
300
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
301
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
302
- default: abort();
303
- }
304
}
305
tcg_temp_free_i32(tmp2);
306
if (op < 8) {
307
/* Accumulate. */
308
tmp2 = neon_load_reg(rd, pass);
309
switch (op) {
310
- case 0:
311
- gen_neon_add(size, tmp, tmp2);
312
- break;
313
case 1:
314
{
315
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
316
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
317
tcg_temp_free_ptr(fpstatus);
318
break;
319
}
320
- case 4:
321
- gen_neon_rsb(size, tmp, tmp2);
322
- break;
323
case 5:
324
{
325
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
326
--
327
2.20.1
328
329
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Convert the float versions of VMLA, VMLS and VMUL in the Neon
2
2-reg-scalar group to decodetree.
2
3
3
No sense in emitting code after the exception.
4
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Tested-by: Alex Bennée <alex.bennee@linaro.org>
7
Message-id: 20180512003217.9105-3-richard.henderson@linaro.org
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
5
---
11
target/arm/translate-a64.c | 2 +-
6
As noted in the comment on the WRAP_FP_FN macro, we could have
12
1 file changed, 1 insertion(+), 1 deletion(-)
7
had a do_2scalar_fp() function, but for 3 insns it seemed
8
simpler to just do the wrapping to get hold of the fpstatus ptr.
9
(These are the only fp insns in the group.)
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
target/arm/neon-dp.decode | 3 ++
13
target/arm/translate-neon.inc.c | 65 +++++++++++++++++++++++++++++++++
14
target/arm/translate.c | 37 ++-----------------
15
3 files changed, 71 insertions(+), 34 deletions(-)
13
16
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-a64.c
19
--- a/target/arm/neon-dp.decode
17
+++ b/target/arm/translate-a64.c
20
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
21
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
19
default:
22
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
/* all other sf/type/rmode combinations are invalid */
23
21
unallocated_encoding(s);
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
22
- break;
25
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
23
+ return;
26
24
}
27
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
25
28
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
26
if (!fp_access_check(s)) {
29
30
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
31
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
32
]
33
}
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-neon.inc.c
37
+++ b/target/arm/translate-neon.inc.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
39
40
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
41
}
42
+
43
+/*
44
+ * Rather than have a float-specific version of do_2scalar just for
45
+ * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
46
+ * a NeonGenTwoOpFn.
47
+ */
48
+#define WRAP_FP_FN(WRAPNAME, FUNC) \
49
+ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
50
+ { \
51
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
52
+ FUNC(rd, rn, rm, fpstatus); \
53
+ tcg_temp_free_ptr(fpstatus); \
54
+ }
55
+
56
+WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
57
+WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
58
+WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
59
+
60
+static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
61
+{
62
+ static NeonGenTwoOpFn * const opfn[] = {
63
+ NULL,
64
+ NULL, /* TODO: fp16 support */
65
+ gen_VMUL_F_mul,
66
+ NULL,
67
+ };
68
+
69
+ return do_2scalar(s, a, opfn[a->size], NULL);
70
+}
71
+
72
+static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
73
+{
74
+ static NeonGenTwoOpFn * const opfn[] = {
75
+ NULL,
76
+ NULL, /* TODO: fp16 support */
77
+ gen_VMUL_F_mul,
78
+ NULL,
79
+ };
80
+ static NeonGenTwoOpFn * const accfn[] = {
81
+ NULL,
82
+ NULL, /* TODO: fp16 support */
83
+ gen_VMUL_F_add,
84
+ NULL,
85
+ };
86
+
87
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
88
+}
89
+
90
+static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
91
+{
92
+ static NeonGenTwoOpFn * const opfn[] = {
93
+ NULL,
94
+ NULL, /* TODO: fp16 support */
95
+ gen_VMUL_F_mul,
96
+ NULL,
97
+ };
98
+ static NeonGenTwoOpFn * const accfn[] = {
99
+ NULL,
100
+ NULL, /* TODO: fp16 support */
101
+ gen_VMUL_F_sub,
102
+ NULL,
103
+ };
104
+
105
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
106
+}
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
case 0: /* Integer VMLA scalar */
113
case 4: /* Integer VMLS scalar */
114
case 8: /* Integer VMUL scalar */
115
- return 1; /* handled by decodetree */
116
-
117
case 1: /* Float VMLA scalar */
118
case 5: /* Floating point VMLS scalar */
119
case 9: /* Floating point VMUL scalar */
120
- if (size == 1) {
121
- return 1;
122
- }
123
- /* fall through */
124
+ return 1; /* handled by decodetree */
125
+
126
case 12: /* VQDMULH scalar */
127
case 13: /* VQRDMULH scalar */
128
if (u && ((rd | rn) & 1)) {
129
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
130
} else {
131
gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
132
}
133
- } else if (op == 13) {
134
+ } else {
135
if (size == 1) {
136
gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
137
} else {
138
gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
139
}
140
- } else {
141
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
142
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
143
- tcg_temp_free_ptr(fpstatus);
144
}
145
tcg_temp_free_i32(tmp2);
146
- if (op < 8) {
147
- /* Accumulate. */
148
- tmp2 = neon_load_reg(rd, pass);
149
- switch (op) {
150
- case 1:
151
- {
152
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
153
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
154
- tcg_temp_free_ptr(fpstatus);
155
- break;
156
- }
157
- case 5:
158
- {
159
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
160
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
161
- tcg_temp_free_ptr(fpstatus);
162
- break;
163
- }
164
- default:
165
- abort();
166
- }
167
- tcg_temp_free_i32(tmp2);
168
- }
169
neon_store_reg(rd, pass, tmp);
170
}
171
break;
27
--
172
--
28
2.17.0
173
2.20.1
29
174
30
175
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Convert the VQDMULH and VQRDMULH insns in the 2-reg-scalar group
2
to decodetree.
2
3
3
Cc: qemu-stable@nongnu.org
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Tested-by: Alex Bennée <alex.bennee@linaro.org>
7
Message-id: 20180512003217.9105-6-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
target/arm/translate-a64.c | 30 ++++++++++++++----------------
7
target/arm/neon-dp.decode | 3 +++
11
1 file changed, 14 insertions(+), 16 deletions(-)
8
target/arm/translate-neon.inc.c | 29 +++++++++++++++++++++++
9
target/arm/translate.c | 42 ++-------------------------------
10
3 files changed, 34 insertions(+), 40 deletions(-)
12
11
13
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/translate-a64.c
14
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/translate-a64.c
15
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
18
return v;
17
18
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
19
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
20
+
21
+ VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
22
+ VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
23
]
19
}
24
}
20
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
21
+static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-neon.inc.c
28
+++ b/target/arm/translate-neon.inc.c
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
30
31
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
32
}
33
+
34
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
35
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
36
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
37
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
38
+
39
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
22
+{
40
+{
23
+ TCGv_i32 v = tcg_temp_new_i32();
41
+ static NeonGenTwoOpFn * const opfn[] = {
42
+ NULL,
43
+ gen_VQDMULH_16,
44
+ gen_VQDMULH_32,
45
+ NULL,
46
+ };
24
+
47
+
25
+ tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
48
+ return do_2scalar(s, a, opfn[a->size], NULL);
26
+ return v;
27
+}
49
+}
28
+
50
+
29
/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
51
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
30
* If SVE is not enabled, then there are only 128 bits in the vector.
52
+{
31
*/
53
+ static NeonGenTwoOpFn * const opfn[] = {
32
@@ -XXX,XX +XXX,XX @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
54
+ NULL,
33
static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
55
+ gen_VQRDMULH_16,
56
+ gen_VQRDMULH_32,
57
+ NULL,
58
+ };
59
+
60
+ return do_2scalar(s, a, opfn[a->size], NULL);
61
+}
62
diff --git a/target/arm/translate.c b/target/arm/translate.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/translate.c
65
+++ b/target/arm/translate.c
66
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
67
68
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
69
70
-static TCGv_i32 neon_load_scratch(int scratch)
71
-{
72
- TCGv_i32 tmp = tcg_temp_new_i32();
73
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
74
- return tmp;
75
-}
76
-
77
-static void neon_store_scratch(int scratch, TCGv_i32 var)
78
-{
79
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
80
- tcg_temp_free_i32(var);
81
-}
82
-
83
static int gen_neon_unzip(int rd, int rm, int size, int q)
34
{
84
{
35
TCGv_ptr fpst = NULL;
85
TCGv_ptr pd, pm;
36
- TCGv_i32 tcg_op = tcg_temp_new_i32();
86
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
37
+ TCGv_i32 tcg_op = read_fp_hreg(s, rn);
87
case 1: /* Float VMLA scalar */
38
TCGv_i32 tcg_res = tcg_temp_new_i32();
88
case 5: /* Floating point VMLS scalar */
39
89
case 9: /* Floating point VMUL scalar */
40
- read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
90
- return 1; /* handled by decodetree */
41
-
91
-
42
switch (opcode) {
92
case 12: /* VQDMULH scalar */
43
case 0x0: /* FMOV */
93
case 13: /* VQRDMULH scalar */
44
tcg_gen_mov_i32(tcg_res, tcg_op);
94
- if (u && ((rd | rn) & 1)) {
45
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
95
- return 1;
46
tcg_temp_free_i64(tcg_op2);
96
- }
47
tcg_temp_free_i64(tcg_res);
97
- tmp = neon_get_scalar(size, rm);
48
} else {
98
- neon_store_scratch(0, tmp);
49
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
99
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
50
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
100
- tmp = neon_load_scratch(0);
51
+ TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
101
- tmp2 = neon_load_reg(rn, pass);
52
+ TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
102
- if (op == 12) {
53
TCGv_i64 tcg_res = tcg_temp_new_i64();
103
- if (size == 1) {
54
104
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
55
- read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
105
- } else {
56
- read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
106
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
57
-
107
- }
58
gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
108
- } else {
59
gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
109
- if (size == 1) {
60
110
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
61
@@ -XXX,XX +XXX,XX @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
111
- } else {
62
112
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
63
fpst = get_fpstatus_ptr(true);
113
- }
64
114
- }
65
- tcg_op1 = tcg_temp_new_i32();
115
- tcg_temp_free_i32(tmp2);
66
- tcg_op2 = tcg_temp_new_i32();
116
- neon_store_reg(rd, pass, tmp);
67
+ tcg_op1 = read_fp_hreg(s, rn);
117
- }
68
+ tcg_op2 = read_fp_hreg(s, rm);
118
- break;
69
tcg_res = tcg_temp_new_i32();
119
+ return 1; /* handled by decodetree */
70
120
+
71
- read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
121
case 3: /* VQDMLAL scalar */
72
- read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
122
case 7: /* VQDMLSL scalar */
73
-
123
case 11: /* VQDMULL scalar */
74
switch (fpopcode) {
75
case 0x03: /* FMULX */
76
gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
77
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
78
}
79
80
if (is_scalar) {
81
- TCGv_i32 tcg_op = tcg_temp_new_i32();
82
+ TCGv_i32 tcg_op = read_fp_hreg(s, rn);
83
TCGv_i32 tcg_res = tcg_temp_new_i32();
84
85
- read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
86
-
87
switch (fpop) {
88
case 0x1a: /* FCVTNS */
89
case 0x1b: /* FCVTMS */
90
--
124
--
91
2.17.0
125
2.20.1
92
126
93
127
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Convert the VQRDMLAH and VQRDMLSH insns in the 2-reg-scalar
2
group to decodetree.
2
3
3
We missed all of the scalar fp16 fma operations.
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/neon-dp.decode | 3 ++
8
target/arm/translate-neon.inc.c | 74 +++++++++++++++++++++++++++++++++
9
target/arm/translate.c | 38 +----------------
10
3 files changed, 79 insertions(+), 36 deletions(-)
4
11
5
Cc: qemu-stable@nongnu.org
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Tested-by: Alex Bennée <alex.bennee@linaro.org>
9
Message-id: 20180512003217.9105-8-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/translate-a64.c | 48 ++++++++++++++++++++++++++++++++++++++
13
1 file changed, 48 insertions(+)
14
15
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.c
14
--- a/target/arm/neon-dp.decode
18
+++ b/target/arm/translate-a64.c
15
+++ b/target/arm/neon-dp.decode
19
@@ -XXX,XX +XXX,XX @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
20
tcg_temp_free_i64(tcg_res);
17
18
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
19
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
20
+
21
+ VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
22
+ VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
23
]
21
}
24
}
22
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
23
+/* Floating-point data-processing (3 source) - half precision */
26
index XXXXXXX..XXXXXXX 100644
24
+static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
27
--- a/target/arm/translate-neon.inc.c
25
+ int rd, int rn, int rm, int ra)
28
+++ b/target/arm/translate-neon.inc.c
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
30
31
return do_2scalar(s, a, opfn[a->size], NULL);
32
}
33
+
34
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
35
+ NeonGenThreeOpEnvFn *opfn)
26
+{
36
+{
27
+ TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
37
+ /*
28
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
38
+ * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
29
+ TCGv_ptr fpst = get_fpstatus_ptr(true);
39
+ * performs a kind of fused op-then-accumulate using a helper
40
+ * function that takes all of rd, rn and the scalar at once.
41
+ */
42
+ TCGv_i32 scalar;
43
+ int pass;
30
+
44
+
31
+ tcg_op1 = read_fp_hreg(s, rn);
45
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
32
+ tcg_op2 = read_fp_hreg(s, rm);
46
+ return false;
33
+ tcg_op3 = read_fp_hreg(s, ra);
34
+
35
+ /* These are fused multiply-add, and must be done as one
36
+ * floating point operation with no rounding between the
37
+ * multiplication and addition steps.
38
+ * NB that doing the negations here as separate steps is
39
+ * correct : an input NaN should come out with its sign bit
40
+ * flipped if it is a negated-input.
41
+ */
42
+ if (o1 == true) {
43
+ tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
44
+ }
47
+ }
45
+
48
+
46
+ if (o0 != o1) {
49
+ if (!dc_isar_feature(aa32_rdm, s)) {
47
+ tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
50
+ return false;
48
+ }
51
+ }
49
+
52
+
50
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
53
+ /* UNDEF accesses to D16-D31 if they don't exist. */
54
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
55
+ ((a->vd | a->vn | a->vm) & 0x10)) {
56
+ return false;
57
+ }
51
+
58
+
52
+ write_fp_sreg(s, rd, tcg_res);
59
+ if (!opfn) {
60
+ /* Bad size (including size == 3, which is a different insn group) */
61
+ return false;
62
+ }
53
+
63
+
54
+ tcg_temp_free_ptr(fpst);
64
+ if (a->q && ((a->vd | a->vn) & 1)) {
55
+ tcg_temp_free_i32(tcg_op1);
65
+ return false;
56
+ tcg_temp_free_i32(tcg_op2);
66
+ }
57
+ tcg_temp_free_i32(tcg_op3);
67
+
58
+ tcg_temp_free_i32(tcg_res);
68
+ if (!vfp_access_check(s)) {
69
+ return true;
70
+ }
71
+
72
+ scalar = neon_get_scalar(a->size, a->vm);
73
+
74
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
75
+ TCGv_i32 rn = neon_load_reg(a->vn, pass);
76
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
77
+ opfn(rd, cpu_env, rn, scalar, rd);
78
+ tcg_temp_free_i32(rn);
79
+ neon_store_reg(a->vd, pass, rd);
80
+ }
81
+ tcg_temp_free_i32(scalar);
82
+
83
+ return true;
59
+}
84
+}
60
+
85
+
61
/* Floating point data-processing (3 source)
86
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
62
* 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
87
+{
63
* +---+---+---+-----------+------+----+------+----+------+------+------+
88
+ static NeonGenThreeOpEnvFn *opfn[] = {
64
@@ -XXX,XX +XXX,XX @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
89
+ NULL,
65
}
90
+ gen_helper_neon_qrdmlah_s16,
66
handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
91
+ gen_helper_neon_qrdmlah_s32,
67
break;
92
+ NULL,
68
+ case 3:
93
+ };
69
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
94
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
70
+ unallocated_encoding(s);
95
+}
71
+ return;
96
+
72
+ }
97
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
73
+ if (!fp_access_check(s)) {
98
+{
74
+ return;
99
+ static NeonGenThreeOpEnvFn *opfn[] = {
75
+ }
100
+ NULL,
76
+ handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
101
+ gen_helper_neon_qrdmlsh_s16,
77
+ break;
102
+ gen_helper_neon_qrdmlsh_s32,
78
default:
103
+ NULL,
79
unallocated_encoding(s);
104
+ };
80
}
105
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
106
+}
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
case 9: /* Floating point VMUL scalar */
113
case 12: /* VQDMULH scalar */
114
case 13: /* VQRDMULH scalar */
115
+ case 14: /* VQRDMLAH scalar */
116
+ case 15: /* VQRDMLSH scalar */
117
return 1; /* handled by decodetree */
118
119
case 3: /* VQDMLAL scalar */
120
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
121
neon_store_reg64(cpu_V0, rd + pass);
122
}
123
break;
124
- case 14: /* VQRDMLAH scalar */
125
- case 15: /* VQRDMLSH scalar */
126
- {
127
- NeonGenThreeOpEnvFn *fn;
128
-
129
- if (!dc_isar_feature(aa32_rdm, s)) {
130
- return 1;
131
- }
132
- if (u && ((rd | rn) & 1)) {
133
- return 1;
134
- }
135
- if (op == 14) {
136
- if (size == 1) {
137
- fn = gen_helper_neon_qrdmlah_s16;
138
- } else {
139
- fn = gen_helper_neon_qrdmlah_s32;
140
- }
141
- } else {
142
- if (size == 1) {
143
- fn = gen_helper_neon_qrdmlsh_s16;
144
- } else {
145
- fn = gen_helper_neon_qrdmlsh_s32;
146
- }
147
- }
148
-
149
- tmp2 = neon_get_scalar(size, rm);
150
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
151
- tmp = neon_load_reg(rn, pass);
152
- tmp3 = neon_load_reg(rd, pass);
153
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
154
- tcg_temp_free_i32(tmp3);
155
- neon_store_reg(rd, pass, tmp);
156
- }
157
- tcg_temp_free_i32(tmp2);
158
- }
159
- break;
160
default:
161
g_assert_not_reached();
162
}
81
--
163
--
82
2.17.0
164
2.20.1
83
165
84
166
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Convert the Neon 2-reg-scalar long multiplies to decodetree.
2
These are the last instructions in the group.
2
3
3
Per the Physical Layer Simplified Spec. "4.3.10.4 Switch Function Status":
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/neon-dp.decode | 18 ++++
8
target/arm/translate-neon.inc.c | 163 ++++++++++++++++++++++++++++
9
target/arm/translate.c | 182 ++------------------------------
10
3 files changed, 187 insertions(+), 176 deletions(-)
4
11
5
The block length is predefined to 512 bits
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
6
7
and "4.10.2 SD Status":
8
9
The SD Status contains status bits that are related to the SD Memory Card
10
proprietary features and may be used for future application-specific usage.
11
The size of the SD Status is one data block of 512 bit. The content of this
12
register is transmitted to the Host over the DAT bus along with a 16-bit CRC.
13
14
Thus the 16-bit CRC goes at offset 64.
15
16
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
17
Message-id: 20180509060104.4458-3-f4bug@amsat.org
18
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
---
21
hw/sd/sd.c | 2 +-
22
1 file changed, 1 insertion(+), 1 deletion(-)
23
24
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
25
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/sd/sd.c
14
--- a/target/arm/neon-dp.decode
27
+++ b/hw/sd/sd.c
15
+++ b/target/arm/neon-dp.decode
28
@@ -XXX,XX +XXX,XX @@ static void sd_function_switch(SDState *sd, uint32_t arg)
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
29
sd->data[14 + (i >> 1)] = new_func << ((i * 4) & 4);
17
18
@2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
19
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
+ # For the 'long' ops the Q bit is part of insn decode
21
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
22
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
23
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
25
VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
26
27
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
28
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
29
+
30
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
31
+
32
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
33
VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
34
35
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
36
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
37
+
38
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
39
+
40
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
41
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
42
43
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
44
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
45
+
46
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
47
+
48
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
49
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
50
51
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/translate-neon.inc.c
54
+++ b/target/arm/translate-neon.inc.c
55
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
56
};
57
return do_vqrdmlah_2sc(s, a, opfn[a->size]);
58
}
59
+
60
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
61
+ NeonGenTwoOpWidenFn *opfn,
62
+ NeonGenTwo64OpFn *accfn)
63
+{
64
+ /*
65
+ * Two registers and a scalar, long operations: perform an
66
+ * operation on the input elements and the scalar which produces
67
+ * a double-width result, and then possibly perform an accumulation
68
+ * operation of that result into the destination.
69
+ */
70
+ TCGv_i32 scalar, rn;
71
+ TCGv_i64 rn0_64, rn1_64;
72
+
73
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
74
+ return false;
75
+ }
76
+
77
+ /* UNDEF accesses to D16-D31 if they don't exist. */
78
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
79
+ ((a->vd | a->vn | a->vm) & 0x10)) {
80
+ return false;
81
+ }
82
+
83
+ if (!opfn) {
84
+ /* Bad size (including size == 3, which is a different insn group) */
85
+ return false;
86
+ }
87
+
88
+ if (a->vd & 1) {
89
+ return false;
90
+ }
91
+
92
+ if (!vfp_access_check(s)) {
93
+ return true;
94
+ }
95
+
96
+ scalar = neon_get_scalar(a->size, a->vm);
97
+
98
+ /* Load all inputs before writing any outputs, in case of overlap */
99
+ rn = neon_load_reg(a->vn, 0);
100
+ rn0_64 = tcg_temp_new_i64();
101
+ opfn(rn0_64, rn, scalar);
102
+ tcg_temp_free_i32(rn);
103
+
104
+ rn = neon_load_reg(a->vn, 1);
105
+ rn1_64 = tcg_temp_new_i64();
106
+ opfn(rn1_64, rn, scalar);
107
+ tcg_temp_free_i32(rn);
108
+ tcg_temp_free_i32(scalar);
109
+
110
+ if (accfn) {
111
+ TCGv_i64 t64 = tcg_temp_new_i64();
112
+ neon_load_reg64(t64, a->vd);
113
+ accfn(t64, t64, rn0_64);
114
+ neon_store_reg64(t64, a->vd);
115
+ neon_load_reg64(t64, a->vd + 1);
116
+ accfn(t64, t64, rn1_64);
117
+ neon_store_reg64(t64, a->vd + 1);
118
+ tcg_temp_free_i64(t64);
119
+ } else {
120
+ neon_store_reg64(rn0_64, a->vd);
121
+ neon_store_reg64(rn1_64, a->vd + 1);
122
+ }
123
+ tcg_temp_free_i64(rn0_64);
124
+ tcg_temp_free_i64(rn1_64);
125
+ return true;
126
+}
127
+
128
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
129
+{
130
+ static NeonGenTwoOpWidenFn * const opfn[] = {
131
+ NULL,
132
+ gen_helper_neon_mull_s16,
133
+ gen_mull_s32,
134
+ NULL,
135
+ };
136
+
137
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
138
+}
139
+
140
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
141
+{
142
+ static NeonGenTwoOpWidenFn * const opfn[] = {
143
+ NULL,
144
+ gen_helper_neon_mull_u16,
145
+ gen_mull_u32,
146
+ NULL,
147
+ };
148
+
149
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
150
+}
151
+
152
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
153
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
154
+ { \
155
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
156
+ NULL, \
157
+ gen_helper_neon_##MULL##16, \
158
+ gen_##MULL##32, \
159
+ NULL, \
160
+ }; \
161
+ static NeonGenTwo64OpFn * const accfn[] = { \
162
+ NULL, \
163
+ gen_helper_neon_##ACC##l_u32, \
164
+ tcg_gen_##ACC##_i64, \
165
+ NULL, \
166
+ }; \
167
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
168
+ }
169
+
170
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
171
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
172
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
173
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
174
+
175
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
176
+{
177
+ static NeonGenTwoOpWidenFn * const opfn[] = {
178
+ NULL,
179
+ gen_VQDMULL_16,
180
+ gen_VQDMULL_32,
181
+ NULL,
182
+ };
183
+
184
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
185
+}
186
+
187
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
188
+{
189
+ static NeonGenTwoOpWidenFn * const opfn[] = {
190
+ NULL,
191
+ gen_VQDMULL_16,
192
+ gen_VQDMULL_32,
193
+ NULL,
194
+ };
195
+ static NeonGenTwo64OpFn * const accfn[] = {
196
+ NULL,
197
+ gen_VQDMLAL_acc_16,
198
+ gen_VQDMLAL_acc_32,
199
+ NULL,
200
+ };
201
+
202
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
203
+}
204
+
205
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
206
+{
207
+ static NeonGenTwoOpWidenFn * const opfn[] = {
208
+ NULL,
209
+ gen_VQDMULL_16,
210
+ gen_VQDMULL_32,
211
+ NULL,
212
+ };
213
+ static NeonGenTwo64OpFn * const accfn[] = {
214
+ NULL,
215
+ gen_VQDMLSL_acc_16,
216
+ gen_VQDMLSL_acc_32,
217
+ NULL,
218
+ };
219
+
220
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
221
+}
222
diff --git a/target/arm/translate.c b/target/arm/translate.c
223
index XXXXXXX..XXXXXXX 100644
224
--- a/target/arm/translate.c
225
+++ b/target/arm/translate.c
226
@@ -XXX,XX +XXX,XX @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
227
tcg_gen_ext16s_i32(dest, var);
228
}
229
230
-/* 32x32->64 multiply. Marks inputs as dead. */
231
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
232
-{
233
- TCGv_i32 lo = tcg_temp_new_i32();
234
- TCGv_i32 hi = tcg_temp_new_i32();
235
- TCGv_i64 ret;
236
-
237
- tcg_gen_mulu2_i32(lo, hi, a, b);
238
- tcg_temp_free_i32(a);
239
- tcg_temp_free_i32(b);
240
-
241
- ret = tcg_temp_new_i64();
242
- tcg_gen_concat_i32_i64(ret, lo, hi);
243
- tcg_temp_free_i32(lo);
244
- tcg_temp_free_i32(hi);
245
-
246
- return ret;
247
-}
248
-
249
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
250
-{
251
- TCGv_i32 lo = tcg_temp_new_i32();
252
- TCGv_i32 hi = tcg_temp_new_i32();
253
- TCGv_i64 ret;
254
-
255
- tcg_gen_muls2_i32(lo, hi, a, b);
256
- tcg_temp_free_i32(a);
257
- tcg_temp_free_i32(b);
258
-
259
- ret = tcg_temp_new_i64();
260
- tcg_gen_concat_i32_i64(ret, lo, hi);
261
- tcg_temp_free_i32(lo);
262
- tcg_temp_free_i32(hi);
263
-
264
- return ret;
265
-}
266
-
267
/* Swap low and high halfwords. */
268
static void gen_swap_half(TCGv_i32 var)
269
{
270
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
30
}
271
}
31
memset(&sd->data[17], 0, 47);
32
- stw_be_p(sd->data + 65, sd_crc16(sd->data, 64));
33
+ stw_be_p(sd->data + 64, sd_crc16(sd->data, 64));
34
}
272
}
35
273
36
static inline bool sd_wp_addr(SDState *sd, uint64_t addr)
274
-static inline void gen_neon_negl(TCGv_i64 var, int size)
275
-{
276
- switch (size) {
277
- case 0: gen_helper_neon_negl_u16(var, var); break;
278
- case 1: gen_helper_neon_negl_u32(var, var); break;
279
- case 2:
280
- tcg_gen_neg_i64(var, var);
281
- break;
282
- default: abort();
283
- }
284
-}
285
-
286
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
287
-{
288
- switch (size) {
289
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
290
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
291
- default: abort();
292
- }
293
-}
294
-
295
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
296
- int size, int u)
297
-{
298
- TCGv_i64 tmp;
299
-
300
- switch ((size << 1) | u) {
301
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
302
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
303
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
304
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
305
- case 4:
306
- tmp = gen_muls_i64_i32(a, b);
307
- tcg_gen_mov_i64(dest, tmp);
308
- tcg_temp_free_i64(tmp);
309
- break;
310
- case 5:
311
- tmp = gen_mulu_i64_i32(a, b);
312
- tcg_gen_mov_i64(dest, tmp);
313
- tcg_temp_free_i64(tmp);
314
- break;
315
- default: abort();
316
- }
317
-
318
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
319
- Don't forget to clean them now. */
320
- if (size < 2) {
321
- tcg_temp_free_i32(a);
322
- tcg_temp_free_i32(b);
323
- }
324
-}
325
-
326
static void gen_neon_narrow_op(int op, int u, int size,
327
TCGv_i32 dest, TCGv_i64 src)
328
{
329
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
330
int u;
331
int vec_size;
332
uint32_t imm;
333
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
334
+ TCGv_i32 tmp, tmp2, tmp3, tmp5;
335
TCGv_ptr ptr1;
336
TCGv_i64 tmp64;
337
338
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
339
return 1;
340
} else { /* (insn & 0x00800010 == 0x00800000) */
341
if (size != 3) {
342
- op = (insn >> 8) & 0xf;
343
- if ((insn & (1 << 6)) == 0) {
344
- /* Three registers of different lengths: handled by decodetree */
345
- return 1;
346
- } else {
347
- /* Two registers and a scalar. NB that for ops of this form
348
- * the ARM ARM labels bit 24 as Q, but it is in our variable
349
- * 'u', not 'q'.
350
- */
351
- if (size == 0) {
352
- return 1;
353
- }
354
- switch (op) {
355
- case 0: /* Integer VMLA scalar */
356
- case 4: /* Integer VMLS scalar */
357
- case 8: /* Integer VMUL scalar */
358
- case 1: /* Float VMLA scalar */
359
- case 5: /* Floating point VMLS scalar */
360
- case 9: /* Floating point VMUL scalar */
361
- case 12: /* VQDMULH scalar */
362
- case 13: /* VQRDMULH scalar */
363
- case 14: /* VQRDMLAH scalar */
364
- case 15: /* VQRDMLSH scalar */
365
- return 1; /* handled by decodetree */
366
-
367
- case 3: /* VQDMLAL scalar */
368
- case 7: /* VQDMLSL scalar */
369
- case 11: /* VQDMULL scalar */
370
- if (u == 1) {
371
- return 1;
372
- }
373
- /* fall through */
374
- case 2: /* VMLAL sclar */
375
- case 6: /* VMLSL scalar */
376
- case 10: /* VMULL scalar */
377
- if (rd & 1) {
378
- return 1;
379
- }
380
- tmp2 = neon_get_scalar(size, rm);
381
- /* We need a copy of tmp2 because gen_neon_mull
382
- * deletes it during pass 0. */
383
- tmp4 = tcg_temp_new_i32();
384
- tcg_gen_mov_i32(tmp4, tmp2);
385
- tmp3 = neon_load_reg(rn, 1);
386
-
387
- for (pass = 0; pass < 2; pass++) {
388
- if (pass == 0) {
389
- tmp = neon_load_reg(rn, 0);
390
- } else {
391
- tmp = tmp3;
392
- tmp2 = tmp4;
393
- }
394
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
395
- if (op != 11) {
396
- neon_load_reg64(cpu_V1, rd + pass);
397
- }
398
- switch (op) {
399
- case 6:
400
- gen_neon_negl(cpu_V0, size);
401
- /* Fall through */
402
- case 2:
403
- gen_neon_addl(size);
404
- break;
405
- case 3: case 7:
406
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
407
- if (op == 7) {
408
- gen_neon_negl(cpu_V0, size);
409
- }
410
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
411
- break;
412
- case 10:
413
- /* no-op */
414
- break;
415
- case 11:
416
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
417
- break;
418
- default:
419
- abort();
420
- }
421
- neon_store_reg64(cpu_V0, rd + pass);
422
- }
423
- break;
424
- default:
425
- g_assert_not_reached();
426
- }
427
- }
428
+ /*
429
+ * Three registers of different lengths, or two registers and
430
+ * a scalar: handled by decodetree
431
+ */
432
+ return 1;
433
} else { /* size == 3 */
434
if (!u) {
435
/* Extract. */
37
--
436
--
38
2.17.0
437
2.20.1
39
438
40
439
diff view generated by jsdifflib
New patch
1
1
Convert the Neon VEXT insn to decodetree. Rather than keeping the
2
old implementation which used fixed temporaries cpu_V0 and cpu_V1
3
and did the extraction with by-hand shift and logic ops, we use
4
the TCG extract2 insn.
5
6
We don't need to special case 0 or 8 immediates any more as the
7
optimizer is smart enough to throw away the dead code.
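(For reference, tcg_gen_extract2_i64(dest, lo, hi, pos) keeps the 64 bits
starting at bit 'pos' of the 128-bit concatenation hi:lo; with lo = Vn,
hi = Vm and pos = imm * 8 that is exactly the VEXT byte extraction. As an
illustrative example, a non-Q VEXT with imm == 3 ends up with dest bytes
0..4 taken from Vn bytes 3..7 and dest bytes 5..7 taken from Vm bytes
0..2.)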
8
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
target/arm/neon-dp.decode | 8 +++-
13
target/arm/translate-neon.inc.c | 76 +++++++++++++++++++++++++++++++++
14
target/arm/translate.c | 58 +------------------------
15
3 files changed, 85 insertions(+), 57 deletions(-)
16
17
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/neon-dp.decode
20
+++ b/target/arm/neon-dp.decode
21
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
22
# return false for size==3.
23
######################################################################
24
{
25
- # 0b11 subgroup will go here
26
+ [
27
+ ##################################################################
28
+ # Miscellaneous size=0b11 insns
29
+ ##################################################################
30
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
31
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
32
+ ]
33
34
# Subgroup for size != 0b11
35
[
36
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-neon.inc.c
39
+++ b/target/arm/translate-neon.inc.c
40
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
41
42
return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
43
}
44
+
45
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
46
+{
47
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
48
+ return false;
49
+ }
50
+
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
52
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
53
+ ((a->vd | a->vn | a->vm) & 0x10)) {
54
+ return false;
55
+ }
56
+
57
+ if ((a->vn | a->vm | a->vd) & a->q) {
58
+ return false;
59
+ }
60
+
61
+ if (a->imm > 7 && !a->q) {
62
+ return false;
63
+ }
64
+
65
+ if (!vfp_access_check(s)) {
66
+ return true;
67
+ }
68
+
69
+ if (!a->q) {
70
+ /* Extract 64 bits from <Vm:Vn> */
71
+ TCGv_i64 left, right, dest;
72
+
73
+ left = tcg_temp_new_i64();
74
+ right = tcg_temp_new_i64();
75
+ dest = tcg_temp_new_i64();
76
+
77
+ neon_load_reg64(right, a->vn);
78
+ neon_load_reg64(left, a->vm);
79
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
80
+ neon_store_reg64(dest, a->vd);
81
+
82
+ tcg_temp_free_i64(left);
83
+ tcg_temp_free_i64(right);
84
+ tcg_temp_free_i64(dest);
85
+ } else {
86
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
87
+ TCGv_i64 left, middle, right, destleft, destright;
88
+
89
+ left = tcg_temp_new_i64();
90
+ middle = tcg_temp_new_i64();
91
+ right = tcg_temp_new_i64();
92
+ destleft = tcg_temp_new_i64();
93
+ destright = tcg_temp_new_i64();
94
+
95
+ if (a->imm < 8) {
96
+ neon_load_reg64(right, a->vn);
97
+ neon_load_reg64(middle, a->vn + 1);
98
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
99
+ neon_load_reg64(left, a->vm);
100
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
101
+ } else {
102
+ neon_load_reg64(right, a->vn + 1);
103
+ neon_load_reg64(middle, a->vm);
104
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
105
+ neon_load_reg64(left, a->vm + 1);
106
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
107
+ }
108
+
109
+ neon_store_reg64(destright, a->vd);
110
+ neon_store_reg64(destleft, a->vd + 1);
111
+
112
+ tcg_temp_free_i64(destright);
113
+ tcg_temp_free_i64(destleft);
114
+ tcg_temp_free_i64(right);
115
+ tcg_temp_free_i64(middle);
116
+ tcg_temp_free_i64(left);
117
+ }
118
+ return true;
119
+}
120
diff --git a/target/arm/translate.c b/target/arm/translate.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/translate.c
123
+++ b/target/arm/translate.c
124
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
125
int pass;
126
int u;
127
int vec_size;
128
- uint32_t imm;
129
TCGv_i32 tmp, tmp2, tmp3, tmp5;
130
TCGv_ptr ptr1;
131
- TCGv_i64 tmp64;
132
133
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
134
return 1;
135
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
136
return 1;
137
} else { /* size == 3 */
138
if (!u) {
139
- /* Extract. */
140
- imm = (insn >> 8) & 0xf;
141
-
142
- if (imm > 7 && !q)
143
- return 1;
144
-
145
- if (q && ((rd | rn | rm) & 1)) {
146
- return 1;
147
- }
148
-
149
- if (imm == 0) {
150
- neon_load_reg64(cpu_V0, rn);
151
- if (q) {
152
- neon_load_reg64(cpu_V1, rn + 1);
153
- }
154
- } else if (imm == 8) {
155
- neon_load_reg64(cpu_V0, rn + 1);
156
- if (q) {
157
- neon_load_reg64(cpu_V1, rm);
158
- }
159
- } else if (q) {
160
- tmp64 = tcg_temp_new_i64();
161
- if (imm < 8) {
162
- neon_load_reg64(cpu_V0, rn);
163
- neon_load_reg64(tmp64, rn + 1);
164
- } else {
165
- neon_load_reg64(cpu_V0, rn + 1);
166
- neon_load_reg64(tmp64, rm);
167
- }
168
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
169
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
170
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
171
- if (imm < 8) {
172
- neon_load_reg64(cpu_V1, rm);
173
- } else {
174
- neon_load_reg64(cpu_V1, rm + 1);
175
- imm -= 8;
176
- }
177
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
178
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
179
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
180
- tcg_temp_free_i64(tmp64);
181
- } else {
182
- /* BUGFIX */
183
- neon_load_reg64(cpu_V0, rn);
184
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
185
- neon_load_reg64(cpu_V1, rm);
186
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
187
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
188
- }
189
- neon_store_reg64(cpu_V0, rd);
190
- if (q) {
191
- neon_store_reg64(cpu_V1, rd + 1);
192
- }
193
+ /* Extract: handled by decodetree */
194
+ return 1;
195
} else if ((insn & (1 << 11)) == 0) {
196
/* Two register misc. */
197
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
--
2.20.1
1
From: Alex Bennée <alex.bennee@linaro.org>
1
Convert the Neon VTBL, VTBX instructions to decodetree. The actual
2
implementation of the insn is copied across to the new trans function
3
unchanged except for renaming 'tmp5' to 'tmp4'.
2
4
3
These were missed out from the rest of the half-precision work.
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/neon-dp.decode | 3 ++
9
target/arm/translate-neon.inc.c | 56 +++++++++++++++++++++++++++++++++
10
target/arm/translate.c | 41 +++---------------------
11
3 files changed, 63 insertions(+), 37 deletions(-)
4
12
5
Cc: qemu-stable@nongnu.org
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
8
Tested-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20180512003217.9105-10-richard.henderson@linaro.org
11
[rth: Fix erroneous check vs type]
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
target/arm/translate-a64.c | 31 +++++++++++++++++++++++++------
16
1 file changed, 25 insertions(+), 6 deletions(-)
17
18
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
19
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate-a64.c
15
--- a/target/arm/neon-dp.decode
21
+++ b/target/arm/translate-a64.c
16
+++ b/target/arm/neon-dp.decode
22
@@ -XXX,XX +XXX,XX @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
23
unsigned int mos, type, rm, cond, rn, rd;
18
##################################################################
24
TCGv_i64 t_true, t_false, t_zero;
19
VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
25
DisasCompare64 c;
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
26
+ TCGMemOp sz;
21
+
27
22
+ VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
28
mos = extract32(insn, 29, 3);
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
29
- type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
24
]
30
+ type = extract32(insn, 22, 2);
25
31
rm = extract32(insn, 16, 5);
26
# Subgroup for size != 0b11
32
cond = extract32(insn, 12, 4);
27
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
33
rn = extract32(insn, 5, 5);
28
index XXXXXXX..XXXXXXX 100644
34
rd = extract32(insn, 0, 5);
29
--- a/target/arm/translate-neon.inc.c
35
30
+++ b/target/arm/translate-neon.inc.c
36
- if (mos || type > 1) {
31
@@ -XXX,XX +XXX,XX @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
37
+ if (mos) {
32
}
38
+ unallocated_encoding(s);
33
return true;
39
+ return;
34
}
35
+
36
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
37
+{
38
+ int n;
39
+ TCGv_i32 tmp, tmp2, tmp3, tmp4;
40
+ TCGv_ptr ptr1;
41
+
42
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
43
+ return false;
40
+ }
44
+ }
41
+
45
+
42
+ switch (type) {
46
+ /* UNDEF accesses to D16-D31 if they don't exist. */
43
+ case 0:
47
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
44
+ sz = MO_32;
48
+ ((a->vd | a->vn | a->vm) & 0x10)) {
45
+ break;
49
+ return false;
46
+ case 1:
50
+ }
47
+ sz = MO_64;
51
+
48
+ break;
52
+ if (!vfp_access_check(s)) {
49
+ case 3:
53
+ return true;
50
+ sz = MO_16;
54
+ }
51
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
55
+
52
+ break;
56
+ n = a->len + 1;
53
+ }
57
+ if ((a->vn + n) > 32) {
54
+ /* fallthru */
58
+ /*
55
+ default:
59
+ * This is UNPREDICTABLE; we choose to UNDEF to avoid the
56
unallocated_encoding(s);
60
+ * helper function running off the end of the register file.
57
return;
61
+ */
58
}
62
+ return false;
59
@@ -XXX,XX +XXX,XX @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
63
+ }
60
return;
64
+ n <<= 3;
61
}
65
+ if (a->op) {
62
66
+ tmp = neon_load_reg(a->vd, 0);
63
- /* Zero extend sreg inputs to 64 bits now. */
67
+ } else {
64
+ /* Zero extend sreg & hreg inputs to 64 bits now. */
68
+ tmp = tcg_temp_new_i32();
65
t_true = tcg_temp_new_i64();
69
+ tcg_gen_movi_i32(tmp, 0);
66
t_false = tcg_temp_new_i64();
70
+ }
67
- read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
71
+ tmp2 = neon_load_reg(a->vm, 0);
68
- read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
72
+ ptr1 = vfp_reg_ptr(true, a->vn);
69
+ read_vec_element(s, t_true, rn, 0, sz);
73
+ tmp4 = tcg_const_i32(n);
70
+ read_vec_element(s, t_false, rm, 0, sz);
74
+ gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
71
75
+ tcg_temp_free_i32(tmp);
72
a64_test_cc(&c, cond);
76
+ if (a->op) {
73
t_zero = tcg_const_i64(0);
77
+ tmp = neon_load_reg(a->vd, 1);
74
@@ -XXX,XX +XXX,XX @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
78
+ } else {
75
tcg_temp_free_i64(t_false);
79
+ tmp = tcg_temp_new_i32();
76
a64_free_cc(&c);
80
+ tcg_gen_movi_i32(tmp, 0);
77
81
+ }
78
- /* Note that sregs write back zeros to the high bits,
82
+ tmp3 = neon_load_reg(a->vm, 1);
79
+ /* Note that sregs & hregs write back zeros to the high bits,
83
+ gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
80
and we've already done the zero-extension. */
84
+ tcg_temp_free_i32(tmp4);
81
write_fp_dreg(s, rd, t_true);
85
+ tcg_temp_free_ptr(ptr1);
82
tcg_temp_free_i64(t_true);
86
+ neon_store_reg(a->vd, 0, tmp2);
87
+ neon_store_reg(a->vd, 1, tmp3);
88
+ tcg_temp_free_i32(tmp);
89
+ return true;
90
+}
91
diff --git a/target/arm/translate.c b/target/arm/translate.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/arm/translate.c
94
+++ b/target/arm/translate.c
95
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
96
{
97
int op;
98
int q;
99
- int rd, rn, rm, rd_ofs, rm_ofs;
100
+ int rd, rm, rd_ofs, rm_ofs;
101
int size;
102
int pass;
103
int u;
104
int vec_size;
105
- TCGv_i32 tmp, tmp2, tmp3, tmp5;
106
- TCGv_ptr ptr1;
107
+ TCGv_i32 tmp, tmp2, tmp3;
108
109
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
110
return 1;
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
q = (insn & (1 << 6)) != 0;
113
u = (insn >> 24) & 1;
114
VFP_DREG_D(rd, insn);
115
- VFP_DREG_N(rn, insn);
116
VFP_DREG_M(rm, insn);
117
size = (insn >> 20) & 3;
118
vec_size = q ? 16 : 8;
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
break;
121
}
122
} else if ((insn & (1 << 10)) == 0) {
123
- /* VTBL, VTBX. */
124
- int n = ((insn >> 8) & 3) + 1;
125
- if ((rn + n) > 32) {
126
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
127
- * helper function running off the end of the register file.
128
- */
129
- return 1;
130
- }
131
- n <<= 3;
132
- if (insn & (1 << 6)) {
133
- tmp = neon_load_reg(rd, 0);
134
- } else {
135
- tmp = tcg_temp_new_i32();
136
- tcg_gen_movi_i32(tmp, 0);
137
- }
138
- tmp2 = neon_load_reg(rm, 0);
139
- ptr1 = vfp_reg_ptr(true, rn);
140
- tmp5 = tcg_const_i32(n);
141
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
142
- tcg_temp_free_i32(tmp);
143
- if (insn & (1 << 6)) {
144
- tmp = neon_load_reg(rd, 1);
145
- } else {
146
- tmp = tcg_temp_new_i32();
147
- tcg_gen_movi_i32(tmp, 0);
148
- }
149
- tmp3 = neon_load_reg(rm, 1);
150
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
151
- tcg_temp_free_i32(tmp5);
152
- tcg_temp_free_ptr(ptr1);
153
- neon_store_reg(rd, 0, tmp2);
154
- neon_store_reg(rd, 1, tmp3);
155
- tcg_temp_free_i32(tmp);
156
+ /* VTBL, VTBX: handled by decodetree */
157
+ return 1;
158
} else if ((insn & 0x380) == 0) {
159
/* VDUP */
160
int element;
--
2.20.1
1
From: Richard Henderson <richard.henderson@linaro.org>
1
Convert the Neon VDUP (scalar) insn to decodetree. (Note that we
2
can't call this just "VDUP" as we used that already in vfp.decode for
3
the "VDUP (general purpose register" insn.)
2
4
3
Cc: qemu-stable@nongnu.org
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Tested-by: Alex Bennée <alex.bennee@linaro.org>
7
Message-id: 20180512003217.9105-5-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
7
---
10
target/arm/translate-a64.c | 17 +++++++++++++++--
8
target/arm/neon-dp.decode | 7 +++++++
11
1 file changed, 15 insertions(+), 2 deletions(-)
9
target/arm/translate-neon.inc.c | 26 ++++++++++++++++++++++++++
10
target/arm/translate.c | 25 +------------------------
11
3 files changed, 34 insertions(+), 24 deletions(-)
12
12
13
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/translate-a64.c
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/translate-a64.c
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
18
bool sf = extract32(insn, 31, 1);
18
19
bool itof;
19
VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
20
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
21
- if (sbit || (type > 1)
21
+
22
- || (!sf && scale < 32)) {
22
+ VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
23
+ if (sbit || (!sf && scale < 32)) {
23
+ vm=%vm_dp vd=%vd_dp size=0
24
+ unallocated_encoding(s);
24
+ VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
25
+ return;
25
+ vm=%vm_dp vd=%vd_dp size=1
26
+ VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
27
+ vm=%vm_dp vd=%vd_dp size=2
28
]
29
30
# Subgroup for size != 0b11
31
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/translate-neon.inc.c
34
+++ b/target/arm/translate-neon.inc.c
35
@@ -XXX,XX +XXX,XX @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
36
tcg_temp_free_i32(tmp);
37
return true;
38
}
39
+
40
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
41
+{
42
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
43
+ return false;
26
+ }
44
+ }
27
+
45
+
28
+ switch (type) {
46
+ /* UNDEF accesses to D16-D31 if they don't exist. */
29
+ case 0: /* float32 */
47
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
30
+ case 1: /* float64 */
48
+ ((a->vd | a->vm) & 0x10)) {
31
+ break;
49
+ return false;
32
+ case 3: /* float16 */
50
+ }
33
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
51
+
34
+ break;
52
+ if (a->vd & a->q) {
35
+ }
53
+ return false;
36
+ /* fallthru */
54
+ }
37
+ default:
55
+
38
unallocated_encoding(s);
56
+ if (!vfp_access_check(s)) {
39
return;
57
+ return true;
40
}
58
+ }
59
+
60
+ tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
61
+ neon_element_offset(a->vm, a->index, a->size),
62
+ a->q ? 16 : 8, a->q ? 16 : 8);
63
+ return true;
64
+}
65
diff --git a/target/arm/translate.c b/target/arm/translate.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/target/arm/translate.c
68
+++ b/target/arm/translate.c
69
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
70
}
71
break;
72
}
73
- } else if ((insn & (1 << 10)) == 0) {
74
- /* VTBL, VTBX: handled by decodetree */
75
- return 1;
76
- } else if ((insn & 0x380) == 0) {
77
- /* VDUP */
78
- int element;
79
- MemOp size;
80
-
81
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
82
- return 1;
83
- }
84
- if (insn & (1 << 16)) {
85
- size = MO_8;
86
- element = (insn >> 17) & 7;
87
- } else if (insn & (1 << 17)) {
88
- size = MO_16;
89
- element = (insn >> 18) & 3;
90
- } else {
91
- size = MO_32;
92
- element = (insn >> 19) & 1;
93
- }
94
- tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
95
- neon_element_offset(rm, element, size),
96
- q ? 16 : 8, q ? 16 : 8);
97
} else {
98
+ /* VTBL, VTBX, VDUP: handled by decodetree */
99
return 1;
100
}
101
}
--
2.20.1
1
From: Alex Bennée <alex.bennee@linaro.org>
1
From: Jean-Christophe Dubois <jcd@tribudubois.net>
2
2
3
Reported by Coverity (CID1390635). We ensure this for uint_to_float
3
Some bits of the CCM registers are non-writable.
4
later on so we might as well mirror that.
5
4
6
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
5
This was left undone in the initial commit (all bits of registers were
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
writable).
7
8
This patch adds the required code to protect the non-writable bits.
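To make the convention explicit (an aside, not wording from the patch): in the mask tables added below, a 1 bit marks a read-only bit and a 0 bit marks a guest-writable bit, so each register write becomes a read-modify-write along these lines:

    #include <stdint.h>

    /* Sketch of the masked update used for the CCM/analog registers:
     * bits set in ro_mask keep their current value, the rest take the
     * value written by the guest.
     */
    static inline uint32_t masked_write(uint32_t old, uint32_t val,
                                        uint32_t ro_mask)
    {
        return (old & ro_mask) | (val & ~ro_mask);
    }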
9
10
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
11
Message-id: 20200608133508.550046-1-jcd@tribudubois.net
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
14
---
11
fpu/softfloat.c | 2 +-
15
hw/misc/imx6ul_ccm.c | 76 ++++++++++++++++++++++++++++++++++++--------
12
1 file changed, 1 insertion(+), 1 deletion(-)
16
1 file changed, 63 insertions(+), 13 deletions(-)
13
17
14
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
18
diff --git a/hw/misc/imx6ul_ccm.c b/hw/misc/imx6ul_ccm.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/fpu/softfloat.c
20
--- a/hw/misc/imx6ul_ccm.c
17
+++ b/fpu/softfloat.c
21
+++ b/hw/misc/imx6ul_ccm.c
18
@@ -XXX,XX +XXX,XX @@ FLOAT_TO_UINT(64, 64)
22
@@ -XXX,XX +XXX,XX @@
19
23
20
static FloatParts int_to_float(int64_t a, float_status *status)
24
#include "trace.h"
25
26
+static const uint32_t ccm_mask[CCM_MAX] = {
27
+ [CCM_CCR] = 0xf01fef80,
28
+ [CCM_CCDR] = 0xfffeffff,
29
+ [CCM_CSR] = 0xffffffff,
30
+ [CCM_CCSR] = 0xfffffef2,
31
+ [CCM_CACRR] = 0xfffffff8,
32
+ [CCM_CBCDR] = 0xc1f8e000,
33
+ [CCM_CBCMR] = 0xfc03cfff,
34
+ [CCM_CSCMR1] = 0x80700000,
35
+ [CCM_CSCMR2] = 0xe01ff003,
36
+ [CCM_CSCDR1] = 0xfe00c780,
37
+ [CCM_CS1CDR] = 0xfe00fe00,
38
+ [CCM_CS2CDR] = 0xf8007000,
39
+ [CCM_CDCDR] = 0xf00fffff,
40
+ [CCM_CHSCCDR] = 0xfffc01ff,
41
+ [CCM_CSCDR2] = 0xfe0001ff,
42
+ [CCM_CSCDR3] = 0xffffc1ff,
43
+ [CCM_CDHIPR] = 0xffffffff,
44
+ [CCM_CTOR] = 0x00000000,
45
+ [CCM_CLPCR] = 0xf39ff01c,
46
+ [CCM_CISR] = 0xfb85ffbe,
47
+ [CCM_CIMR] = 0xfb85ffbf,
48
+ [CCM_CCOSR] = 0xfe00fe00,
49
+ [CCM_CGPR] = 0xfffc3fea,
50
+ [CCM_CCGR0] = 0x00000000,
51
+ [CCM_CCGR1] = 0x00000000,
52
+ [CCM_CCGR2] = 0x00000000,
53
+ [CCM_CCGR3] = 0x00000000,
54
+ [CCM_CCGR4] = 0x00000000,
55
+ [CCM_CCGR5] = 0x00000000,
56
+ [CCM_CCGR6] = 0x00000000,
57
+ [CCM_CMEOR] = 0xafffff1f,
58
+};
59
+
60
+static const uint32_t analog_mask[CCM_ANALOG_MAX] = {
61
+ [CCM_ANALOG_PLL_ARM] = 0xfff60f80,
62
+ [CCM_ANALOG_PLL_USB1] = 0xfffe0fbc,
63
+ [CCM_ANALOG_PLL_USB2] = 0xfffe0fbc,
64
+ [CCM_ANALOG_PLL_SYS] = 0xfffa0ffe,
65
+ [CCM_ANALOG_PLL_SYS_SS] = 0x00000000,
66
+ [CCM_ANALOG_PLL_SYS_NUM] = 0xc0000000,
67
+ [CCM_ANALOG_PLL_SYS_DENOM] = 0xc0000000,
68
+ [CCM_ANALOG_PLL_AUDIO] = 0xffe20f80,
69
+ [CCM_ANALOG_PLL_AUDIO_NUM] = 0xc0000000,
70
+ [CCM_ANALOG_PLL_AUDIO_DENOM] = 0xc0000000,
71
+ [CCM_ANALOG_PLL_VIDEO] = 0xffe20f80,
72
+ [CCM_ANALOG_PLL_VIDEO_NUM] = 0xc0000000,
73
+ [CCM_ANALOG_PLL_VIDEO_DENOM] = 0xc0000000,
74
+ [CCM_ANALOG_PLL_ENET] = 0xffc20ff0,
75
+ [CCM_ANALOG_PFD_480] = 0x40404040,
76
+ [CCM_ANALOG_PFD_528] = 0x40404040,
77
+ [PMU_MISC0] = 0x01fe8306,
78
+ [PMU_MISC1] = 0x07fcede0,
79
+ [PMU_MISC2] = 0x005f5f5f,
80
+};
81
+
82
static const char *imx6ul_ccm_reg_name(uint32_t reg)
21
{
83
{
22
- FloatParts r;
84
static char unknown[20];
23
+ FloatParts r = {};
85
@@ -XXX,XX +XXX,XX @@ static void imx6ul_ccm_write(void *opaque, hwaddr offset, uint64_t value,
24
if (a == 0) {
86
25
r.cls = float_class_zero;
87
trace_ccm_write_reg(imx6ul_ccm_reg_name(index), (uint32_t)value);
26
r.sign = false;
88
89
- /*
90
- * We will do a better implementation later. In particular some bits
91
- * cannot be written to.
92
- */
93
- s->ccm[index] = (uint32_t)value;
94
+ s->ccm[index] = (s->ccm[index] & ccm_mask[index]) |
95
+ ((uint32_t)value & ~ccm_mask[index]);
96
}
97
98
static uint64_t imx6ul_analog_read(void *opaque, hwaddr offset, unsigned size)
99
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
100
* the REG_NAME register. So we change the value of the
101
* REG_NAME register, setting bits passed in the value.
102
*/
103
- s->analog[index - 1] |= value;
104
+ s->analog[index - 1] |= (value & ~analog_mask[index - 1]);
105
break;
106
case CCM_ANALOG_PLL_ARM_CLR:
107
case CCM_ANALOG_PLL_USB1_CLR:
108
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
109
* the REG_NAME register. So we change the value of the
110
* REG_NAME register, unsetting bits passed in the value.
111
*/
112
- s->analog[index - 2] &= ~value;
113
+ s->analog[index - 2] &= ~(value & ~analog_mask[index - 2]);
114
break;
115
case CCM_ANALOG_PLL_ARM_TOG:
116
case CCM_ANALOG_PLL_USB1_TOG:
117
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
118
* the REG_NAME register. So we change the value of the
119
* REG_NAME register, toggling bits passed in the value.
120
*/
121
- s->analog[index - 3] ^= value;
122
+ s->analog[index - 3] ^= (value & ~analog_mask[index - 3]);
123
break;
124
default:
125
- /*
126
- * We will do a better implementation later. In particular some bits
127
- * cannot be written to.
128
- */
129
- s->analog[index] = value;
130
+ s->analog[index] = (s->analog[index] & analog_mask[index]) |
131
+ (value & ~analog_mask[index]);
132
break;
133
}
134
}
--
2.20.1
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Erik Smit <erik.lucas.smit@gmail.com>
2
2
3
Adding the fp16 moves to/from general registers.
3
The hardware supports configurable descriptor sizes, configured in the DBLAC
4
register.
4
5
5
Cc: qemu-stable@nongnu.org
6
Most drivers use the default 4 word descriptor, which is currently hardcoded,
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
but Aspeed SDK configures 8 words to store extra data.
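An aside for readers (not part of the commit message): judging from the macros added below, the RXDES/TXDES size fields in DBLAC are in units of 8 bytes, so the default four-word (16-byte) FTGMAC100Desc corresponds to a field value of 2 and the Aspeed eight-word layout to 4. A small sketch of the decode:

    #include <stdint.h>

    /* Descriptor sizes in bytes, mirroring FTGMAC100_DBLAC_*DES_SIZE(). */
    static inline unsigned txdes_size(uint32_t dblac)
    {
        return ((dblac >> 16) & 0xf) * 8;   /* e.g. 2 -> 16, 4 -> 32 bytes */
    }

    static inline unsigned rxdes_size(uint32_t dblac)
    {
        return ((dblac >> 12) & 0xf) * 8;
    }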
7
Tested-by: Alex Bennée <alex.bennee@linaro.org>
8
8
Message-id: 20180512003217.9105-2-richard.henderson@linaro.org
9
Signed-off-by: Erik Smit <erik.lucas.smit@gmail.com>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Cédric Le Goater <clg@kaod.org>
11
[PMM: removed unnecessary parens]
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
13
---
12
target/arm/translate-a64.c | 21 +++++++++++++++++++++
14
hw/net/ftgmac100.c | 26 ++++++++++++++++++++++++--
13
1 file changed, 21 insertions(+)
15
1 file changed, 24 insertions(+), 2 deletions(-)
14
16
15
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.c
19
--- a/hw/net/ftgmac100.c
18
+++ b/target/arm/translate-a64.c
20
+++ b/hw/net/ftgmac100.c
19
@@ -XXX,XX +XXX,XX @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
21
@@ -XXX,XX +XXX,XX @@
20
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
22
#define FTGMAC100_APTC_TXPOLL_CNT(x) (((x) >> 8) & 0xf)
21
clear_vec_high(s, true, rd);
23
#define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12)
22
break;
24
23
+ case 3:
25
+/*
24
+ /* 16 bit */
26
+ * DMA burst length and arbitration control register
25
+ tmp = tcg_temp_new_i64();
27
+ */
26
+ tcg_gen_ext16u_i64(tmp, tcg_rn);
28
+#define FTGMAC100_DBLAC_RXBURST_SIZE(x) (((x) >> 8) & 0x3)
27
+ write_fp_dreg(s, rd, tmp);
29
+#define FTGMAC100_DBLAC_TXBURST_SIZE(x) (((x) >> 10) & 0x3)
28
+ tcg_temp_free_i64(tmp);
30
+#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8)
29
+ break;
31
+#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8)
30
+ default:
32
+#define FTGMAC100_DBLAC_IFG_CNT(x) (((x) >> 20) & 0x7)
31
+ g_assert_not_reached();
33
+#define FTGMAC100_DBLAC_IFG_INC (1 << 23)
32
}
34
+
33
} else {
35
/*
34
TCGv_i64 tcg_rd = cpu_reg(s, rd);
36
* PHY control register
35
@@ -XXX,XX +XXX,XX @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
37
*/
36
/* 64 bits from top half */
38
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
37
tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
39
if (bd.des0 & s->txdes0_edotr) {
38
break;
40
addr = tx_ring;
39
+ case 3:
41
} else {
40
+ /* 16 bit */
42
- addr += sizeof(FTGMAC100Desc);
41
+ tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
43
+ addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac);
42
+ break;
43
+ default:
44
+ g_assert_not_reached();
45
}
44
}
46
}
45
}
47
}
46
48
@@ -XXX,XX +XXX,XX @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
47
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_write(void *opaque, hwaddr addr,
49
case 0xa: /* 64 bit */
48
s->phydata = value & 0xffff;
50
case 0xd: /* 64 bit to top half of quad */
49
break;
51
break;
50
case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
52
+ case 0x6: /* 16-bit float, 32-bit int */
51
+ if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
53
+ case 0xe: /* 16-bit float, 64-bit int */
52
+ qemu_log_mask(LOG_GUEST_ERROR,
54
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
53
+ "%s: transmit descriptor too small : %d bytes\n",
55
+ break;
54
+ __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
56
+ }
55
+ break;
57
+ /* fallthru */
56
+ }
58
default:
57
+ if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
59
/* all other sf/type/rmode combinations are invalid */
58
+ qemu_log_mask(LOG_GUEST_ERROR,
60
unallocated_encoding(s);
59
+ "%s: receive descriptor too small : %d bytes\n",
60
+ __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
61
+ break;
62
+ }
63
s->dblac = value;
64
break;
65
case FTGMAC100_REVR: /* Feature Register */
66
@@ -XXX,XX +XXX,XX @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
67
if (bd.des0 & s->rxdes0_edorr) {
68
addr = s->rx_ring;
69
} else {
70
- addr += sizeof(FTGMAC100Desc);
71
+ addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac);
72
}
73
}
74
s->rx_descriptor = addr;
--
2.20.1
1
In commit d81ce0ef2c4f105 we added an extra float_status field
1
From: fangying <fangying1@huawei.com>
2
fp_status_f16 for Arm, but forgot to initialize it correctly
3
by setting it to float_tininess_before_rounding. This currently
4
will only cause problems for the new V8_FP16 feature, since the
5
float-to-float conversion code doesn't use it yet. The effect
6
would be that we failed to set the Underflow IEEE exception flag
7
in all the cases where we should.
8
2
9
Add the missing initialization.
3
Virtual time adjustment was implemented for virt-5.0 machine type,
4
but the cpu property was enabled only for host-passthrough and max
5
cpu model. Let's add it for any KVM arm cpu which has the generic
6
timer feature enabled.
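Usage note (mine, not from the original message): after this change the kvm-no-adjvtime property should be settable on any KVM arm CPU model that has the generic timer feature, for example something along the lines of:

    qemu-system-aarch64 -M virt -accel kvm -cpu host,kvm-no-adjvtime=on ...

(the property name is taken from the code below; the rest of the command line is only an example).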
10
7
11
Fixes: d81ce0ef2c4f105
8
Signed-off-by: Ying Fang <fangying1@huawei.com>
12
Cc: qemu-stable@nongnu.org
9
Reviewed-by: Andrew Jones <drjones@redhat.com>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20200608121243.2076-1-fangying1@huawei.com
14
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
[PMM: minor commit message tweak, removed inaccurate
12
suggested-by tag]
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Message-id: 20180512004311.9299-16-richard.henderson@linaro.org
17
---
14
---
18
target/arm/cpu.c | 2 ++
15
target/arm/cpu.c | 6 ++++--
19
1 file changed, 2 insertions(+)
16
target/arm/cpu64.c | 1 -
17
target/arm/kvm.c | 21 +++++++++++----------
18
3 files changed, 15 insertions(+), 13 deletions(-)
20
19
21
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
20
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
22
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/cpu.c
22
--- a/target/arm/cpu.c
24
+++ b/target/arm/cpu.c
23
+++ b/target/arm/cpu.c
25
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(CPUState *s)
24
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj)
26
&env->vfp.fp_status);
25
if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
27
set_float_detect_tininess(float_tininess_before_rounding,
26
qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
28
&env->vfp.standard_fp_status);
27
}
29
+ set_float_detect_tininess(float_tininess_before_rounding,
28
+
30
+ &env->vfp.fp_status_f16);
29
+ if (kvm_enabled()) {
31
#ifndef CONFIG_USER_ONLY
30
+ kvm_arm_add_vcpu_properties(obj);
31
+ }
32
}
33
34
static void arm_cpu_finalizefn(Object *obj)
35
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
36
32
if (kvm_enabled()) {
37
if (kvm_enabled()) {
33
kvm_arm_reset_vcpu(cpu);
38
kvm_arm_set_cpu_features_from_host(cpu);
39
- kvm_arm_add_vcpu_properties(obj);
40
} else {
41
cortex_a15_initfn(obj);
42
43
@@ -XXX,XX +XXX,XX @@ static void arm_host_initfn(Object *obj)
44
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
45
aarch64_add_sve_properties(obj);
46
}
47
- kvm_arm_add_vcpu_properties(obj);
48
arm_cpu_post_init(obj);
49
}
50
51
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/cpu64.c
54
+++ b/target/arm/cpu64.c
55
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
56
57
if (kvm_enabled()) {
58
kvm_arm_set_cpu_features_from_host(cpu);
59
- kvm_arm_add_vcpu_properties(obj);
60
} else {
61
uint64_t t;
62
uint32_t u;
63
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/kvm.c
66
+++ b/target/arm/kvm.c
67
@@ -XXX,XX +XXX,XX @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
68
/* KVM VCPU properties should be prefixed with "kvm-". */
69
void kvm_arm_add_vcpu_properties(Object *obj)
70
{
71
- if (!kvm_enabled()) {
72
- return;
73
- }
74
+ ARMCPU *cpu = ARM_CPU(obj);
75
+ CPUARMState *env = &cpu->env;
76
77
- ARM_CPU(obj)->kvm_adjvtime = true;
78
- object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
79
- kvm_no_adjvtime_set);
80
- object_property_set_description(obj, "kvm-no-adjvtime",
81
- "Set on to disable the adjustment of "
82
- "the virtual counter. VM stopped time "
83
- "will be counted.");
84
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
85
+ cpu->kvm_adjvtime = true;
86
+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
87
+ kvm_no_adjvtime_set);
88
+ object_property_set_description(obj, "kvm-no-adjvtime",
89
+ "Set on to disable the adjustment of "
90
+ "the virtual counter. VM stopped time "
91
+ "will be counted.");
92
+ }
93
}
94
95
bool kvm_arm_pmu_supported(CPUState *cpu)
--
2.20.1
1
From: Alex Bennée <alex.bennee@linaro.org>
1
From: Jean-Christophe Dubois <jcd@tribudubois.net>
2
2
3
All the hard work is already done by vfp_expand_imm, we just need to
3
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
4
make sure we pick up the correct size.
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Cc: qemu-stable@nongnu.org
6
[PMD: Fixed 32-bit format string using PRIx32/PRIx64]
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
9
Tested-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20180512003217.9105-11-richard.henderson@linaro.org
12
[rth: Merge unallocated_encoding check with TCGMemOp conversion.]
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
9
---
16
target/arm/translate-a64.c | 20 +++++++++++++++++---
10
hw/net/imx_fec.c | 106 +++++++++++++++++++-------------------------
17
1 file changed, 17 insertions(+), 3 deletions(-)
11
hw/net/trace-events | 18 ++++++++
12
2 files changed, 63 insertions(+), 61 deletions(-)
18
13
19
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate-a64.c
16
--- a/hw/net/imx_fec.c
22
+++ b/target/arm/translate-a64.c
17
+++ b/hw/net/imx_fec.c
23
@@ -XXX,XX +XXX,XX @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
18
@@ -XXX,XX +XXX,XX @@
24
{
19
#include "qemu/module.h"
25
int rd = extract32(insn, 0, 5);
20
#include "net/checksum.h"
26
int imm8 = extract32(insn, 13, 8);
21
#include "net/eth.h"
27
- int is_double = extract32(insn, 22, 2);
22
+#include "trace.h"
28
+ int type = extract32(insn, 22, 2);
23
29
uint64_t imm;
24
/* For crc32 */
30
TCGv_i64 tcg_res;
25
#include <zlib.h>
31
+ TCGMemOp sz;
26
32
27
-#ifndef DEBUG_IMX_FEC
33
- if (is_double > 1) {
28
-#define DEBUG_IMX_FEC 0
34
+ switch (type) {
29
-#endif
35
+ case 0:
30
-
36
+ sz = MO_32;
31
-#define FEC_PRINTF(fmt, args...) \
37
+ break;
32
- do { \
38
+ case 1:
33
- if (DEBUG_IMX_FEC) { \
39
+ sz = MO_64;
34
- fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
40
+ break;
35
- __func__, ##args); \
41
+ case 3:
36
- } \
42
+ sz = MO_16;
37
- } while (0)
43
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
38
-
44
+ break;
39
-#ifndef DEBUG_IMX_PHY
45
+ }
40
-#define DEBUG_IMX_PHY 0
46
+ /* fallthru */
41
-#endif
47
+ default:
42
-
48
unallocated_encoding(s);
43
-#define PHY_PRINTF(fmt, args...) \
49
return;
44
- do { \
45
- if (DEBUG_IMX_PHY) { \
46
- fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
47
- __func__, ##args); \
48
- } \
49
- } while (0)
50
-
51
#define IMX_MAX_DESC 1024
52
53
static const char *imx_default_reg_name(IMXFECState *s, uint32_t index)
54
@@ -XXX,XX +XXX,XX @@ static void imx_eth_update(IMXFECState *s);
55
* For now we don't handle any GPIO/interrupt line, so the OS will
56
* have to poll for the PHY status.
57
*/
58
-static void phy_update_irq(IMXFECState *s)
59
+static void imx_phy_update_irq(IMXFECState *s)
60
{
61
imx_eth_update(s);
62
}
63
64
-static void phy_update_link(IMXFECState *s)
65
+static void imx_phy_update_link(IMXFECState *s)
66
{
67
/* Autonegotiation status mirrors link status. */
68
if (qemu_get_queue(s->nic)->link_down) {
69
- PHY_PRINTF("link is down\n");
70
+ trace_imx_phy_update_link("down");
71
s->phy_status &= ~0x0024;
72
s->phy_int |= PHY_INT_DOWN;
73
} else {
74
- PHY_PRINTF("link is up\n");
75
+ trace_imx_phy_update_link("up");
76
s->phy_status |= 0x0024;
77
s->phy_int |= PHY_INT_ENERGYON;
78
s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
50
}
79
}
51
@@ -XXX,XX +XXX,XX @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
80
- phy_update_irq(s);
52
return;
81
+ imx_phy_update_irq(s);
82
}
83
84
static void imx_eth_set_link(NetClientState *nc)
85
{
86
- phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
87
+ imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
88
}
89
90
-static void phy_reset(IMXFECState *s)
91
+static void imx_phy_reset(IMXFECState *s)
92
{
93
+ trace_imx_phy_reset();
94
+
95
s->phy_status = 0x7809;
96
s->phy_control = 0x3000;
97
s->phy_advertise = 0x01e1;
98
s->phy_int_mask = 0;
99
s->phy_int = 0;
100
- phy_update_link(s);
101
+ imx_phy_update_link(s);
102
}
103
104
-static uint32_t do_phy_read(IMXFECState *s, int reg)
105
+static uint32_t imx_phy_read(IMXFECState *s, int reg)
106
{
107
uint32_t val;
108
109
@@ -XXX,XX +XXX,XX @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
110
case 29: /* Interrupt source. */
111
val = s->phy_int;
112
s->phy_int = 0;
113
- phy_update_irq(s);
114
+ imx_phy_update_irq(s);
115
break;
116
case 30: /* Interrupt mask */
117
val = s->phy_int_mask;
118
@@ -XXX,XX +XXX,XX @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
119
break;
53
}
120
}
54
121
55
- imm = vfp_expand_imm(MO_32 + is_double, imm8);
122
- PHY_PRINTF("read 0x%04x @ %d\n", val, reg);
56
+ imm = vfp_expand_imm(sz, imm8);
123
+ trace_imx_phy_read(val, reg);
57
124
58
tcg_res = tcg_const_i64(imm);
125
return val;
59
write_fp_dreg(s, rd, tcg_res);
126
}
127
128
-static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
129
+static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
130
{
131
- PHY_PRINTF("write 0x%04x @ %d\n", val, reg);
132
+ trace_imx_phy_write(val, reg);
133
134
if (reg > 31) {
135
/* we only advertise one phy */
136
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
137
switch (reg) {
138
case 0: /* Basic Control */
139
if (val & 0x8000) {
140
- phy_reset(s);
141
+ imx_phy_reset(s);
142
} else {
143
s->phy_control = val & 0x7980;
144
/* Complete autonegotiation immediately. */
145
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
146
break;
147
case 30: /* Interrupt mask */
148
s->phy_int_mask = val & 0xff;
149
- phy_update_irq(s);
150
+ imx_phy_update_irq(s);
151
break;
152
case 17:
153
case 18:
154
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
155
static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr)
156
{
157
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
158
+
159
+ trace_imx_fec_read_bd(addr, bd->flags, bd->length, bd->data);
160
}
161
162
static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
163
@@ -XXX,XX +XXX,XX @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
164
static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr)
165
{
166
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
167
+
168
+ trace_imx_enet_read_bd(addr, bd->flags, bd->length, bd->data,
169
+ bd->option, bd->status);
170
}
171
172
static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr)
173
@@ -XXX,XX +XXX,XX @@ static void imx_fec_do_tx(IMXFECState *s)
174
int len;
175
176
imx_fec_read_bd(&bd, addr);
177
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n",
178
- addr, bd.flags, bd.length, bd.data);
179
if ((bd.flags & ENET_BD_R) == 0) {
180
+
181
/* Run out of descriptors to transmit. */
182
- FEC_PRINTF("tx_bd ran out of descriptors to transmit\n");
183
+ trace_imx_eth_tx_bd_busy();
184
+
185
break;
186
}
187
len = bd.length;
188
@@ -XXX,XX +XXX,XX @@ static void imx_enet_do_tx(IMXFECState *s, uint32_t index)
189
int len;
190
191
imx_enet_read_bd(&bd, addr);
192
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x "
193
- "status %04x\n", addr, bd.flags, bd.length, bd.data,
194
- bd.option, bd.status);
195
if ((bd.flags & ENET_BD_R) == 0) {
196
/* Run out of descriptors to transmit. */
197
+
198
+ trace_imx_eth_tx_bd_busy();
199
+
200
break;
201
}
202
len = bd.length;
203
@@ -XXX,XX +XXX,XX @@ static void imx_eth_enable_rx(IMXFECState *s, bool flush)
204
s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0;
205
206
if (!s->regs[ENET_RDAR]) {
207
- FEC_PRINTF("RX buffer full\n");
208
+ trace_imx_eth_rx_bd_full();
209
} else if (flush) {
210
qemu_flush_queued_packets(qemu_get_queue(s->nic));
211
}
212
@@ -XXX,XX +XXX,XX @@ static void imx_eth_reset(DeviceState *d)
213
memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor));
214
215
/* We also reset the PHY */
216
- phy_reset(s);
217
+ imx_phy_reset(s);
218
}
219
220
static uint32_t imx_default_read(IMXFECState *s, uint32_t index)
221
@@ -XXX,XX +XXX,XX @@ static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size)
222
break;
223
}
224
225
- FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
226
- value);
227
+ trace_imx_eth_read(index, imx_eth_reg_name(s, index), value);
228
229
return value;
230
}
231
@@ -XXX,XX +XXX,XX @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
232
const bool single_tx_ring = !imx_eth_is_multi_tx_ring(s);
233
uint32_t index = offset >> 2;
234
235
- FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
236
- (uint32_t)value);
237
+ trace_imx_eth_write(index, imx_eth_reg_name(s, index), value);
238
239
switch (index) {
240
case ENET_EIR:
241
@@ -XXX,XX +XXX,XX @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
242
if (extract32(value, 29, 1)) {
243
/* This is a read operation */
244
s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16,
245
- do_phy_read(s,
246
+ imx_phy_read(s,
247
extract32(value,
248
18, 10)));
249
} else {
250
/* This a write operation */
251
- do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
252
+ imx_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
253
}
254
/* raise the interrupt as the PHY operation is done */
255
s->regs[ENET_EIR] |= ENET_INT_MII;
256
@@ -XXX,XX +XXX,XX @@ static bool imx_eth_can_receive(NetClientState *nc)
257
{
258
IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc));
259
260
- FEC_PRINTF("\n");
261
-
262
return !!s->regs[ENET_RDAR];
263
}
264
265
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
266
unsigned int buf_len;
267
size_t size = len;
268
269
- FEC_PRINTF("len %d\n", (int)size);
270
+ trace_imx_fec_receive(size);
271
272
if (!s->regs[ENET_RDAR]) {
273
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
274
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
275
bd.length = buf_len;
276
size -= buf_len;
277
278
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
279
+ trace_imx_fec_receive_len(addr, bd.length);
280
281
/* The last 4 bytes are the CRC. */
282
if (size < 4) {
283
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
284
if (size == 0) {
285
/* Last buffer in frame. */
286
bd.flags |= flags | ENET_BD_L;
287
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
288
+
289
+ trace_imx_fec_receive_last(bd.flags);
290
+
291
s->regs[ENET_EIR] |= ENET_INT_RXF;
292
} else {
293
s->regs[ENET_EIR] |= ENET_INT_RXB;
294
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
295
size_t size = len;
296
bool shift16 = s->regs[ENET_RACC] & ENET_RACC_SHIFT16;
297
298
- FEC_PRINTF("len %d\n", (int)size);
299
+ trace_imx_enet_receive(size);
300
301
if (!s->regs[ENET_RDAR]) {
302
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
303
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
304
bd.length = buf_len;
305
size -= buf_len;
306
307
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
308
+ trace_imx_enet_receive_len(addr, bd.length);
309
310
/* The last 4 bytes are the CRC. */
311
if (size < 4) {
312
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
313
if (size == 0) {
314
/* Last buffer in frame. */
315
bd.flags |= flags | ENET_BD_L;
316
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
317
+
318
+ trace_imx_enet_receive_last(bd.flags);
319
+
320
/* Indicate that we've updated the last buffer descriptor. */
321
bd.last_buffer = ENET_BD_BDU;
322
if (bd.option & ENET_BD_RX_INT) {
323
diff --git a/hw/net/trace-events b/hw/net/trace-events
324
index XXXXXXX..XXXXXXX 100644
325
--- a/hw/net/trace-events
326
+++ b/hw/net/trace-events
327
@@ -XXX,XX +XXX,XX @@ i82596_receive_packet(size_t sz) "len=%zu"
328
i82596_new_mac(const char *id_with_mac) "New MAC for: %s"
329
i82596_set_multicast(uint16_t count) "Added %d multicast entries"
330
i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
331
+
332
+# imx_fec.c
333
+imx_phy_read(uint32_t val, int reg) "0x%04"PRIx32" <= reg[%d]"
334
+imx_phy_write(uint32_t val, int reg) "0x%04"PRIx32" => reg[%d]"
335
+imx_phy_update_link(const char *s) "%s"
336
+imx_phy_reset(void) ""
337
+imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
338
+imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x"
339
+imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit"
340
+imx_eth_rx_bd_full(void) "RX buffer is full"
341
+imx_eth_read(int reg, const char *reg_name, uint32_t value) "reg[%d:%s] => 0x%08"PRIx32
342
+imx_eth_write(int reg, const char *reg_name, uint64_t value) "reg[%d:%s] <= 0x%08"PRIx64
343
+imx_fec_receive(size_t size) "len %zu"
344
+imx_fec_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
345
+imx_fec_receive_last(int last) "rx frame flags 0x%04x"
346
+imx_enet_receive(size_t size) "len %zu"
347
+imx_enet_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
348
+imx_enet_receive_last(int last) "rx frame flags 0x%04x"
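Aside, not part of the patch: once these are trace events they can be turned on at run time rather than by rebuilding with DEBUG_IMX_FEC, for example with something like

    qemu-system-arm ... -trace "imx_phy_*" -trace "imx_eth_*"

exact behaviour depends on which trace backends the build was configured with.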
--
2.20.1
1
From: Alex Bennée <alex.bennee@linaro.org>
1
From: Guenter Roeck <linux@roeck-us.net>
2
2
3
These were missed out from the rest of the half-precision work.
3
The Linux kernel's IMX code now uses vendor specific commands.
4
This results in endless warnings when booting the Linux kernel.
4
5
5
Cc: qemu-stable@nongnu.org
6
sdhci-esdhc-imx 2194000.usdhc: esdhc_wait_for_card_clock_gate_off:
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
    card clock still not gate off in 100us!.
7
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
8
8
Tested-by: Alex Bennée <alex.bennee@linaro.org>
9
Implement support for the vendor specific command implemented in IMX hardware
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
to be able to avoid this warning.
10
Message-id: 20180512003217.9105-9-richard.henderson@linaro.org
11
11
[rth: Diagnose lack of FP16 before fp_access_check]
12
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
14
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
15
Message-id: 20200603145258.195920-2-linux@roeck-us.net
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
17
---
15
target/arm/helper-a64.h | 2 +
18
hw/sd/sdhci-internal.h | 5 +++++
16
target/arm/helper-a64.c | 10 +++++
19
include/hw/sd/sdhci.h | 5 +++++
17
target/arm/translate-a64.c | 88 ++++++++++++++++++++++++++++++--------
20
hw/sd/sdhci.c | 18 +++++++++++++++++-
18
3 files changed, 83 insertions(+), 17 deletions(-)
21
3 files changed, 27 insertions(+), 1 deletion(-)
19
22
20
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
23
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
21
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper-a64.h
25
--- a/hw/sd/sdhci-internal.h
23
+++ b/target/arm/helper-a64.h
26
+++ b/hw/sd/sdhci-internal.h
24
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@
25
DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
28
#define SDHC_CMD_INHIBIT 0x00000001
26
DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
29
#define SDHC_DATA_INHIBIT 0x00000002
27
DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
30
#define SDHC_DAT_LINE_ACTIVE 0x00000004
28
+DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr)
31
+#define SDHC_IMX_CLOCK_GATE_OFF 0x00000080
29
+DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr)
32
#define SDHC_DOING_WRITE 0x00000100
30
DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
33
#define SDHC_DOING_READ 0x00000200
31
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
34
#define SDHC_SPACE_AVAILABLE 0x00000400
32
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
35
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription sdhci_vmstate;
33
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
36
37
38
#define ESDHC_MIX_CTRL 0x48
39
+
40
#define ESDHC_VENDOR_SPEC 0xc0
41
+#define ESDHC_IMX_FRC_SDCLK_ON (1 << 8)
42
+
43
#define ESDHC_DLL_CTRL 0x60
44
45
#define ESDHC_TUNING_CTRL 0xcc
46
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription sdhci_vmstate;
47
#define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \
48
DEFINE_PROP_UINT8("sd-spec-version", _state, sd_spec_version, 2), \
49
DEFINE_PROP_UINT8("uhs", _state, uhs_mode, UHS_NOT_SUPPORTED), \
50
+ DEFINE_PROP_UINT8("vendor", _state, vendor, SDHCI_VENDOR_NONE), \
51
\
52
/* Capabilities registers provide information on supported
53
* features of this specific host controller implementation */ \
54
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
34
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/helper-a64.c
56
--- a/include/hw/sd/sdhci.h
36
+++ b/target/arm/helper-a64.c
57
+++ b/include/hw/sd/sdhci.h
37
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float_rel_to_flags(int res)
58
@@ -XXX,XX +XXX,XX @@ typedef struct SDHCIState {
38
return flags;
59
uint16_t acmd12errsts; /* Auto CMD12 error status register */
39
}
60
uint16_t hostctl2; /* Host Control 2 */
40
61
uint64_t admasysaddr; /* ADMA System Address Register */
41
+uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status)
62
+ uint16_t vendor_spec; /* Vendor specific register */
42
+{
63
43
+ return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
64
/* Read-only registers */
44
+}
65
uint64_t capareg; /* Capabilities Register */
66
@@ -XXX,XX +XXX,XX @@ typedef struct SDHCIState {
67
uint32_t quirks;
68
uint8_t sd_spec_version;
69
uint8_t uhs_mode;
70
+ uint8_t vendor; /* For vendor specific functionality */
71
} SDHCIState;
72
73
+#define SDHCI_VENDOR_NONE 0
74
+#define SDHCI_VENDOR_IMX 1
45
+
75
+
46
+uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status)
76
/*
47
+{
77
* Controller does not provide transfer-complete interrupt when not
48
+ return float_rel_to_flags(float16_compare(x, y, fp_status));
78
* busy.
49
+}
79
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/hw/sd/sdhci.c
82
+++ b/hw/sd/sdhci.c
83
@@ -XXX,XX +XXX,XX @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size)
84
}
85
break;
86
87
+ case ESDHC_VENDOR_SPEC:
88
+ ret = s->vendor_spec;
89
+ break;
90
case ESDHC_DLL_CTRL:
91
case ESDHC_TUNE_CTRL_STATUS:
92
case ESDHC_UNDOCUMENTED_REG27:
93
case ESDHC_TUNING_CTRL:
94
- case ESDHC_VENDOR_SPEC:
95
case ESDHC_MIX_CTRL:
96
case ESDHC_WTMK_LVL:
97
ret = 0;
98
@@ -XXX,XX +XXX,XX @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
99
case ESDHC_UNDOCUMENTED_REG27:
100
case ESDHC_TUNING_CTRL:
101
case ESDHC_WTMK_LVL:
102
+ break;
50
+
103
+
51
uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
104
case ESDHC_VENDOR_SPEC:
52
{
105
+ s->vendor_spec = value;
53
return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
106
+ switch (s->vendor) {
54
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
107
+ case SDHCI_VENDOR_IMX:
55
index XXXXXXX..XXXXXXX 100644
108
+ if (value & ESDHC_IMX_FRC_SDCLK_ON) {
56
--- a/target/arm/translate-a64.c
109
+ s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
57
+++ b/target/arm/translate-a64.c
58
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
59
}
60
}
61
62
-static void handle_fp_compare(DisasContext *s, bool is_double,
63
+static void handle_fp_compare(DisasContext *s, int size,
64
unsigned int rn, unsigned int rm,
65
bool cmp_with_zero, bool signal_all_nans)
66
{
67
TCGv_i64 tcg_flags = tcg_temp_new_i64();
68
- TCGv_ptr fpst = get_fpstatus_ptr(false);
69
+ TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
70
71
- if (is_double) {
72
+ if (size == MO_64) {
73
TCGv_i64 tcg_vn, tcg_vm;
74
75
tcg_vn = read_fp_dreg(s, rn);
76
@@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, bool is_double,
77
tcg_temp_free_i64(tcg_vn);
78
tcg_temp_free_i64(tcg_vm);
79
} else {
80
- TCGv_i32 tcg_vn, tcg_vm;
81
+ TCGv_i32 tcg_vn = tcg_temp_new_i32();
82
+ TCGv_i32 tcg_vm = tcg_temp_new_i32();
83
84
- tcg_vn = read_fp_sreg(s, rn);
85
+ read_vec_element_i32(s, tcg_vn, rn, 0, size);
86
     if (cmp_with_zero) {
-        tcg_vm = tcg_const_i32(0);
+        tcg_gen_movi_i32(tcg_vm, 0);
     } else {
-        tcg_vm = read_fp_sreg(s, rm);
+        read_vec_element_i32(s, tcg_vm, rm, 0, size);
     }
-    if (signal_all_nans) {
-        gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
-    } else {
-        gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+
+    switch (size) {
+    case MO_32:
+        if (signal_all_nans) {
+            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+        } else {
+            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+        }
+        break;
+    case MO_16:
+        if (signal_all_nans) {
+            gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+        } else {
+            gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+        }
+        break;
+    default:
+        g_assert_not_reached();
     }
+
     tcg_temp_free_i32(tcg_vn);
     tcg_temp_free_i32(tcg_vm);
 }
@@ -XXX,XX +XXX,XX @@ static void handle_fp_compare(DisasContext *s, bool is_double,
 static void disas_fp_compare(DisasContext *s, uint32_t insn)
 {
     unsigned int mos, type, rm, op, rn, opc, op2r;
+    int size;
 
     mos = extract32(insn, 29, 3);
-    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    type = extract32(insn, 22, 2);
     rm = extract32(insn, 16, 5);
     op = extract32(insn, 14, 2);
     rn = extract32(insn, 5, 5);
     opc = extract32(insn, 3, 2);
     op2r = extract32(insn, 0, 3);
 
-    if (mos || op || op2r || type > 1) {
+    if (mos || op || op2r) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        size = MO_32;
+        break;
+    case 1:
+        size = MO_64;
+        break;
+    case 3:
+        size = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
         unallocated_encoding(s);
         return;
     }
@@ -XXX,XX +XXX,XX @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
         return;
     }
 
-    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
+    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
 }
 
 /* Floating point conditional compare
@@ -XXX,XX +XXX,XX @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
     unsigned int mos, type, rm, cond, rn, op, nzcv;
     TCGv_i64 tcg_flags;
     TCGLabel *label_continue = NULL;
+    int size;
 
     mos = extract32(insn, 29, 3);
-    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    type = extract32(insn, 22, 2);
     rm = extract32(insn, 16, 5);
     cond = extract32(insn, 12, 4);
     rn = extract32(insn, 5, 5);
     op = extract32(insn, 4, 1);
     nzcv = extract32(insn, 0, 4);
 
-    if (mos || type > 1) {
+    if (mos) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        size = MO_32;
+        break;
+    case 1:
+        size = MO_64;
+        break;
+    case 3:
+        size = MO_16;
+        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            break;
+        }
+        /* fallthru */
+    default:
         unallocated_encoding(s);
         return;
     }
@@ -XXX,XX +XXX,XX @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
         gen_set_label(label_match);
     }
 
-    handle_fp_compare(s, type, rn, rm, false, op);
+    handle_fp_compare(s, size, rn, rm, false, op);
 
     if (cond < 0x0e) {
         gen_set_label(label_continue);
--
2.17.0

diff view generated by jsdifflib

From: Richard Henderson <richard.henderson@linaro.org>

We missed all of the scalar fp16 binary operations.

Cc: qemu-stable@nongnu.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180512003217.9105-7-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate-a64.c | 65 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
     tcg_temp_free_i64(tcg_res);
 }
 
+/* Floating-point data-processing (2 source) - half precision */
+static void handle_fp_2src_half(DisasContext *s, int opcode,
+                                int rd, int rn, int rm)
+{
+    TCGv_i32 tcg_op1;
+    TCGv_i32 tcg_op2;
+    TCGv_i32 tcg_res;
+    TCGv_ptr fpst;
+
+    tcg_res = tcg_temp_new_i32();
+    fpst = get_fpstatus_ptr(true);
+    tcg_op1 = read_fp_hreg(s, rn);
+    tcg_op2 = read_fp_hreg(s, rm);
+
+    switch (opcode) {
+    case 0x0: /* FMUL */
+        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x1: /* FDIV */
+        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x2: /* FADD */
+        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x3: /* FSUB */
+        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x4: /* FMAX */
+        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x5: /* FMIN */
+        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x6: /* FMAXNM */
+        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x7: /* FMINNM */
+        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x8: /* FNMUL */
+        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tcg_op1);
+    tcg_temp_free_i32(tcg_op2);
+    tcg_temp_free_i32(tcg_res);
+}
+
 /* Floating point data-processing (2 source)
  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -XXX,XX +XXX,XX @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
         }
         handle_fp_2src_double(s, opcode, rd, rn, rm);
         break;
+    case 3:
+        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            unallocated_encoding(s);
+            return;
+        }
+        if (!fp_access_check(s)) {
+            return;
+        }
+        handle_fp_2src_half(s, opcode, rd, rn, rm);
+        break;
     default:
         unallocated_encoding(s);
     }
--
2.17.0

diff view generated by jsdifflib
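
As an aside on the FNMUL case in the hunk above: rather than adding a
dedicated helper, the patch reuses the FMUL helper and then negates the
binary16 result by XOR-ing its sign bit (tcg_gen_xori_i32 with 0x8000).
A minimal standalone C sketch of that sign-flip trick, independent of the
QEMU code (the function name f16_negate and the test value are purely
illustrative, not taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Negate an IEEE binary16 value by toggling its sign bit (bit 15). */
    static uint16_t f16_negate(uint16_t h)
    {
        return h ^ 0x8000;
    }

    int main(void)
    {
        uint16_t one = 0x3c00;   /* binary16 encoding of +1.0 */
        /* prints "0x3c00 -> 0xbc00", i.e. +1.0 -> -1.0 */
        printf("0x%04x -> 0x%04x\n", (unsigned)one, (unsigned)f16_negate(one));
        return 0;
    }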

From: Guenter Roeck <linux@roeck-us.net>

Set vendor property to IMX to enable IMX specific functionality
in sdhci code.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20200603145258.195920-3-linux@roeck-us.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/fsl-imx25.c  | 6 ++++++
 hw/arm/fsl-imx6.c   | 6 ++++++
 hw/arm/fsl-imx6ul.c | 2 ++
 hw/arm/fsl-imx7.c   | 2 ++
 4 files changed, 16 insertions(+)

diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -XXX,XX +XXX,XX @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
                                 &err);
         object_property_set_uint(OBJECT(&s->esdhc[i]), IMX25_ESDHC_CAPABILITIES,
                                 "capareg", &err);
+        object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
         object_property_set_bool(OBJECT(&s->esdhc[i]), true, "realized", &err);
         if (err) {
             error_propagate(errp, err);
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                 &err);
         object_property_set_uint(OBJECT(&s->esdhc[i]), IMX6_ESDHC_CAPABILITIES,
                                 "capareg", &err);
+        object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
         object_property_set_bool(OBJECT(&s->esdhc[i]), true, "realized", &err);
         if (err) {
             error_propagate(errp, err);
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/fsl-imx6ul.c
+++ b/hw/arm/fsl-imx6ul.c
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
             FSL_IMX6UL_USDHC2_IRQ,
         };
 
+        object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &error_abort);
         object_property_set_bool(OBJECT(&s->usdhc[i]), true, "realized",
                                 &error_abort);
 
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/fsl-imx7.c
+++ b/hw/arm/fsl-imx7.c
@@ -XXX,XX +XXX,XX @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp)
             FSL_IMX7_USDHC3_IRQ,
         };
 
+        object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+                                 "vendor", &error_abort);
         object_property_set_bool(OBJECT(&s->usdhc[i]), true, "realized",
                                 &error_abort);
 
--
2.20.1

diff view generated by jsdifflib
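
A usage note on the change above: the i.MX-specific behaviour in the sdhci
model is enabled purely by the "vendor" QOM property, so any other board or
SoC model that embeds one of these controllers would turn it on the same
way, by setting the property before the device is realized. A hedged
fragment mirroring the calls added in the patch (the usdhc field and its
containing state struct here are placeholders for illustration, not real
QEMU code, and this is not a complete compilable example):

    /* In a hypothetical SoC realize function, before "realized": */
    object_property_set_uint(OBJECT(&s->usdhc), SDHCI_VENDOR_IMX,
                             "vendor", &error_abort);
    object_property_set_bool(OBJECT(&s->usdhc), true, "realized",
                             &error_abort);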