Arm queue; the bulk of this is the VFP decodetree conversion...

thanks
-- PMM

The following changes since commit 4747524f9f243ca5ff1f146d37e423c00e923ee1:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2019-06-12' into staging (2019-06-13 11:58:00 +0100)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20190613

for you to fetch changes up to 07e4c7f769120c9a5bd6a26c2dc1421f2f838d80:

  target/arm: Fix short-vector increment behaviour (2019-06-13 12:57:37 +0100)

----------------------------------------------------------------
target-arm queue:
 * convert aarch32 VFP decoder to decodetree
   (includes tightening up decode in a few places)
 * fix minor bugs in VFP short-vector handling
 * hw/core/bus.c: Only the main system bus can have no parent
 * smmuv3: Fix decoding of ID register range
 * Implement NSACR gating of floating point
 * Use tcg_gen_gvec_bitsel
 * Vectorize USHL and SSHL

----------------------------------------------------------------
Peter Maydell (44):
      target/arm: Implement NSACR gating of floating point
      hw/arm/smmuv3: Fix decoding of ID register range
      hw/core/bus.c: Only the main system bus can have no parent
      target/arm: Add stubs for AArch32 VFP decodetree
      target/arm: Factor out VFP access checking code
      target/arm: Fix Cortex-R5F MVFR values
      target/arm: Explicitly enable VFP short-vectors for aarch32 -cpu max
      target/arm: Convert the VSEL instructions to decodetree
      target/arm: Convert VMINNM, VMAXNM to decodetree
      target/arm: Convert VRINTA/VRINTN/VRINTP/VRINTM to decodetree
      target/arm: Convert VCVTA/VCVTN/VCVTP/VCVTM to decodetree
      target/arm: Move the VFP trans_* functions to translate-vfp.inc.c
      target/arm: Add helpers for VFP register loads and stores
      target/arm: Convert "double-precision" register moves to decodetree
      target/arm: Convert "single-precision" register moves to decodetree
      target/arm: Convert VFP two-register transfer insns to decodetree
      target/arm: Convert VFP VLDR and VSTR to decodetree
      target/arm: Convert the VFP load/store multiple insns to decodetree
      target/arm: Remove VLDR/VSTR/VLDM/VSTM use of cpu_F0s and cpu_F0d
      target/arm: Convert VFP VMLA to decodetree
      target/arm: Convert VFP VMLS to decodetree
      target/arm: Convert VFP VNMLS to decodetree
      target/arm: Convert VFP VNMLA to decodetree
      target/arm: Convert VMUL to decodetree
      target/arm: Convert VNMUL to decodetree
      target/arm: Convert VADD to decodetree
      target/arm: Convert VSUB to decodetree
      target/arm: Convert VDIV to decodetree
      target/arm: Convert VFP fused multiply-add insns to decodetree
      target/arm: Convert VMOV (imm) to decodetree
      target/arm: Convert VABS to decodetree
      target/arm: Convert VNEG to decodetree
      target/arm: Convert VSQRT to decodetree
      target/arm: Convert VMOV (register) to decodetree
      target/arm: Convert VFP comparison insns to decodetree
      target/arm: Convert the VCVT-from-f16 insns to decodetree
      target/arm: Convert the VCVT-to-f16 insns to decodetree
      target/arm: Convert VFP round insns to decodetree
      target/arm: Convert double-single precision conversion insns to decodetree
      target/arm: Convert integer-to-float insns to decodetree
      target/arm: Convert VJCVT to decodetree
      target/arm: Convert VCVT fp/fixed-point conversion insns to decodetree
      target/arm: Convert float-to-integer VCVT insns to decodetree
      target/arm: Fix short-vector increment behaviour

Richard Henderson (4):
      target/arm: Vectorize USHL and SSHL
      target/arm: Use tcg_gen_gvec_bitsel
      target/arm: Fix output of PAuth Auth
      decodetree: Fix comparison of Field

 target/arm/Makefile.objs          |   13 +
 tests/tcg/aarch64/Makefile.target |    2 +-
 target/arm/cpu.h                  |   11 +
 target/arm/helper.h               |   11 +-
 target/arm/translate-a64.h        |    2 +
 target/arm/translate.h            |    9 +-
 hw/arm/smmuv3.c                   |    2 +-
 hw/core/bus.c                     |   21 +-
 target/arm/cpu.c                  |    6 +
 target/arm/helper.c               |   75 +-
 target/arm/neon_helper.c          |   33 -
 target/arm/pauth_helper.c         |    4 +-
 target/arm/translate-a64.c        |   33 +-
 target/arm/translate-vfp.inc.c    | 2672 +++++++++++++++++++++++++++++++++++++
 target/arm/translate.c            | 1881 +++++---------------------
 target/arm/vec_helper.c           |   88 ++
 tests/tcg/aarch64/pauth-2.c       |   61 +
 scripts/decodetree.py             |    2 +-
 target/arm/vfp-uncond.decode      |   63 +
 target/arm/vfp.decode             |  242 ++++
 20 files changed, 3593 insertions(+), 1638 deletions(-)
 create mode 100644 target/arm/translate-vfp.inc.c
 create mode 100644 tests/tcg/aarch64/pauth-2.c
 create mode 100644 target/arm/vfp-uncond.decode
 create mode 100644 target/arm/vfp.decode

Most of this is the Neon decodetree patches, followed by Edgar's versal cleanups.

thanks
-- PMM

The following changes since commit 2ef486e76d64436be90f7359a3071fb2a56ce835:

  Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging (2020-05-03 14:12:56 +0100)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20200504

for you to fetch changes up to 9aefc6cf9b73f66062d2f914a0136756e7a28211:

  target/arm: Move gen_ function typedefs to translate.h (2020-05-04 12:59:26 +0100)

----------------------------------------------------------------
target-arm queue:
 * Start of conversion of Neon insns to decodetree
 * versal board: support SD and RTC
 * Implement ARMv8.2-TTS2UXN
 * Make VQDMULL undefined when U=1
 * Some minor code cleanups

----------------------------------------------------------------
Edgar E. Iglesias (11):
      hw/arm: versal: Remove inclusion of arm_gicv3_common.h
      hw/arm: versal: Move misplaced comment
      hw/arm: versal-virt: Fix typo xlnx-ve -> xlnx-versal
      hw/arm: versal: Embed the UARTs into the SoC type
      hw/arm: versal: Embed the GEMs into the SoC type
      hw/arm: versal: Embed the ADMAs into the SoC type
      hw/arm: versal: Embed the APUs into the SoC type
      hw/arm: versal: Add support for SD
      hw/arm: versal: Add support for the RTC
      hw/arm: versal-virt: Add support for SD
      hw/arm: versal-virt: Add support for the RTC

Fredrik Strupe (1):
      target/arm: Make VQDMULL undefined when U=1

Peter Maydell (25):
      target/arm: Don't use a TLB for ARMMMUIdx_Stage2
      target/arm: Use enum constant in get_phys_addr_lpae() call
      target/arm: Add new 's1_is_el0' argument to get_phys_addr_lpae()
      target/arm: Implement ARMv8.2-TTS2UXN
      target/arm: Use correct variable for setting 'max' cpu's ID_AA64DFR0
      target/arm/translate-vfp.inc.c: Remove duplicate simd_r32 check
      target/arm: Don't allow Thumb Neon insns without FEATURE_NEON
      target/arm: Add stubs for AArch32 Neon decodetree
      target/arm: Convert VCMLA (vector) to decodetree
      target/arm: Convert VCADD (vector) to decodetree
      target/arm: Convert V[US]DOT (vector) to decodetree
      target/arm: Convert VFM[AS]L (vector) to decodetree
      target/arm: Convert VCMLA (scalar) to decodetree
      target/arm: Convert V[US]DOT (scalar) to decodetree
      target/arm: Convert VFM[AS]L (scalar) to decodetree
      target/arm: Convert Neon load/store multiple structures to decodetree
      target/arm: Convert Neon 'load single structure to all lanes' to decodetree
      target/arm: Convert Neon 'load/store single structure' to decodetree
      target/arm: Convert Neon 3-reg-same VADD/VSUB to decodetree
      target/arm: Convert Neon 3-reg-same logic ops to decodetree
      target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree
      target/arm: Convert Neon 3-reg-same comparisons to decodetree
      target/arm: Convert Neon 3-reg-same VQADD/VQSUB to decodetree
      target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL to decodetree
      target/arm: Move gen_ function typedefs to translate.h

Philippe Mathieu-Daudé (2):
      hw/arm/mps2-tz: Use TYPE_IOTKIT instead of hardcoded string
      target/arm: Use uint64_t for midr field in CPU state struct

 include/hw/arm/xlnx-versal.h    |  31 +-
 target/arm/cpu-param.h          |   2 +-
 target/arm/cpu.h                |  38 ++-
 target/arm/translate-a64.h      |   9 -
 target/arm/translate.h          |  26 ++
 target/arm/neon-dp.decode       |  86 +++++
 target/arm/neon-ls.decode       |  52 +++
 target/arm/neon-shared.decode   |  66 ++++
 hw/arm/mps2-tz.c                |   2 +-
 hw/arm/xlnx-versal-virt.c       |  74 ++++-
 hw/arm/xlnx-versal.c            | 115 +++++--
 target/arm/cpu.c                |   3 +-
 target/arm/cpu64.c              |   8 +-
 target/arm/helper.c             | 183 ++++------
 target/arm/translate-a64.c      |  17 -
 target/arm/translate-neon.inc.c | 714 +++++++++++++++++++++++++++++++++++++++
 target/arm/translate-vfp.inc.c  |   6 -
 target/arm/translate.c          | 716 +++-------------------------------------
 target/arm/Makefile.objs        |  18 +
 19 files changed, 1302 insertions(+), 864 deletions(-)
 create mode 100644 target/arm/neon-dp.decode
 create mode 100644 target/arm/neon-ls.decode
 create mode 100644 target/arm/neon-shared.decode
 create mode 100644 target/arm/translate-neon.inc.c
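Both queues are built around decodetree conversions, so a word on the
format may help while reading the patches below: a .decode file gives
one line per instruction pattern, mixing fixed bits, don't-care bits
('.') and named fields, and the decodetree.py generator emits a decoder
that calls a trans_NAME() function for each match. A deliberately
invented 32-bit example (not a pattern from either series):

    # 'imm', 'rd' and 'rn' are extracted from the encoding and handed
    # to trans_FOO(DisasContext *s, arg_FOO *a) in the translator
    FOO     1111 0000 imm:8 rd:4 rn:4 .... ....

The real patterns live in the vfp*.decode and neon-*.decode files these
series create.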
From: Richard Henderson <richard.henderson@linaro.org>

These instructions shift left or right depending on the sign
of the input, and 7 bits are significant to the shift. This
requires several masks and selects in addition to the actual
shifts to form the complete answer.

That said, the operation is still a small improvement even for
two 64-bit elements -- 13 vector operations instead of 2 * 7
integer operations.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20190603232209.20704-1-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h        |  11 +-
 target/arm/translate.h     |   6 +
 target/arm/neon_helper.c   |  33 ----
 target/arm/translate-a64.c |  18 +--
 target/arm/translate.c     | 300 +++++++++++++++++++++++++++++++++++--
 target/arm/vec_helper.c    |  88 +++++++++++
 6 files changed, 390 insertions(+), 66 deletions(-)
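Before the diff, a plain-C model of the per-element operation being
vectorized (a sketch that mirrors the gvec_ushl_b/gvec_sshl_b helpers
added below; it is illustrative, not code from the patch):

    #include <stdint.h>

    /* USHL/SSHL take the shift count from the signed low byte of the
     * second operand: positive counts shift left, negative counts
     * shift right, and counts beyond the element width drain to zero
     * (or to the sign bit, for the arithmetic right shift).
     */
    static uint8_t ushl8(uint8_t nn, int8_t mm)
    {
        if (mm >= 8 || mm <= -8) {
            return 0;                        /* shifted completely out */
        }
        return mm >= 0 ? nn << mm : nn >> -mm;
    }

    static int8_t sshl8(int8_t nn, int8_t mm)
    {
        if (mm >= 8) {
            return 0;                        /* left shift out of range */
        }
        /* right shifts of 8 or more saturate to the sign bit */
        return mm >= 0 ? nn << mm : nn >> (mm > -8 ? -mm : 7);
    }

The TCG versions below compute both candidate shifts and then select or
mask the correct result, since vector hosts have no per-lane branches.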
diff --git a/target/arm/helper.h b/target/arm/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
 DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
 DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
 
-DEF_HELPER_2(neon_shl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s8, i32, i32, i32)
 DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
 DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_shl_s64, i64, i64, i64)
 DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
 DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
 DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr)
 
+DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ extern const GVecGen3 bif_op;
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
 extern const GVecGen3 cmtst_op[4];
+extern const GVecGen3 sshl_op[4];
+extern const GVecGen3 ushl_op[4];
 extern const GVecGen2i ssra_op[4];
 extern const GVecGen2i usra_op[4];
 extern const GVecGen2i sri_op[4];
@@ -XXX,XX +XXX,XX @@ extern const GVecGen4 sqadd_op[4];
 extern const GVecGen4 uqsub_op[4];
 extern const GVecGen4 sqsub_op[4];
 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -XXX,XX +XXX,XX @@ NEON_VOP(abd_u32, neon_u32, 1)
 } else { \
     dest = src1 << tmp; \
 }} while (0)
-NEON_VOP(shl_u8, neon_u8, 4)
 NEON_VOP(shl_u16, neon_u16, 2)
-NEON_VOP(shl_u32, neon_u32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
-{
-    int8_t shift = (int8_t)shiftop;
-    if (shift >= 64 || shift <= -64) {
-        val = 0;
-    } else if (shift < 0) {
-        val >>= -shift;
-    } else {
-        val <<= shift;
-    }
-    return val;
-}
-
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
 } else { \
     dest = src1 << tmp; \
 }} while (0)
-NEON_VOP(shl_s8, neon_s8, 4)
 NEON_VOP(shl_s16, neon_s16, 2)
-NEON_VOP(shl_s32, neon_s32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
-{
-    int8_t shift = (int8_t)shiftop;
-    int64_t val = valop;
-    if (shift >= 64) {
-        val = 0;
-    } else if (shift <= -64) {
-        val >>= 63;
-    } else if (shift < 0) {
-        val >>= -shift;
-    } else {
-        val <<= shift;
-    }
-    return val;
-}
-
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
         break;
     case 0x8: /* SSHL, USHL */
         if (u) {
-            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
+            gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
         } else {
-            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
+            gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
         }
         break;
     case 0x9: /* SQSHL, UQSHL */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
                      is_q ? 16 : 8, vec_full_reg_size(s),
                      (u ? uqsub_op : sqsub_op) + size);
         return;
+    case 0x08: /* SSHL, USHL */
+        gen_gvec_op3(s, is_q, rd, rn, rm,
+                     u ? &ushl_op[size] : &sshl_op[size]);
+        return;
     case 0x0c: /* SMAX, UMAX */
         if (u) {
             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
             genfn = fns[size][u];
             break;
         }
-    case 0x8: /* SSHL, USHL */
-    {
-        static NeonGenTwoOpFn * const fns[3][2] = {
-            { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
-            { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
-            { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
-        };
-        genfn = fns[size][u];
-        break;
-    }
     case 0x9: /* SQSHL, UQSHL */
     {
         static NeonGenTwoOpEnvFn * const fns[3][2] = {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
         if (u) {
             switch (size) {
             case 1: gen_helper_neon_shl_u16(var, var, shift); break;
-            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
+            case 2: gen_ushl_i32(var, var, shift); break;
             default: abort();
             }
         } else {
             switch (size) {
             case 1: gen_helper_neon_shl_s16(var, var, shift); break;
-            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
+            case 2: gen_sshl_i32(var, var, shift); break;
             default: abort();
             }
         }
@@ -XXX,XX +XXX,XX @@ const GVecGen3 cmtst_op[4] = {
       .vece = MO_64 },
 };
 
+void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 lval = tcg_temp_new_i32();
+    TCGv_i32 rval = tcg_temp_new_i32();
+    TCGv_i32 lsh = tcg_temp_new_i32();
+    TCGv_i32 rsh = tcg_temp_new_i32();
+    TCGv_i32 zero = tcg_const_i32(0);
+    TCGv_i32 max = tcg_const_i32(32);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i32(lsh, b);
+    tcg_gen_neg_i32(rsh, lsh);
+    tcg_gen_shl_i32(lval, a, lsh);
+    tcg_gen_shr_i32(rval, a, rsh);
+    tcg_gen_movcond_i32(TCG_COND_LTU, d, lsh, max, lval, zero);
+    tcg_gen_movcond_i32(TCG_COND_LTU, d, rsh, max, rval, d);
+
+    tcg_temp_free_i32(lval);
+    tcg_temp_free_i32(rval);
+    tcg_temp_free_i32(lsh);
+    tcg_temp_free_i32(rsh);
+    tcg_temp_free_i32(zero);
+    tcg_temp_free_i32(max);
+}
+
+void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 lval = tcg_temp_new_i64();
+    TCGv_i64 rval = tcg_temp_new_i64();
+    TCGv_i64 lsh = tcg_temp_new_i64();
+    TCGv_i64 rsh = tcg_temp_new_i64();
+    TCGv_i64 zero = tcg_const_i64(0);
+    TCGv_i64 max = tcg_const_i64(64);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i64(lsh, b);
+    tcg_gen_neg_i64(rsh, lsh);
+    tcg_gen_shl_i64(lval, a, lsh);
+    tcg_gen_shr_i64(rval, a, rsh);
+    tcg_gen_movcond_i64(TCG_COND_LTU, d, lsh, max, lval, zero);
+    tcg_gen_movcond_i64(TCG_COND_LTU, d, rsh, max, rval, d);
+
+    tcg_temp_free_i64(lval);
+    tcg_temp_free_i64(rval);
+    tcg_temp_free_i64(lsh);
+    tcg_temp_free_i64(rsh);
+    tcg_temp_free_i64(zero);
+    tcg_temp_free_i64(max);
+}
+
+static void gen_ushl_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec lval = tcg_temp_new_vec_matching(d);
+    TCGv_vec rval = tcg_temp_new_vec_matching(d);
+    TCGv_vec lsh = tcg_temp_new_vec_matching(d);
+    TCGv_vec rsh = tcg_temp_new_vec_matching(d);
+    TCGv_vec msk, max;
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_neg_vec(vece, rsh, b);
+    if (vece == MO_8) {
+        tcg_gen_mov_vec(lsh, b);
+    } else {
+        msk = tcg_temp_new_vec_matching(d);
+        tcg_gen_dupi_vec(vece, msk, 0xff);
+        tcg_gen_and_vec(vece, lsh, b, msk);
+        tcg_gen_and_vec(vece, rsh, rsh, msk);
+        tcg_temp_free_vec(msk);
+    }
+
+    /*
+     * Perform possibly out of range shifts, trusting that the operation
+     * does not trap. Discard unused results after the fact.
+     */
+    tcg_gen_shlv_vec(vece, lval, a, lsh);
+    tcg_gen_shrv_vec(vece, rval, a, rsh);
+
+    max = tcg_temp_new_vec_matching(d);
+    tcg_gen_dupi_vec(vece, max, 8 << vece);
+
+    /*
+     * The choice of LT (signed) and GEU (unsigned) are biased toward
+     * the instructions of the x86_64 host. For MO_8, the whole byte
+     * is significant so we must use an unsigned compare; otherwise we
+     * have already masked to a byte and so a signed compare works.
+     * Other tcg hosts have a full set of comparisons and do not care.
+     */
+    if (vece == MO_8) {
+        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
+        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
+        tcg_gen_andc_vec(vece, lval, lval, lsh);
+        tcg_gen_andc_vec(vece, rval, rval, rsh);
+    } else {
+        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
+        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
+        tcg_gen_and_vec(vece, lval, lval, lsh);
+        tcg_gen_and_vec(vece, rval, rval, rsh);
+    }
+    tcg_gen_or_vec(vece, d, lval, rval);
+
+    tcg_temp_free_vec(max);
+    tcg_temp_free_vec(lval);
+    tcg_temp_free_vec(rval);
+    tcg_temp_free_vec(lsh);
+    tcg_temp_free_vec(rsh);
+}
+
+static const TCGOpcode ushl_list[] = {
+    INDEX_op_neg_vec, INDEX_op_shlv_vec,
+    INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+};
+
+const GVecGen3 ushl_op[4] = {
+    { .fniv = gen_ushl_vec,
+      .fno = gen_helper_gvec_ushl_b,
+      .opt_opc = ushl_list,
+      .vece = MO_8 },
+    { .fniv = gen_ushl_vec,
+      .fno = gen_helper_gvec_ushl_h,
+      .opt_opc = ushl_list,
+      .vece = MO_16 },
+    { .fni4 = gen_ushl_i32,
+      .fniv = gen_ushl_vec,
+      .opt_opc = ushl_list,
+      .vece = MO_32 },
+    { .fni8 = gen_ushl_i64,
+      .fniv = gen_ushl_vec,
+      .opt_opc = ushl_list,
+      .vece = MO_64 },
+};
+
+void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 lval = tcg_temp_new_i32();
+    TCGv_i32 rval = tcg_temp_new_i32();
+    TCGv_i32 lsh = tcg_temp_new_i32();
+    TCGv_i32 rsh = tcg_temp_new_i32();
+    TCGv_i32 zero = tcg_const_i32(0);
+    TCGv_i32 max = tcg_const_i32(31);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i32(lsh, b);
+    tcg_gen_neg_i32(rsh, lsh);
+    tcg_gen_shl_i32(lval, a, lsh);
+    tcg_gen_umin_i32(rsh, rsh, max);
+    tcg_gen_sar_i32(rval, a, rsh);
+    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
+    tcg_gen_movcond_i32(TCG_COND_LT, d, lsh, zero, rval, lval);
+
+    tcg_temp_free_i32(lval);
+    tcg_temp_free_i32(rval);
+    tcg_temp_free_i32(lsh);
+    tcg_temp_free_i32(rsh);
+    tcg_temp_free_i32(zero);
+    tcg_temp_free_i32(max);
+}
+
+void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 lval = tcg_temp_new_i64();
+    TCGv_i64 rval = tcg_temp_new_i64();
+    TCGv_i64 lsh = tcg_temp_new_i64();
+    TCGv_i64 rsh = tcg_temp_new_i64();
+    TCGv_i64 zero = tcg_const_i64(0);
+    TCGv_i64 max = tcg_const_i64(63);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_ext8s_i64(lsh, b);
+    tcg_gen_neg_i64(rsh, lsh);
+    tcg_gen_shl_i64(lval, a, lsh);
+    tcg_gen_umin_i64(rsh, rsh, max);
+    tcg_gen_sar_i64(rval, a, rsh);
+    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
+    tcg_gen_movcond_i64(TCG_COND_LT, d, lsh, zero, rval, lval);
+
+    tcg_temp_free_i64(lval);
+    tcg_temp_free_i64(rval);
+    tcg_temp_free_i64(lsh);
+    tcg_temp_free_i64(rsh);
+    tcg_temp_free_i64(zero);
+    tcg_temp_free_i64(max);
+}
+
+static void gen_sshl_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec lval = tcg_temp_new_vec_matching(d);
+    TCGv_vec rval = tcg_temp_new_vec_matching(d);
+    TCGv_vec lsh = tcg_temp_new_vec_matching(d);
+    TCGv_vec rsh = tcg_temp_new_vec_matching(d);
+    TCGv_vec tmp = tcg_temp_new_vec_matching(d);
+
+    /*
+     * Rely on the TCG guarantee that out of range shifts produce
+     * unspecified results, not undefined behaviour (i.e. no trap).
+     * Discard out-of-range results after the fact.
+     */
+    tcg_gen_neg_vec(vece, rsh, b);
+    if (vece == MO_8) {
+        tcg_gen_mov_vec(lsh, b);
+    } else {
+        tcg_gen_dupi_vec(vece, tmp, 0xff);
+        tcg_gen_and_vec(vece, lsh, b, tmp);
+        tcg_gen_and_vec(vece, rsh, rsh, tmp);
+    }
+
+    /* Bound rsh so out of bound right shift gets -1. */
+    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
+    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
+    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+
+    tcg_gen_shlv_vec(vece, lval, a, lsh);
+    tcg_gen_sarv_vec(vece, rval, a, rsh);
+
+    /* Select in-bound left shift. */
+    tcg_gen_andc_vec(vece, lval, lval, tmp);
+
+    /* Select between left and right shift. */
+    if (vece == MO_8) {
+        tcg_gen_dupi_vec(vece, tmp, 0);
+        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, d, lsh, tmp, rval, lval);
+    } else {
+        tcg_gen_dupi_vec(vece, tmp, 0x80);
+        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, d, lsh, tmp, lval, rval);
+    }
+
+    tcg_temp_free_vec(lval);
+    tcg_temp_free_vec(rval);
+    tcg_temp_free_vec(lsh);
+    tcg_temp_free_vec(rsh);
+    tcg_temp_free_vec(tmp);
+}
+
+static const TCGOpcode sshl_list[] = {
+    INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+    INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+};
+
+const GVecGen3 sshl_op[4] = {
+    { .fniv = gen_sshl_vec,
+      .fno = gen_helper_gvec_sshl_b,
+      .opt_opc = sshl_list,
+      .vece = MO_8 },
+    { .fniv = gen_sshl_vec,
+      .fno = gen_helper_gvec_sshl_h,
+      .opt_opc = sshl_list,
+      .vece = MO_16 },
+    { .fni4 = gen_sshl_i32,
+      .fniv = gen_sshl_vec,
+      .opt_opc = sshl_list,
+      .vece = MO_32 },
+    { .fni8 = gen_sshl_i64,
+      .fniv = gen_sshl_vec,
+      .opt_opc = sshl_list,
+      .vece = MO_64 },
+};
+
 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                           TCGv_vec a, TCGv_vec b)
 {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                  vec_size, vec_size);
             }
             return 0;
+
+        case NEON_3R_VSHL:
+            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+                           u ? &ushl_op[size] : &sshl_op[size]);
+            return 0;
         }
 
         if (size == 3) {
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             neon_load_reg64(cpu_V0, rn + pass);
             neon_load_reg64(cpu_V1, rm + pass);
             switch (op) {
-            case NEON_3R_VSHL:
-                if (u) {
-                    gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
-                } else {
-                    gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
-                }
-                break;
             case NEON_3R_VQSHL:
                 if (u) {
                     gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         pairwise = 0;
         switch (op) {
-        case NEON_3R_VSHL:
         case NEON_3R_VQSHL:
         case NEON_3R_VRSHL:
         case NEON_3R_VQRSHL:
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_VHSUB:
             GEN_NEON_INTEGER_OP(hsub);
             break;
-        case NEON_3R_VSHL:
-            GEN_NEON_INTEGER_OP(shl);
-            break;
         case NEON_3R_VQSHL:
             GEN_NEON_INTEGER_OP_ENV(qshl);
             break;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
     } else {
         if (input_unsigned) {
-            gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
+            gen_ushl_i64(cpu_V0, in, tmp64);
         } else {
-            gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
+            gen_sshl_i64(cpu_V0, in, tmp64);
         }
     }
     tmp = tcg_temp_new_i32();
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
     do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
                  get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
 }
+
+void HELPER(gvec_sshl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        int8_t mm = m[i];
+        int8_t nn = n[i];
+        int8_t res = 0;
+        if (mm >= 0) {
+            if (mm < 8) {
+                res = nn << mm;
+            }
+        } else {
+            res = nn >> (mm > -8 ? -mm : 7);
+        }
+        d[i] = res;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sshl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        int8_t mm = m[i];   /* only 8 bits of shift are significant */
+        int16_t nn = n[i];
+        int16_t res = 0;
+        if (mm >= 0) {
+            if (mm < 16) {
+                res = nn << mm;
+            }
+        } else {
+            res = nn >> (mm > -16 ? -mm : 15);
+        }
+        d[i] = res;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        int8_t mm = m[i];
+        uint8_t nn = n[i];
+        uint8_t res = 0;
+        if (mm >= 0) {
+            if (mm < 8) {
+                res = nn << mm;
+            }
+        } else {
+            if (mm > -8) {
+                res = nn >> -mm;
+            }
+        }
+        d[i] = res;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        int8_t mm = m[i];   /* only 8 bits of shift are significant */
+        uint16_t nn = n[i];
+        uint16_t res = 0;
+        if (mm >= 0) {
+            if (mm < 16) {
+                res = nn << mm;
+            }
+        } else {
+            if (mm > -16) {
+                res = nn >> -mm;
+            }
+        }
+        d[i] = res;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
--
2.20.1

From: Fredrik Strupe <fredrik@strupe.net>

According to Arm ARM, VQDMULL is only valid when U=0, while having
U=1 is unallocated.

Signed-off-by: Fredrik Strupe <fredrik@strupe.net>
Fixes: 695272dcb976 ("target-arm: Handle UNDEF cases for Neon 3-regs-different-widths")
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
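For context on what the single changed value means: the fourth column
of the opcode table changed below is an "undefreq" bitmask consulted
when decoding the Neon 3-regs-different-widths group. A sketch of the
encoding, paraphrasing the comment in translate.c of this period (worth
double-checking against the actual source):

    /* undefreq: bit 0: UNDEF if size == 0
     *           bit 1: UNDEF if size == 1
     *           bit 2: UNDEF if size == 2
     *           bit 3: UNDEF if U == 1
     * Note that [2:0] all set implies "always UNDEF".
     */

So changing VQDMULL's entry from 1 to 9 keeps "UNDEF if size == 0" and
additionally makes the U=1 encoding UNDEF, matching the other
saturating-doubling entries such as VQDMLSL.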
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         {0, 0, 0, 0}, /* VMLSL */
         {0, 0, 0, 9}, /* VQDMLSL */
         {0, 0, 0, 0}, /* Integer VMULL */
-        {0, 0, 0, 1}, /* VQDMULL */
+        {0, 0, 0, 9}, /* VQDMULL */
         {0, 0, 0, 0xa}, /* Polynomial VMULL */
         {0, 0, 0, 7}, /* Reserved: always UNDEF */
     };
--
2.20.1
For VFP short vectors, the VFP registers are divided into a
series of banks: for single-precision these are s0-s7, s8-s15,
s16-s23 and s24-s31; for double-precision they are d0-d3,
d4-d7, ... d28-d31. Some banks are "scalar" meaning that
use of a register within them triggers a pure-scalar or
mixed vector-scalar operation rather than a full vector
operation. The scalar banks are s0-s7, d0-d3 and d16-d19.
When using a bank as part of a vector operation, we
iterate through it, increasing the register number by
the specified stride each time, and wrapping around to
the beginning of the bank.

Unfortunately our calculation of the "increment" part of this
was incorrect:
   vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask)
will only do the intended thing if bank_mask has exactly
one set high bit. For instance for doubles (bank_mask = 0xc),
if we start with vd = 6 and delta_d = 2 then vd is updated
to 12 rather than the intended 4.

This only causes problems in the unlikely case that the
starting register is not the first in its bank: if the
register number doesn't have to wrap around then the
expression happens to give the right answer.

Fix this bug by abstracting out the "check whether register
is in a scalar bank" and "advance register within bank"
operations to utility functions which use the right
bit masking operations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 100 ++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 40 deletions(-)
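A compilable miniature of the failure mode, using the numbers from the
message above (illustrative scaffolding only; the 'fixed' line repeats
the masking that the new vfp_advance_dreg() helper in the diff
performs):

    #include <stdio.h>

    int main(void)
    {
        int vd = 6, delta_d = 2, bank_mask = 0xc;

        /* Old formula: only correct when bank_mask has one set bit */
        int old = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);

        /* Fixed: increment only the low two bits, keep the rest */
        int fixed = ((vd + delta_d) & 0x3) | (vd & ~0x3);

        printf("old=%d fixed=%d\n", old, fixed);  /* old=12 fixed=4 */
        return 0;
    }

d6 lives in the d4-d7 bank, so advancing by 2 must wrap to d4; the old
expression jumps out of the bank to d12.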
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ typedef void VFPGen3OpDPFn(TCGv_i64 vd,
 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
 
+/*
+ * Return true if the specified S reg is in a scalar bank
+ * (ie if it is s0..s7)
+ */
+static inline bool vfp_sreg_is_scalar(int reg)
+{
+    return (reg & 0x18) == 0;
+}
+
+/*
+ * Return true if the specified D reg is in a scalar bank
+ * (ie if it is d0..d3 or d16..d19)
+ */
+static inline bool vfp_dreg_is_scalar(int reg)
+{
+    return (reg & 0xc) == 0;
+}
+
+/*
+ * Advance the S reg number forwards by delta within its bank
+ * (ie increment the low 3 bits but leave the rest the same)
+ */
+static inline int vfp_advance_sreg(int reg, int delta)
+{
+    return ((reg + delta) & 0x7) | (reg & ~0x7);
+}
+
+/*
+ * Advance the D reg number forwards by delta within its bank
+ * (ie increment the low 2 bits but leave the rest the same)
+ */
+static inline int vfp_advance_dreg(int reg, int delta)
+{
+    return ((reg + delta) & 0x3) | (reg & ~0x3);
+}
+
 /*
  * Perform a 3-operand VFP data processing instruction. fn is the
  * callback to do the actual operation; this function deals with the
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
 {
     uint32_t delta_m = 0;
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i32 f0, f1, fd;
     TCGv_ptr fpst;
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
     }
 
     if (veclen > 0) {
-        bank_mask = 0x18;
-
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_sreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
             delta_d = s->vec_stride + 1;
 
-            if ((vm & bank_mask) == 0) {
+            if (vfp_sreg_is_scalar(vm)) {
                 /* mixed scalar/vector */
                 delta_m = 0;
             } else {
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
 
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
-        vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
+        vd = vfp_advance_sreg(vd, delta_d);
+        vn = vfp_advance_sreg(vn, delta_d);
         neon_load_reg32(f0, vn);
         if (delta_m) {
-            vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+            vm = vfp_advance_sreg(vm, delta_m);
             neon_load_reg32(f1, vm);
         }
     }
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
 {
     uint32_t delta_m = 0;
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i64 f0, f1, fd;
     TCGv_ptr fpst;
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
     }
 
     if (veclen > 0) {
-        bank_mask = 0xc;
-
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_dreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
             delta_d = (s->vec_stride >> 1) + 1;
 
-            if ((vm & bank_mask) == 0) {
+            if (vfp_dreg_is_scalar(vm)) {
                 /* mixed scalar/vector */
                 delta_m = 0;
             } else {
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
         }
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
-        vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
+        vd = vfp_advance_dreg(vd, delta_d);
+        vn = vfp_advance_dreg(vn, delta_d);
         neon_load_reg64(f0, vn);
         if (delta_m) {
-            vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+            vm = vfp_advance_dreg(vm, delta_m);
             neon_load_reg64(f1, vm);
         }
     }
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
 {
     uint32_t delta_m = 0;
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i32 f0, fd;
 
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
     }
 
     if (veclen > 0) {
-        bank_mask = 0x18;
-
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_sreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
             delta_d = s->vec_stride + 1;
 
-            if ((vm & bank_mask) == 0) {
+            if (vfp_sreg_is_scalar(vm)) {
                 /* mixed scalar/vector */
                 delta_m = 0;
             } else {
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
         if (delta_m == 0) {
             /* single source one-many */
             while (veclen--) {
-                vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+                vd = vfp_advance_sreg(vd, delta_d);
                 neon_store_reg32(fd, vd);
             }
             break;
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
 
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
-        vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+        vd = vfp_advance_sreg(vd, delta_d);
+        vm = vfp_advance_sreg(vm, delta_m);
         neon_load_reg32(f0, vm);
     }
 
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
 {
     uint32_t delta_m = 0;
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i64 f0, fd;
 
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
     }
 
     if (veclen > 0) {
-        bank_mask = 0xc;
-
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_dreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
             delta_d = (s->vec_stride >> 1) + 1;
 
-            if ((vm & bank_mask) == 0) {
+            if (vfp_dreg_is_scalar(vm)) {
                 /* mixed scalar/vector */
                 delta_m = 0;
             } else {
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
         if (delta_m == 0) {
             /* single source one-many */
             while (veclen--) {
-                vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+                vd = vfp_advance_dreg(vd, delta_d);
                 neon_store_reg64(fd, vd);
             }
             break;
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
 
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
-        vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+        vd = vfp_advance_dreg(vd, delta_d);
+        vm = vfp_advance_dreg(vm, delta_m);
         neon_load_reg64(f0, vm);
     }
 
@@ -XXX,XX +XXX,XX @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
 {
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i32 fd;
     uint32_t n, i, vd;
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
     }
 
     if (veclen > 0) {
-        bank_mask = 0x18;
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_sreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
 
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+        vd = vfp_advance_sreg(vd, delta_d);
     }
 
     tcg_temp_free_i32(fd);
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
 {
     uint32_t delta_d = 0;
-    uint32_t bank_mask = 0;
     int veclen = s->vec_len;
     TCGv_i64 fd;
     uint32_t n, i, vd;
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
     }
 
     if (veclen > 0) {
-        bank_mask = 0xc;
         /* Figure out what type of vector operation this is. */
-        if ((vd & bank_mask) == 0) {
+        if (vfp_dreg_is_scalar(vd)) {
             /* scalar */
             veclen = 0;
         } else {
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
 
         /* Set up the operands for the next iteration */
         veclen--;
-        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+        vd = vfp_advance_dreg(vd, delta_d);
     }
 
     tcg_temp_free_i64(fd);
--
2.20.1

From: Philippe Mathieu-Daudé <f4bug@amsat.org>

By using the TYPE_* definitions for devices, we can:
- quickly find where devices are used with 'git-grep'
- easily rename a device (one-line change).

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20200428154650.21991-1-f4bug@amsat.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/mps2-tz.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
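The pattern being applied, in miniature (illustrative only; the real
TYPE_IOTKIT macro lives in QEMU's ARMSSE headers):

    /* Define the QOM type name once ... */
    #define TYPE_IOTKIT "iotkit"

    /* ... so every creation/lookup site references the macro:
     * 'git grep TYPE_IOTKIT' then finds them all, and renaming the
     * device touches only the #define. */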
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
         exit(EXIT_FAILURE);
     }
 
-    sysbus_init_child_obj(OBJECT(machine), "iotkit", &mms->iotkit,
+    sysbus_init_child_obj(OBJECT(machine), TYPE_IOTKIT, &mms->iotkit,
                           sizeof(mms->iotkit), mmc->armsse_type);
     iotkitdev = DEVICE(&mms->iotkit);
     object_property_set_link(OBJECT(&mms->iotkit), OBJECT(system_memory),
--
2.20.1
Convert the VSEL instructions to decodetree.
We leave trans_VSEL() in translate.c for now as this allows
the patch to show just the changes from the old handle_vsel().

In the old code the check for "do D16-D31 exist" was hidden in
the VFP_DREG macro, and assumed that VFPv3 always implied that
D16-D31 exist. In the new code we do the correct ID register test.
This gives identical behaviour for most of our CPUs, and fixes
previously incorrect handling for Cortex-R5F, Cortex-M4 and
Cortex-M33, which all implement VFPv3 or better with only 16
double-precision registers.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h               |  6 ++++++
 target/arm/translate-vfp.inc.c |  9 +++++++++
 target/arm/translate.c         | 35 ++++++++++++++++++++++++----------
 target/arm/vfp-uncond.decode   | 19 ++++++++++++++++++
 4 files changed, 59 insertions(+), 10 deletions(-)
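For reference while reading the trans_VSEL() hunk: only the cc == 0
case is visible in the diff context below, so here is a scalar sketch
of all four encoded conditions (case 0 matches the diff; the other
three follow the Arm ARM's VSEL definition, so treat them as a
paraphrase rather than quoted code):

    /* VSEL: dest = cond ? frn : frm, where the 2-bit cc field maps to
     *   0 -> EQ (Z set)            1 -> VS (V set)
     *   2 -> GE (N == V)           3 -> GT (!Z && N == V)
     */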
diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/cpu-param.h
37
+++ b/target/arm/cpu-param.h
38
@@ -XXX,XX +XXX,XX @@
39
# define TARGET_PAGE_BITS_MIN 10
40
#endif
41
42
-#define NB_MMU_MODES 12
43
+#define NB_MMU_MODES 11
44
45
#endif
22
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
46
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
23
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/cpu.h
48
--- a/target/arm/cpu.h
25
+++ b/target/arm/cpu.h
49
+++ b/target/arm/cpu.h
26
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
50
@@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
27
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
51
* handling via the TLB. The only way to do a stage 1 translation without
52
* the immediate stage 2 translation is via the ATS or AT system insns,
53
* which can be slow-pathed and always do a page table walk.
54
+ * The only use of stage 2 translations is either as part of an s1+2
55
+ * lookup or when loading the descriptors during a stage 1 page table walk,
56
+ * and in both those cases we don't use the TLB.
57
* 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
58
* translation regimes, because they map reasonably well to each other
59
* and they can't both be active at the same time.
60
@@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
61
* NS EL1 EL1&0 stage 1+2 (aka NS PL1)
62
* NS EL1 EL1&0 stage 1+2 +PAN
63
* NS EL0 EL2&0
64
+ * NS EL2 EL2&0
65
* NS EL2 EL2&0 +PAN
66
* NS EL2 (aka NS PL2)
67
* S EL0 EL1&0 (aka S PL0)
68
* S EL1 EL1&0 (not used if EL3 is 32 bit)
69
* S EL1 EL1&0 +PAN
70
* S EL3 (aka S PL1)
71
- * NS EL1&0 stage 2
72
*
73
- * for a total of 12 different mmu_idx.
74
+ * for a total of 11 different mmu_idx.
75
*
76
* R profile CPUs have an MPU, but can use the same set of MMU indexes
77
* as A profile. They only need to distinguish NS EL0 and NS EL1 (and
78
@@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
79
* are not quite the same -- different CPU types (most notably M profile
80
* vs A/R profile) would like to use MMU indexes with different semantics,
81
* but since we don't ever need to use all of those in a single CPU we
82
- * can avoid setting NB_MMU_MODES to more than 8. The lower bits of
83
+ * can avoid having to set NB_MMU_MODES to "total number of A profile MMU
84
+ * modes + total number of M profile MMU modes". The lower bits of
85
* ARMMMUIdx are the core TLB mmu index, and the higher bits are always
86
* the same for any particular CPU.
87
* Variables of type ARMMUIdx are always full values, and the core
88
@@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdx {
89
ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A,
90
ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A,
91
92
- ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A,
93
-
94
/*
95
* These are not allocated TLBs and are used only for AT system
96
* instructions or for the first stage of an S12 page table walk.
97
@@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdx {
98
ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB,
99
ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB,
100
ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB,
101
+ /*
102
+ * Not allocated a TLB: used only for second stage of an S12 page
103
+ * table walk, or for descriptor loads during first stage of an S1
104
+ * page table walk. Note that if we ever want to have a TLB for this
105
+ * then various TLB flush insns which currently are no-ops or flush
106
+ * only stage 1 MMU indexes will need to change to flush stage 2.
107
+ */
108
+ ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB,
109
110
/*
111
* M-profile.
112
@@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdxBit {
113
TO_CORE_BIT(SE10_1),
114
TO_CORE_BIT(SE10_1_PAN),
115
TO_CORE_BIT(SE3),
116
- TO_CORE_BIT(Stage2),
117
118
TO_CORE_BIT(MUser),
119
TO_CORE_BIT(MPriv),
120
diff --git a/target/arm/helper.c b/target/arm/helper.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/helper.c
123
+++ b/target/arm/helper.c
124
@@ -XXX,XX +XXX,XX @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
125
tlb_flush_by_mmuidx(cs,
126
ARMMMUIdxBit_E10_1 |
127
ARMMMUIdxBit_E10_1_PAN |
128
- ARMMMUIdxBit_E10_0 |
129
- ARMMMUIdxBit_Stage2);
130
+ ARMMMUIdxBit_E10_0);
28
}
131
}
29
132
30
+static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id)
133
static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
31
+{
134
@@ -XXX,XX +XXX,XX @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
32
+ /* Return true if D16-D31 are implemented */
135
tlb_flush_by_mmuidx_all_cpus_synced(cs,
33
+ return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2;
136
ARMMMUIdxBit_E10_1 |
34
+}
137
ARMMMUIdxBit_E10_1_PAN |
35
+
138
- ARMMMUIdxBit_E10_0 |
36
/*
139
- ARMMMUIdxBit_Stage2);
37
* We always set the FP and SIMD FP16 fields to indicate identical
140
+ ARMMMUIdxBit_E10_0);
38
* levels of support (assuming SIMD is implemented at all), so
39
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-vfp.inc.c
42
+++ b/target/arm/translate-vfp.inc.c
43
@@ -XXX,XX +XXX,XX @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
44
45
return true;
46
}
141
}
47
+
142
48
+/*
143
-static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
49
+ * The most usual kind of VFP access check, for everything except
144
- uint64_t value)
50
+ * FMXR/FMRX to the always-available special registers.
145
-{
51
+ */
146
- /* Invalidate by IPA. This has to invalidate any structures that
52
+static bool vfp_access_check(DisasContext *s)
147
- * contain only stage 2 translation information, but does not need
53
+{
148
- * to apply to structures that contain combined stage 1 and stage 2
54
+ return full_vfp_access_check(s, false);
149
- * translation information.
55
+}
150
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
56
diff --git a/target/arm/translate.c b/target/arm/translate.c
151
- */
57
index XXXXXXX..XXXXXXX 100644
152
- CPUState *cs = env_cpu(env);
58
--- a/target/arm/translate.c
153
- uint64_t pageaddr;
59
+++ b/target/arm/translate.c
154
-
60
@@ -XXX,XX +XXX,XX @@ static void gen_neon_dup_high16(TCGv_i32 var)
155
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
61
tcg_temp_free_i32(tmp);
156
- return;
157
- }
158
-
159
- pageaddr = sextract64(value << 12, 0, 40);
160
-
161
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
162
-}
163
-
164
-static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
165
- uint64_t value)
166
-{
167
- CPUState *cs = env_cpu(env);
168
- uint64_t pageaddr;
169
-
170
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
171
- return;
172
- }
173
-
174
- pageaddr = sextract64(value << 12, 0, 40);
175
-
176
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
177
- ARMMMUIdxBit_Stage2);
178
-}
179
180
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
181
uint64_t value)
182
@@ -XXX,XX +XXX,XX @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
183
tlb_flush_by_mmuidx(cs,
184
ARMMMUIdxBit_E10_1 |
185
ARMMMUIdxBit_E10_1_PAN |
186
- ARMMMUIdxBit_E10_0 |
187
- ARMMMUIdxBit_Stage2);
188
+ ARMMMUIdxBit_E10_0);
189
raw_write(env, ri, value);
190
}
62
}
191
}
63
192
@@ -XXX,XX +XXX,XX @@ static int alle1_tlbmask(CPUARMState *env)
64
-static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
193
return ARMMMUIdxBit_SE10_1 |
65
- uint32_t dp)
+static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
- uint32_t cc = extract32(insn, 20, 2);
+ uint32_t rd, rn, rm;
+ bool dp = a->dp;
+
+ if (!dc_isar_feature(aa32_vsel, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
+ ((a->vm | a->vn | a->vd) & 0x10)) {
+ return false;
+ }
+ rd = a->vd;
+ rn = a->vn;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }

if (dp) {
TCGv_i64 frn, frm, dest;
@@ -XXX,XX +XXX,XX @@ static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,

tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
- switch (cc) {
+ switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
frn, frm);
@@ -XXX,XX +XXX,XX @@ static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
dest = tcg_temp_new_i32();
tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
- switch (cc) {
+ switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
frn, frm);
@@ -XXX,XX +XXX,XX @@ static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
tcg_temp_free_i32(zero);
}

- return 0;
+ return true;
}

static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
rm = VFP_SREG_M(insn);
}

- if ((insn & 0x0f800e50) == 0x0e000a00 && dc_isar_feature(aa32_vsel, s)) {
- return handle_vsel(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fb00e10) == 0x0e800a00 &&
- dc_isar_feature(aa32_vminmaxnm, s)) {
+ if ((insn & 0x0fb00e10) == 0x0e800a00 &&
+ dc_isar_feature(aa32_vminmaxnm, s)) {
return handle_vminmaxnm(insn, rd, rn, rm, dp);
} else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
dc_isar_feature(aa32_vrint, s)) {
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -XXX,XX +XXX,XX @@
# 1111 1110 .... .... .... 101. .... ....
# (but those patterns might also cover some Neon instructions,
# which do not live in this file.)
+
+# VFP registers have an odd encoding with a four-bit field
+# and a one-bit field which are assembled in different orders
+# depending on whether the register is double or single precision.
+# Each individual instruction function must do the checks for
+# "double register selected but CPU does not have double support"
+# and "double register number has bit 4 set but CPU does not
+# support D16-D31" (which should UNDEF).
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
+VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
--
2.20.1


ARMMMUIdxBit_SE10_1_PAN |
ARMMMUIdxBit_SE10_0;
- } else if (arm_feature(env, ARM_FEATURE_EL2)) {
- return ARMMMUIdxBit_E10_1 |
- ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2;
} else {
return ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
@@ -XXX,XX +XXX,XX @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMMMUIdxBit_SE3);
}

-static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- ARMCPU *cpu = env_archcpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 48);
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
-}
-
-static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
-
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
-
- pageaddr = sextract64(value << 12, 0, 48);
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_Stage2);
-}
-
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_alle1is_write },
{ .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbimva_hyp_is_write },
{ .name = "TLBIIPAS2",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2IS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2L",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2LIS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
/* 32 bit cache operations */
{ .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
.type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access },
--
2.20.1

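The %vm_dp/%vm_sp field definitions above are easy to misread, so here is a
standalone sanity check of what they assemble (illustrative only; these helper
names are invented and are not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* %vm_dp 5:1 0:4 -- insn bit 5 becomes the high bit of the register number */
    static unsigned vm_dp(uint32_t insn)
    {
        return (((insn >> 5) & 1) << 4) | (insn & 0xf);
    }

    /* %vm_sp 0:4 5:1 -- the 4-bit field is the high part, bit 5 the low bit */
    static unsigned vm_sp(uint32_t insn)
    {
        return ((insn & 0xf) << 1) | ((insn >> 5) & 1);
    }

    int main(void)
    {
        uint32_t insn = 0x2a;            /* bits [5:0] = 101010 */
        printf("dp: d%u, sp: s%u\n", vm_dp(insn), vm_sp(insn));
        return 0;
    }

This matches the convention described in the decode comment: for doubles the M
bit extends the register number up to D16-D31, which is why trans_VSEL() must
check bit 4 against the aa32_fp_d32 feature.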
The NSACR register allows secure code to configure the FPU
to be inaccessible to non-secure code. If the NSACR.CP10
bit is set then:
 * NS accesses to the FPU trap as UNDEF (ie to NS EL1 or EL2)
 * CPACR.{CP10,CP11} behave as if RAZ/WI
 * HCPTR.{TCP11,TCP10} behave as if RAO/WI

Note that we do not implement the NSACR.NSASEDIS bit which
gates only access to Advanced SIMD, in the same way that
we don't implement the equivalent CPACR.ASEDIS and HCPTR.TASE.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20190510110357.18825-1-peter.maydell@linaro.org
---
target/arm/helper.c | 75 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -XXX,XX +XXX,XX @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
}
value &= mask;
}
+
+ /*
+ * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+ * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value &= ~(0xf << 20);
+ value |= env->cp15.cpacr_el1 & (0xf << 20);
+ }
+
env->cp15.cpacr_el1 = value;
}

+static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /*
+ * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+ * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+ */
+ uint64_t value = env->cp15.cpacr_el1;
+
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value &= ~(0xf << 20);
+ }
+ return value;
+}
+
+
static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* Call cpacr_write() so that we reset with the correct RAO bits set
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
{ .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
.crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
- .resetfn = cpacr_reset, .writefn = cpacr_write },
+ .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
REGINFO_SENTINEL
};

@@ -XXX,XX +XXX,XX @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
return ret;
}

+static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * For A-profile AArch32 EL3, if NSACR.CP10
+ * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value &= ~(0x3 << 10);
+ value |= env->cp15.cptr_el[2] & (0x3 << 10);
+ }
+ env->cp15.cptr_el[2] = value;
+}
+
+static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /*
+ * For A-profile AArch32 EL3, if NSACR.CP10
+ * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+ */
+ uint64_t value = env->cp15.cptr_el[2];
+
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value |= 0x3 << 10;
+ }
+ return value;
+}
+
static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_IO,
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL2_RW, .accessfn = cptr_access, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]) },
+ .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]),
+ .readfn = cptr_el2_read, .writefn = cptr_el2_write },
{ .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
@@ -XXX,XX +XXX,XX @@ int fp_exception_el(CPUARMState *env, int cur_el)
break;
}

+ /*
+ * The NSACR allows A-profile AArch32 EL3 and M-profile secure mode
+ * to control non-secure access to the FPU. It doesn't have any
+ * effect if EL3 is AArch64 or if EL3 doesn't exist at all.
+ */
+ if ((arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ cur_el <= 2 && !arm_is_secure_below_el3(env))) {
+ if (!extract32(env->cp15.nsacr, 10, 1)) {
+ /* FP insns act as UNDEF */
+ return cur_el == 2 ? 2 : 1;
+ }
+ }
+
/* For the CPTR registers we don't need to guard with an ARM_FEATURE
* check because zero bits in the registers mean "don't trap".
*/
--
2.20.1


The access_type argument to get_phys_addr_lpae() is an MMUAccessType;
use the enum constant MMU_DATA_LOAD rather than a literal 0 when we
call it in S1_ptw_translate().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200330210400.11724-3-peter.maydell@linaro.org
---
target/arm/helper.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
pcacheattrs = &cacheattrs;
}

- ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa,
- &txattrs, &s2prot, &s2size, fi, pcacheattrs);
+ ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2,
+ &s2pa, &txattrs, &s2prot, &s2size, fi,
+ pcacheattrs);
if (ret) {
assert(fi->type != ARMFault_None);
fi->s2addr = addr;
--
2.20.1

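The write/read pair above follows a reusable pattern for "gated" register
bits; a minimal standalone sketch of the same idea (generic names, not QEMU
code):

    #include <stdint.h>
    #include <stdbool.h>

    #define GATED_BITS (0xfu << 20)   /* e.g. CPACR.{CP11,CP10} */

    /* When the gate is closed the gated bits ignore writes... */
    static uint64_t gated_write(uint64_t old, uint64_t val, bool gate_closed)
    {
        if (gate_closed) {
            val &= ~GATED_BITS;          /* drop the newly written bits */
            val |= old & GATED_BITS;     /* keep the old ones instead */
        }
        return val;
    }

    /* ...and read back as zero (RAZ); a RAO gate would OR the bits in instead. */
    static uint64_t gated_read(uint64_t val, bool gate_closed)
    {
        return gate_closed ? (val & ~GATED_BITS) : val;
    }

The CPTR_EL2 accessors are the RAO/WI variant of the same shape: reads force
HCPTR.{TCP11,TCP10} to 1 rather than 0.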
Convert the VCVTA/VCVTN/VCVTP/VCVTM instructions to decodetree.
trans_VCVT() is temporarily left in translate.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate.c | 72 +++++++++++++++++-------------------
target/arm/vfp-uncond.decode | 6 +++
2 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return true;
}

-static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
- int rounding)
+static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
- bool is_signed = extract32(insn, 7, 1);
- TCGv_ptr fpst = get_fpstatus_ptr(0);
+ uint32_t rd, rm;
+ bool dp = a->dp;
+ TCGv_ptr fpst;
TCGv_i32 tcg_rmode, tcg_shift;
+ int rounding = fp_decode_rm[a->rm];
+ bool is_signed = a->op;
+
+ if (!dc_isar_feature(aa32_vcvt_dr, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+ rd = a->vd;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(0);

tcg_shift = tcg_const_i32(0);

@@ -XXX,XX +XXX,XX @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
if (dp) {
TCGv_i64 tcg_double, tcg_res;
TCGv_i32 tcg_tmp;
- /* Rd is encoded as a single precision register even when the source
- * is double precision.
- */
- rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
tcg_double = tcg_temp_new_i64();
tcg_res = tcg_temp_new_i64();
tcg_tmp = tcg_temp_new_i32();
@@ -XXX,XX +XXX,XX @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,

tcg_temp_free_ptr(fpst);

- return 0;
-}
-
-static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
-{
- uint32_t rd, rm, dp = extract32(insn, 8, 1);
-
- if (dp) {
- VFP_DREG_D(rd, insn);
- VFP_DREG_M(rm, insn);
- } else {
- rd = VFP_SREG_D(insn);
- rm = VFP_SREG_M(insn);
- }
-
- if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
- dc_isar_feature(aa32_vcvt_dr, s)) {
- /* VCVTA, VCVTN, VCVTP, VCVTM */
- int rounding = fp_decode_rm[extract32(insn, 16, 2)];
- return handle_vcvt(insn, rd, rm, dp, rounding);
- }
- return 1;
+ return true;
}

/*
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
}
}

+ if (extract32(insn, 28, 4) == 0xf) {
+ /*
+ * Encodings with T=1 (Thumb) or unconditional (ARM): these
+ * were all handled by the decodetree decoder, so any insn
+ * patterns which get here must be UNDEF.
+ */
+ return 1;
+ }
+
/*
* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 0;
}

- if (extract32(insn, 28, 4) == 0xf) {
- /*
- * Encodings with T=1 (Thumb) or unconditional (ARM):
- * only used for the "miscellaneous VFP features" added in v8A
- * and v7M (and gated on the MVFR2.FPMisc field).
- */
- return disas_vfp_misc_insn(s, insn);
- }
-
dp = ((insn & 0xf00) == 0xb00);
switch ((insn >> 24) & 0xf) {
case 0xe:
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -XXX,XX +XXX,XX @@ VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
vm=%vm_sp vd=%vd_sp dp=0
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
vm=%vm_dp vd=%vd_dp dp=1
+
+# VCVT float to int with specified rounding mode; Vd is always single-precision
+VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \
+ vm=%vm_sp vd=%vd_sp dp=0
+VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \
+ vm=%vm_dp vd=%vd_sp dp=1
--
2.20.1


For ARMv8.2-TTS2UXN, the stage 2 page table walk wants to know
whether the stage 1 access is for EL0 or not, because whether
exec permission is given can depend on whether this is an EL0
or EL1 access. Add a new argument to get_phys_addr_lpae() so
the call sites can pass this information in.

Since get_phys_addr_lpae() doesn't already have a doc comment,
add one so we have a place to put the documentation of the
semantics of the new s1_is_el0 argument.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200330210400.11724-4-peter.maydell@linaro.org
---
target/arm/helper.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -XXX,XX +XXX,XX @@

static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
@@ -XXX,XX +XXX,XX @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
}

ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2,
+ false,
&s2pa, &txattrs, &s2prot, &s2size, fi,
pcacheattrs);
if (ret) {
@@ -XXX,XX +XXX,XX @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
};
}

+/**
+ * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
+ *
+ * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
+ * prot and page_size may not be filled in, and the populated fsr value provides
+ * information on why the translation aborted, in the format of a long-format
+ * DFSR/IFSR fault register, with the following caveats:
+ * * the WnR bit is never set (the caller must do this).
+ *
+ * @env: CPUARMState
+ * @address: virtual address to get physical address for
+ * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
+ * @mmu_idx: MMU index indicating required translation regime
+ * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table
+ * walk), must be true if this is stage 2 of a stage 1+2 walk for an
+ * EL0 access. If @mmu_idx is anything else, @s1_is_el0 is ignored.
+ * @phys_ptr: set to the physical address corresponding to the virtual address
+ * @attrs: set to the memory transaction attributes to use
+ * @prot: set to the permissions for the page containing phys_ptr
+ * @page_size_ptr: set to the size of the page containing phys_ptr
+ * @fi: set to fault info if the translation fails
+ * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
+ */
static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
@@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address,

/* S1 is done. Now do S2 translation. */
ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2,
+ mmu_idx == ARMMMUIdx_E10_0,
phys_ptr, attrs, &s2_prot,
page_size, fi,
cacheattrs != NULL ? &cacheattrs2 : NULL);
@@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
}

if (regime_using_lpae_format(env, mmu_idx)) {
- return get_phys_addr_lpae(env, address, access_type, mmu_idx,
+ return get_phys_addr_lpae(env, address, access_type, mmu_idx, false,
phys_ptr, attrs, prot, page_size,
fi, cacheattrs);
} else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
--
2.20.1

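For reference, the rounding-mode table that trans_VCVT() now indexes directly
with a->rm follows the architectural FPDecodeRM() mapping; a sketch of its
shape using softfloat constants (treat the exact table as an assumption rather
than a quote of translate.c):

    static const int fp_decode_rm_sketch[4] = {
        float_round_ties_away,      /* 00: VCVTA */
        float_round_nearest_even,   /* 01: VCVTN */
        float_round_up,             /* 10: VCVTP */
        float_round_down,           /* 11: VCVTM */
    };

so decodetree's rm:2 field can be used as-is, with op:1 selecting the signed
conversion.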
Convert the VFP VMLA instruction to decodetree.

This is the first of the VFP 3-operand data processing instructions,
so we include in this patch the code which loops over the elements
for an old-style VFP vector operation. The existing code to do this
looping uses the deprecated cpu_F0s/F0d/F1s/F1d TCG globals; since
we are going to be converting instructions one at a time anyway
we can take the opportunity to make the new loop use TCG temporaries,
which means we can do that conversion one operation at a time
rather than needing to do it all in one go.

We include an UNDEF check which was missing in the old code:
short-vector operations (with stride or length non-zero) were
deprecated in v7A and must UNDEF in v8A, so if the MVFR0 FPShVec
field does not indicate that support for short vectors is present
we UNDEF the operations that would use them. (This is a change
of behaviour for Cortex-A7, Cortex-A15 and the v8 CPUs, which
previously were all incorrectly allowing short-vector operations.)

Note that the conversion fixes a bug in the old code for the
case of VFP short-vector "mixed scalar/vector operations". These
happen where the destination register is in a vector bank but
the second operand is in a scalar bank. For example
  vmla.f64 d10, d1, d16 with length 2 stride 2
is equivalent to the pair of scalar operations
  vmla.f64 d10, d1, d16
  vmla.f64 d8, d3, d16
where the destination and first input register cycle through
their vector but the second input is scalar (d16). In the
old decoder the gen_vfp_F1_mul() operation uses cpu_F1{s,d}
as a temporary output for the multiply, which trashes the
second input operand. For the fully-scalar case (where we
never do a second iteration) and the fully-vector case
(where the loop loads the new second input operand) this
doesn't matter, but for the mixed scalar/vector case we
will end up using the wrong value for later loop iterations.
In the new code we use TCG temporaries and so avoid the bug.
This bug is present for all the multiply-accumulate insns
that operate on short vectors: VMLA, VMLS, VNMLA, VNMLS.

Note 2: the expression used to calculate the next register
number in the vector bank is not in fact correct; we leave
this behaviour unchanged from the old decoder and will
fix this bug later in the series.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 5 +
target/arm/translate-vfp.inc.c | 205 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 14 ++-
target/arm/vfp.decode | 6 +
4 files changed, 224 insertions(+), 6 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id)
return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2;
}

+static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0;
+}
+
/*
* We always set the FP and SIMD FP16 fields to indicate identical
* levels of support (assuming SIMD is implemented at all), so
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)

return true;
}
+
+/*
+ * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
+ * The callback should emit code to write a value to vd. If
+ * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
+ * will contain the old value of the relevant VFP register;
+ * otherwise it must be written to only.
+ */
+typedef void VFPGen3OpSPFn(TCGv_i32 vd,
+ TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
+typedef void VFPGen3OpDPFn(TCGv_i64 vd,
+ TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
+
+/*
+ * Perform a 3-operand VFP data processing instruction. fn is the
+ * callback to do the actual operation; this function deals with the
+ * code to handle looping around for VFP vector processing.
+ */
+static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ uint32_t bank_mask = 0;
+ int veclen = s->vec_len;
+ TCGv_i32 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ bank_mask = 0x18;
+
+ /* Figure out what type of vector operation this is. */
+ if ((vd & bank_mask) == 0) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = s->vec_stride + 1;
+
+ if ((vm & bank_mask) == 0) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i32();
+ f1 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+ fpst = get_fpstatus_ptr(0);
+
+ neon_load_reg32(f0, vn);
+ neon_load_reg32(f1, vm);
+
+ for (;;) {
+ if (reads_vd) {
+ neon_load_reg32(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ neon_store_reg32(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+ vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
+ neon_load_reg32(f0, vn);
+ if (delta_m) {
+ vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+ neon_load_reg32(f1, vm);
+ }
+ }
+
+ tcg_temp_free_i32(f0);
+ tcg_temp_free_i32(f1);
+ tcg_temp_free_i32(fd);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
+
+static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ uint32_t bank_mask = 0;
+ int veclen = s->vec_len;
+ TCGv_i64 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vn | vm) & 0x10)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ bank_mask = 0xc;
+
+ /* Figure out what type of vector operation this is. */
+ if ((vd & bank_mask) == 0) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = (s->vec_stride >> 1) + 1;
+
+ if ((vm & bank_mask) == 0) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i64();
+ f1 = tcg_temp_new_i64();
+ fd = tcg_temp_new_i64();
+ fpst = get_fpstatus_ptr(0);
+
+ neon_load_reg64(f0, vn);
+ neon_load_reg64(f1, vm);
+
+ for (;;) {
+ if (reads_vd) {
+ neon_load_reg64(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ neon_store_reg64(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+ vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
+ neon_load_reg64(f0, vn);
+ if (delta_m) {
+ vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
+ neon_load_reg64(f1, vm);
+ }
+ }
+
+ tcg_temp_free_i64(f0);
+ tcg_temp_free_i64(f1);
+ tcg_temp_free_i64(fd);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
+
+static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_muls(tmp, vn, vm, fpst);
+ gen_helper_vfp_adds(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ gen_helper_vfp_muld(tmp, vn, vm, fpst);
+ gen_helper_vfp_addd(vd, vd, tmp, fpst);
+ tcg_temp_free_i64(tmp);
+}
+
+static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
rn = VFP_SREG_N(insn);

+ switch (op) {
+ case 0:
+ /* Already handled by decodetree */
+ return 1;
+ default:
+ break;
+ }
+
if (op == 15) {
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
for (;;) {
/* Perform the calculation. */
switch (op) {
- case 0: /* VMLA: fd + (fn * fm) */
- /* Note that order of inputs to the add matters for NaNs */
- gen_vfp_F1_mul(dp);
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_add(dp);
- break;
case 1: /* VMLS: fd + -(fn * fm) */
gen_vfp_mul(dp);
gen_vfp_F1_neg(dp);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp p=1 u=0 w=1
VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
+
+# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
--
2.20.1


The ARMv8.2-TTS2UXN feature extends the XN field in stage 2
translation table descriptors from just bit [54] to bits [54:53],
allowing stage 2 to control execution permissions separately for EL0
and EL1. Implement the new semantics of the XN field and enable
the feature for our 'max' CPU.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200330210400.11724-5-peter.maydell@linaro.org
---
target/arm/cpu.h | 15 +++++++++++++++
target/arm/cpu.c | 1 +
target/arm/cpu64.c | 2 ++
target/arm/helper.c | 37 +++++++++++++++++++++++++++++++------
4 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id)
return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0;
}

+static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
+}
+
/*
* 64-bit feature tests via id registers.
*/
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
}

+static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
+}
+
/*
* Feature tests for "does this exist in either 32-bit or 64-bit?"
*/
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id)
return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id);
}

+static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id);
+}
+
/*
* Forward to the above feature tests given an ARMCPU pointer.
*/
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
+ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = t;
}
#endif
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1);
t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */
t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* VMID16 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* TTS2UXN */
cpu->isar.id_aa64mmfr1 = t;

t = cpu->isar.id_aa64mmfr2;
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */
u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */
+ u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = u;

u = cpu->isar.id_aa64dfr0;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -XXX,XX +XXX,XX @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
*
* @env: CPUARMState
* @s2ap: The 2-bit stage2 access permissions (S2AP)
- * @xn: XN (execute-never) bit
+ * @xn: XN (execute-never) bits
+ * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
*/
-static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
{
int prot = 0;

@@ -XXX,XX +XXX,XX @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn)
if (s2ap & 2) {
prot |= PAGE_WRITE;
}
- if (!xn) {
- if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
+
+ if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
+ switch (xn) {
+ case 0:
prot |= PAGE_EXEC;
+ break;
+ case 1:
+ if (s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ case 2:
+ break;
+ case 3:
+ if (!s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ if (!extract32(xn, 1, 1)) {
+ if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
+ prot |= PAGE_EXEC;
+ }
}
}
return prot;
@@ -XXX,XX +XXX,XX @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
}

ap = extract32(attrs, 4, 2);
- xn = extract32(attrs, 12, 1);

if (mmu_idx == ARMMMUIdx_Stage2) {
ns = true;
- *prot = get_S2prot(env, ap, xn);
+ xn = extract32(attrs, 11, 2);
+ *prot = get_S2prot(env, ap, xn, s1_is_el0);
} else {
ns = extract32(attrs, 3, 1);
+ xn = extract32(attrs, 12, 1);
pxn = extract32(attrs, 11, 1);
*prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
}
--
2.20.1

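The bank-and-stride stepping in do_vfp_3op_sp() can be tried out in isolation;
this standalone program reproduces the expression from the patch (including
the not-quite-correct next-register calculation that the commit message's
"Note 2" promises to fix later in the series):

    #include <stdio.h>

    int main(void)
    {
        unsigned vd = 16;              /* s16: inside the second bank */
        unsigned delta_d = 2;          /* decoded stride, as in the patch */
        unsigned bank_mask = 0x18;     /* single-precision banks of 8 regs */

        for (int veclen = 2; ; veclen--) {
            printf("s%u\n", vd);
            if (veclen == 0) {
                break;
            }
            vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
        }
        return 0;
    }

This visits s16, s18, s20: the destination cycles through its bank while a
scalar operand (delta_m == 0) stays fixed, which is exactly the mixed
scalar/vector case whose operand-trashing bug is described above.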
Convert the VFP round-to-integer instructions VRINTR, VRINTZ and
VRINTX to decodetree.

These instructions were only introduced as part of the "VFP misc"
additions in v8A, so we check this. The old decoder's implementation
was incorrectly providing them even for v7A CPUs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-vfp.inc.c | 163 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 45 +--------
target/arm/vfp.decode | 15 +++
3 files changed, 179 insertions(+), 44 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
tcg_temp_free_i32(tmp);
return true;
}
+
+static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ gen_helper_rints(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ neon_load_reg64(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ gen_helper_rintd(tmp, tmp, fpst);
+ neon_store_reg64(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(tmp);
+ return true;
+}
+
+static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rints(tmp, tmp, fpst);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ neon_load_reg64(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rintd(tmp, tmp, fpst);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ neon_store_reg64(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(tmp);
+ tcg_temp_free_i32(tcg_rmode);
+ return true;
+}
+
+static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ gen_helper_rints_exact(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ neon_load_reg64(tmp, a->vm);
+ fpst = get_fpstatus_ptr(false);
+ gen_helper_rintd_exact(tmp, tmp, fpst);
+ neon_store_reg64(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(tmp);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 1;
case 15:
switch (rn) {
- case 0 ... 11:
+ case 0 ... 14:
/* Already handled by decodetree */
return 1;
default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
if (op == 15) {
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
- case 0x0c: /* vrintr */
- case 0x0d: /* vrintz */
- case 0x0e: /* vrintx */
- break;
-
case 0x0f: /* vcvt double<->single */
rd_is_dp = !dp;
break;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
switch (op) {
case 15: /* extension space */
switch (rn) {
- case 12: /* vrintr */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- if (dp) {
- gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
- }
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 13: /* vrintz */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- TCGv_i32 tcg_rmode;
- tcg_rmode = tcg_const_i32(float_round_to_zero);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
- gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
- }
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 14: /* vrintx */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- if (dp) {
- gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
- }
- tcg_temp_free_ptr(fpst);
- break;
- }
case 15: /* single<->double conversion */
if (dp) {
gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
+
+VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... \
+ vd=%vd_dp vm=%vm_dp
+
+VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... \
+ vd=%vd_dp vm=%vm_dp
+
+VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... \
+ vd=%vd_dp vm=%vm_dp
--
2.20.1


In aarch64_max_initfn() we update both 32-bit and 64-bit ID
registers. The intended pattern is that for 64-bit ID registers we
use FIELD_DP64 and the uint64_t 't' register, while 32-bit ID
registers use FIELD_DP32 and the uint32_t 'u' register. For
ID_AA64DFR0 we accidentally used 'u', meaning that the top 32 bits of
this 64-bit ID register would end up always zero. Luckily at the
moment that's what they should be anyway, so this bug has no visible
effects.

Use the right-sized variable.

Fixes: 3bec78447a958d481991
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20200423110915.10527-1-peter.maydell@linaro.org
---
target/arm/cpu64.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = u;

- u = cpu->isar.id_aa64dfr0;
- u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
- cpu->isar.id_aa64dfr0 = u;
+ t = cpu->isar.id_aa64dfr0;
+ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
+ cpu->isar.id_aa64dfr0 = t;

u = cpu->isar.id_dfr0;
u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */
--
2.20.1

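The t/u mixup is worth a standalone illustration, since without -Wconversion
the compiler accepts it silently:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t id = (uint64_t)5 << 36;   /* some field above bit 31 */

        uint32_t u = id;   /* wrong-sized temporary: the field is lost */
        uint64_t t = id;   /* right-sized temporary */

        printf("via u: %#llx, via t: %#llx\n",
               (unsigned long long)u, (unsigned long long)t);
        return 0;
    }

PMUVER itself lives in the low half of ID_AA64DFR0, which is why the bug
happened to be harmless; the example uses a high field to show what would go
wrong once one is set.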
At the moment our -cpu max for AArch32 supports VFP short-vectors
because we always implement them, even for CPUs which should
not have them. The following commits are going to switch to
using the correct ID-register-check to enable or disable short
vector support, so we need to turn it on explicitly for -cpu max,
because Cortex-A15 doesn't implement it.

We don't enable this for the AArch64 -cpu max, because the v8A
architecture never supports short-vectors.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
kvm_arm_set_cpu_features_from_host(cpu);
} else {
cortex_a15_initfn(obj);
+
+ /* old-style VFP short-vector support */
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+
#ifdef CONFIG_USER_ONLY
/* We don't set these in system emulation mode for the moment,
* since we don't correctly set (all of) the ID registers to
--
2.20.1


From: Philippe Mathieu-Daudé <f4bug@amsat.org>

MIDR_EL1 is a 64-bit system register with the top 32-bit being RES0.
Represent it in QEMU's ARMCPU struct with a uint64_t, not a
uint32_t.

This fixes an error when compiling with -Werror=conversion
because we were manipulating the register value using a
local uint64_t variable:

  target/arm/cpu64.c: In function ‘aarch64_max_initfn’:
  target/arm/cpu64.c:628:21: error: conversion from ‘uint64_t’ {aka ‘long unsigned int’} to ‘uint32_t’ {aka ‘unsigned int’} may change value [-Werror=conversion]
  628 | cpu->midr = t;
  | ^

and future-proofs us against a possible future architecture
change using some of the top 32 bits.

Suggested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Message-id: 20200428172634.29707-1-f4bug@amsat.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/cpu.h | 2 +-
target/arm/cpu.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -XXX,XX +XXX,XX @@ struct ARMCPU {
uint64_t id_aa64dfr0;
uint64_t id_aa64dfr1;
} isar;
- uint32_t midr;
+ uint64_t midr;
uint32_t revidr;
uint32_t reset_fpsid;
uint32_t ctr;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static const ARMCPUInfo arm_cpus[] = {
static Property arm_cpu_properties[] = {
DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
- DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
+ DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
--
2.20.1

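The one-liner in the first patch above is doing a field deposit; here is a
generic standalone sketch of the deposit/extract pair (plain C, not the
registerfields.h macros, and the FPShVec position is an illustrative
assumption):

    #include <stdint.h>
    #include <assert.h>

    static uint32_t dp32(uint32_t reg, unsigned pos, unsigned len, uint32_t val)
    {
        uint32_t mask = ((1u << len) - 1) << pos;
        return (reg & ~mask) | ((val << pos) & mask);
    }

    static uint32_t ex32(uint32_t reg, unsigned pos, unsigned len)
    {
        return (reg >> pos) & ((1u << len) - 1);
    }

    int main(void)
    {
        uint32_t mvfr0 = 0;
        mvfr0 = dp32(mvfr0, 24, 4, 1);    /* FPShVec = 1 (assumed at [27:24]) */
        assert(ex32(mvfr0, 24, 4) > 0);   /* what isar_feature_aa32_fpshvec tests */
        return 0;
    }

The following patches then make short-vector support purely a function of
this ID register field instead of being hard-coded on.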
1
Convert the VSQRT instruction to decodetree.
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
2
3
Remove inclusion of arm_gicv3_common.h, this already gets
4
included via xlnx-versal.h.
5
6
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
9
Message-id: 20200427181649.26851-2-edgar.iglesias@gmail.com
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
11
---
6
target/arm/translate-vfp.inc.c | 20 ++++++++++++++++++++
12
hw/arm/xlnx-versal.c | 1 -
7
target/arm/translate.c | 14 +-------------
13
1 file changed, 1 deletion(-)
8
target/arm/vfp.decode | 5 +++++
9
3 files changed, 26 insertions(+), 13 deletions(-)
10
14
11
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
15
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-vfp.inc.c
17
--- a/hw/arm/xlnx-versal.c
14
+++ b/target/arm/translate-vfp.inc.c
18
+++ b/hw/arm/xlnx-versal.c
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
19
@@ -XXX,XX +XXX,XX @@
16
{
20
#include "hw/arm/boot.h"
17
return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
21
#include "kvm_arm.h"
18
}
22
#include "hw/misc/unimp.h"
19
+
23
-#include "hw/intc/arm_gicv3_common.h"
20
+static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
24
#include "hw/arm/xlnx-versal.h"
21
+{
25
#include "hw/char/pl011.h"
22
+ gen_helper_vfp_sqrts(vd, vm, cpu_env);
26
23
+}
24
+
25
+static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a)
26
+{
27
+ return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm);
28
+}
29
+
30
+static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
31
+{
32
+ gen_helper_vfp_sqrtd(vd, vm, cpu_env);
33
+}
34
+
35
+static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
36
+{
37
+ return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
38
+}
39
diff --git a/target/arm/translate.c b/target/arm/translate.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate.c
42
+++ b/target/arm/translate.c
43
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_neg(int dp)
44
gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
45
}
46
47
-static inline void gen_vfp_sqrt(int dp)
48
-{
49
- if (dp)
50
- gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
51
- else
52
- gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
53
-}
54
-
55
static inline void gen_vfp_cmp(int dp)
56
{
57
if (dp)
58
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
59
return 1;
60
case 15:
61
switch (rn) {
62
- case 1 ... 2:
63
+ case 1 ... 3:
64
/* Already handled by decodetree */
65
return 1;
66
default:
67
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
68
/* rn is opcode, encoded as per VFP_SREG_N. */
69
switch (rn) {
70
case 0x00: /* vmov */
71
- case 0x03: /* vsqrt */
72
break;
73
74
case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
75
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
76
case 0: /* cpy */
77
/* no-op */
78
break;
79
- case 3: /* sqrt */
80
- gen_vfp_sqrt(dp);
81
- break;
82
case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
83
{
84
TCGv_ptr fpst = get_fpstatus_ptr(false);
85
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/arm/vfp.decode
88
+++ b/target/arm/vfp.decode
89
@@ -XXX,XX +XXX,XX @@ VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... \
90
vd=%vd_sp vm=%vm_sp
91
VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... \
92
vd=%vd_dp vm=%vm_dp
93
+
94
+VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... \
95
+ vd=%vd_sp vm=%vm_sp
96
+VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... \
97
+ vd=%vd_dp vm=%vm_dp
98
--
27
--
99
2.20.1
28
2.20.1
100
29
101
30
diff view generated by jsdifflib
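The gen_/trans_/do_ layering used for VSQRT generalises to every two-operand
VFP op; here is a standalone analogue of the design, with ordinary C floats
standing in for TCG values (the names are invented for illustration):

    #include <math.h>
    #include <stdio.h>

    typedef float (*vfp_2op_sp_fn)(float vm);

    static float op_sqrt(float vm) { return sqrtf(vm); }
    static float op_neg(float vm)  { return -vm; }

    /* in the real code this driver also does the access checks and
     * short-vector looping once, on behalf of every instruction */
    static void do_2op(vfp_2op_sp_fn fn, float *vd, float vm)
    {
        *vd = fn(vm);
    }

    int main(void)
    {
        float d;
        do_2op(op_sqrt, &d, 2.0f);
        printf("sqrt: %f\n", d);
        do_2op(op_neg, &d, 2.0f);
        printf("neg: %f\n", d);
        return 0;
    }

VSQRT needs the small gen_VSQRT_* wrappers only because its helper takes
cpu_env as an extra argument; ops whose helpers already match the callback
signature (like VNEG's) can be passed straight through.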
From: Richard Henderson <richard.henderson@linaro.org>

Typo comparing the sign of the field, twice, instead of also comparing
the mask of the field (which itself encodes both position and length).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20190604154225.26992-1-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
scripts/decodetree.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index XXXXXXX..XXXXXXX 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -XXX,XX +XXX,XX @@ class Field:
return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)

def __eq__(self, other):
- return self.sign == other.sign and self.sign == other.sign
+ return self.sign == other.sign and self.mask == other.mask

def __ne__(self, other):
return not self.__eq__(other)
--
2.20.1


From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>

Move misplaced comment.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
Message-id: 20200427181649.26851-3-edgar.iglesias@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/arm/xlnx-versal.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -XXX,XX +XXX,XX @@ static void versal_create_apu_cpus(Versal *s)

obj = object_new(XLNX_VERSAL_ACPU_TYPE);
if (!obj) {
- /* Secondary CPUs start in PSCI powered-down state */
error_report("Unable to create apu.cpu[%d] of type %s",
i, XLNX_VERSAL_ACPU_TYPE);
exit(EXIT_FAILURE);
@@ -XXX,XX +XXX,XX @@ static void versal_create_apu_cpus(Versal *s)
object_property_set_int(obj, s->cfg.psci_conduit,
"psci-conduit", &error_abort);
if (i) {
+ /* Secondary CPUs start in PSCI powered-down state */
object_property_set_bool(obj, true,
"start-powered-off", &error_abort);
}
--
2.20.1

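The decodetree typo is an instance of a classic self-comparison bug; in C the
same mistake looks like this (standalone illustration, not project code):

    #include <stdbool.h>
    #include <stdio.h>

    struct field { int sign; unsigned mask; };

    static bool eq_buggy(struct field a, struct field b)
    {
        return a.sign == b.sign && a.sign == b.sign;   /* mask never compared */
    }

    static bool eq_fixed(struct field a, struct field b)
    {
        return a.sign == b.sign && a.mask == b.mask;
    }

    int main(void)
    {
        struct field x = {0, 0x00f0}, y = {0, 0x0f00};
        printf("buggy: %d, fixed: %d\n", eq_buggy(x, y), eq_fixed(x, y));
        return 0;
    }

Static analysers usually flag comparisons of a variable with itself, though a
duplicated subexpression like this one is harder for them to catch.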
1
The SMMUv3 ID registers cover an area 0x30 bytes in size
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
(12 registers, 4 bytes each). We were incorrectly decoding
3
only the first 0x20 bytes.
4
2
3
Fix typo xlnx-ve -> xlnx-versal.
4
5
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
9
Message-id: 20200427181649.26851-4-edgar.iglesias@gmail.com
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Message-id: 20190524124829.2589-1-peter.maydell@linaro.org
8
---
11
---
9
hw/arm/smmuv3.c | 2 +-
12
hw/arm/xlnx-versal-virt.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
13
1 file changed, 1 insertion(+), 1 deletion(-)
11
14
12
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
15
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/arm/smmuv3.c
17
--- a/hw/arm/xlnx-versal-virt.c
15
+++ b/hw/arm/smmuv3.c
18
+++ b/hw/arm/xlnx-versal-virt.c
16
@@ -XXX,XX +XXX,XX @@ static MemTxResult smmu_readl(SMMUv3State *s, hwaddr offset,
19
@@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine)
17
uint64_t *data, MemTxAttrs attrs)
20
psci_conduit = QEMU_PSCI_CONDUIT_SMC;
18
{
21
}
19
switch (offset) {
22
20
- case A_IDREGS ... A_IDREGS + 0x1f:
23
- sysbus_init_child_obj(OBJECT(machine), "xlnx-ve", &s->soc,
21
+ case A_IDREGS ... A_IDREGS + 0x2f:
24
+ sysbus_init_child_obj(OBJECT(machine), "xlnx-versal", &s->soc,
22
*data = smmuv3_idreg(offset - A_IDREGS);
25
sizeof(s->soc), TYPE_XLNX_VERSAL);
23
return MEMTX_OK;
26
object_property_set_link(OBJECT(&s->soc), OBJECT(machine->ram),
24
case A_IDR0 ... A_IDR5:
27
"ddr", &error_abort);
25
--
28
--
26
2.20.1
29
2.20.1
27
30
28
31
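To make the SMMUv3 range arithmetic above concrete, a standalone sketch (the A_IDREGS value is assumed here purely for illustration; only the 12-register span matters):

    #define A_IDREGS    0xfd0   /* assumed base, for illustration only */
    #define NUM_IDREGS  12

    /* Twelve 32-bit registers cover byte offsets
     * [A_IDREGS, A_IDREGS + 12 * 4 - 1], i.e. up to base + 0x2f, so a
     * case range ending at base + 0x1f missed the last four registers. */
    static int idreg_index(unsigned int offset)
    {
        return (offset - A_IDREGS) / 4;    /* 0 .. 11 */
    }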
1
Convert the VNEG instruction to decodetree.
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
2
3
Embed the UARTs into the SoC type.
4
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
10
Message-id: 20200427181649.26851-5-edgar.iglesias@gmail.com
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
12
---
6
target/arm/translate-vfp.inc.c | 10 ++++++++++
13
include/hw/arm/xlnx-versal.h | 3 ++-
7
target/arm/translate.c | 6 +-----
14
hw/arm/xlnx-versal.c | 12 ++++++------
8
target/arm/vfp.decode | 5 +++++
15
2 files changed, 8 insertions(+), 7 deletions(-)
9
3 files changed, 16 insertions(+), 5 deletions(-)
10
16
11
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
17
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
12
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-vfp.inc.c
19
--- a/include/hw/arm/xlnx-versal.h
14
+++ b/target/arm/translate-vfp.inc.c
20
+++ b/include/hw/arm/xlnx-versal.h
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
21
@@ -XXX,XX +XXX,XX @@
16
{
22
#include "hw/sysbus.h"
17
return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
23
#include "hw/arm/boot.h"
24
#include "hw/intc/arm_gicv3.h"
25
+#include "hw/char/pl011.h"
26
27
#define TYPE_XLNX_VERSAL "xlnx-versal"
28
#define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL)
29
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
30
MemoryRegion mr_ocm;
31
32
struct {
33
- SysBusDevice *uart[XLNX_VERSAL_NR_UARTS];
34
+ PL011State uart[XLNX_VERSAL_NR_UARTS];
35
SysBusDevice *gem[XLNX_VERSAL_NR_GEMS];
36
SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS];
37
} iou;
38
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/hw/arm/xlnx-versal.c
41
+++ b/hw/arm/xlnx-versal.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "kvm_arm.h"
44
#include "hw/misc/unimp.h"
45
#include "hw/arm/xlnx-versal.h"
46
-#include "hw/char/pl011.h"
47
48
#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
49
#define GEM_REVISION 0x40070106
50
@@ -XXX,XX +XXX,XX @@ static void versal_create_uarts(Versal *s, qemu_irq *pic)
51
DeviceState *dev;
52
MemoryRegion *mr;
53
54
- dev = qdev_create(NULL, TYPE_PL011);
55
- s->lpd.iou.uart[i] = SYS_BUS_DEVICE(dev);
56
+ sysbus_init_child_obj(OBJECT(s), name,
57
+ &s->lpd.iou.uart[i], sizeof(s->lpd.iou.uart[i]),
58
+ TYPE_PL011);
59
+ dev = DEVICE(&s->lpd.iou.uart[i]);
60
qdev_prop_set_chr(dev, "chardev", serial_hd(i));
61
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
62
qdev_init_nofail(dev);
63
64
- mr = sysbus_mmio_get_region(s->lpd.iou.uart[i], 0);
65
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
66
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
67
68
- sysbus_connect_irq(s->lpd.iou.uart[i], 0, pic[irqs[i]]);
69
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
70
g_free(name);
71
}
18
}
72
}
19
+
20
+static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
21
+{
22
+ return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
23
+}
24
+
25
+static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
26
+{
27
+ return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
28
+}
29
diff --git a/target/arm/translate.c b/target/arm/translate.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate.c
32
+++ b/target/arm/translate.c
33
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
34
return 1;
35
case 15:
36
switch (rn) {
37
- case 1:
38
+ case 1 ... 2:
39
/* Already handled by decodetree */
40
return 1;
41
default:
42
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
43
/* rn is opcode, encoded as per VFP_SREG_N. */
44
switch (rn) {
45
case 0x00: /* vmov */
46
- case 0x02: /* vneg */
47
case 0x03: /* vsqrt */
48
break;
49
50
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
51
case 0: /* cpy */
52
/* no-op */
53
break;
54
- case 2: /* neg */
55
- gen_vfp_neg(dp);
56
- break;
57
case 3: /* sqrt */
58
gen_vfp_sqrt(dp);
59
break;
60
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/vfp.decode
63
+++ b/target/arm/vfp.decode
64
@@ -XXX,XX +XXX,XX @@ VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... \
65
vd=%vd_sp vm=%vm_sp
66
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... \
67
vd=%vd_dp vm=%vm_dp
68
+
69
+VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... \
70
+ vd=%vd_sp vm=%vm_sp
71
+VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... \
72
+ vd=%vd_dp vm=%vm_dp
73
--
73
--
74
2.20.1
74
2.20.1
75
75
76
76
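The UART change above is the first of several applications of the same embed-in-parent idiom in this series (the GEMs, ADMAs and APUs follow). A minimal sketch of the pattern, using the QOM APIs of this era; MySoC and my_soc_init() are placeholders, not code from the patch:

    #include "hw/sysbus.h"
    #include "hw/char/pl011.h"

    typedef struct MySoC {
        SysBusDevice parent_obj;
        PL011State uart;              /* embedded struct, not a pointer */
    } MySoC;

    static void my_soc_init(MySoC *s)
    {
        /* Initializes the child in place: no heap allocation, and the
         * child's lifetime is tied to the SoC object containing it. */
        sysbus_init_child_obj(OBJECT(s), "uart", &s->uart, sizeof(s->uart),
                              TYPE_PL011);
        qdev_init_nofail(DEVICE(&s->uart));
    }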
1
Convert the "double-precision" register moves to decodetree:
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
this covers VMOV scalar-to-gpreg, VMOV gpreg-to-scalar and VDUP.
3
2
4
Note that the conversion process has tightened up a few of the
3
Embed the GEMs into the SoC type.
5
UNDEF encoding checks: we now correctly forbid:
6
* VMOV-to-gpr with U:opc1:opc2 == 10x00 or x0x10
7
* VMOV-from-gpr with opc1:opc2 == 0x10
8
* VDUP with B:E == 11
9
* VDUP with Q == 1 and Vn<0> == 1
10
4
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
10
Message-id: 20200427181649.26851-6-edgar.iglesias@gmail.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
12
---
13
The accesses of elements < 32 bits could be improved by doing
13
include/hw/arm/xlnx-versal.h | 3 ++-
14
direct ld/st of the right size rather than 32-bit read-and-shift
14
hw/arm/xlnx-versal.c | 15 ++++++++-------
15
or read-modify-write, but we leave this for later cleanup,
15
2 files changed, 10 insertions(+), 8 deletions(-)
16
since this series is generally trying to stick to fixing
17
the decode.
18
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
19
---
20
target/arm/translate-vfp.inc.c | 147 +++++++++++++++++++++++++++++++++
21
target/arm/translate.c | 83 +------------------
22
target/arm/vfp.decode | 36 ++++++++
23
3 files changed, 185 insertions(+), 81 deletions(-)
24
16
25
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
17
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
26
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-vfp.inc.c
19
--- a/include/hw/arm/xlnx-versal.h
28
+++ b/target/arm/translate-vfp.inc.c
20
+++ b/include/hw/arm/xlnx-versal.h
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
21
@@ -XXX,XX +XXX,XX @@
30
22
#include "hw/arm/boot.h"
31
return true;
23
#include "hw/intc/arm_gicv3.h"
24
#include "hw/char/pl011.h"
25
+#include "hw/net/cadence_gem.h"
26
27
#define TYPE_XLNX_VERSAL "xlnx-versal"
28
#define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL)
29
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
30
31
struct {
32
PL011State uart[XLNX_VERSAL_NR_UARTS];
33
- SysBusDevice *gem[XLNX_VERSAL_NR_GEMS];
34
+ CadenceGEMState gem[XLNX_VERSAL_NR_GEMS];
35
SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS];
36
} iou;
37
} lpd;
38
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/hw/arm/xlnx-versal.c
41
+++ b/hw/arm/xlnx-versal.c
42
@@ -XXX,XX +XXX,XX @@ static void versal_create_gems(Versal *s, qemu_irq *pic)
43
DeviceState *dev;
44
MemoryRegion *mr;
45
46
- dev = qdev_create(NULL, "cadence_gem");
47
- s->lpd.iou.gem[i] = SYS_BUS_DEVICE(dev);
48
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
49
+ sysbus_init_child_obj(OBJECT(s), name,
50
+ &s->lpd.iou.gem[i], sizeof(s->lpd.iou.gem[i]),
51
+ TYPE_CADENCE_GEM);
52
+ dev = DEVICE(&s->lpd.iou.gem[i]);
53
if (nd->used) {
54
qemu_check_nic_model(nd, "cadence_gem");
55
qdev_set_nic_properties(dev, nd);
56
}
57
- object_property_set_int(OBJECT(s->lpd.iou.gem[i]),
58
+ object_property_set_int(OBJECT(dev),
59
2, "num-priority-queues",
60
&error_abort);
61
- object_property_set_link(OBJECT(s->lpd.iou.gem[i]),
62
+ object_property_set_link(OBJECT(dev),
63
OBJECT(&s->mr_ps), "dma",
64
&error_abort);
65
qdev_init_nofail(dev);
66
67
- mr = sysbus_mmio_get_region(s->lpd.iou.gem[i], 0);
68
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
69
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
70
71
- sysbus_connect_irq(s->lpd.iou.gem[i], 0, pic[irqs[i]]);
72
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
73
g_free(name);
74
}
32
}
75
}
33
+
34
+static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
35
+{
36
+ /* VMOV scalar to general purpose register */
37
+ TCGv_i32 tmp;
38
+ int pass;
39
+ uint32_t offset;
40
+
41
+ /* UNDEF accesses to D16-D31 if they don't exist */
42
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
43
+ return false;
44
+ }
45
+
46
+ offset = a->index << a->size;
47
+ pass = extract32(offset, 2, 1);
48
+ offset = extract32(offset, 0, 2) * 8;
49
+
50
+ if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
51
+ return false;
52
+ }
53
+
54
+ if (!vfp_access_check(s)) {
55
+ return true;
56
+ }
57
+
58
+ tmp = neon_load_reg(a->vn, pass);
59
+ switch (a->size) {
60
+ case 0:
61
+ if (offset) {
62
+ tcg_gen_shri_i32(tmp, tmp, offset);
63
+ }
64
+ if (a->u) {
65
+ gen_uxtb(tmp);
66
+ } else {
67
+ gen_sxtb(tmp);
68
+ }
69
+ break;
70
+ case 1:
71
+ if (a->u) {
72
+ if (offset) {
73
+ tcg_gen_shri_i32(tmp, tmp, 16);
74
+ } else {
75
+ gen_uxth(tmp);
76
+ }
77
+ } else {
78
+ if (offset) {
79
+ tcg_gen_sari_i32(tmp, tmp, 16);
80
+ } else {
81
+ gen_sxth(tmp);
82
+ }
83
+ }
84
+ break;
85
+ case 2:
86
+ break;
87
+ }
88
+ store_reg(s, a->rt, tmp);
89
+
90
+ return true;
91
+}
92
+
93
+static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
94
+{
95
+ /* VMOV general purpose register to scalar */
96
+ TCGv_i32 tmp, tmp2;
97
+ int pass;
98
+ uint32_t offset;
99
+
100
+ /* UNDEF accesses to D16-D31 if they don't exist */
101
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
102
+ return false;
103
+ }
104
+
105
+ offset = a->index << a->size;
106
+ pass = extract32(offset, 2, 1);
107
+ offset = extract32(offset, 0, 2) * 8;
108
+
109
+ if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
110
+ return false;
111
+ }
112
+
113
+ if (!vfp_access_check(s)) {
114
+ return true;
115
+ }
116
+
117
+ tmp = load_reg(s, a->rt);
118
+ switch (a->size) {
119
+ case 0:
120
+ tmp2 = neon_load_reg(a->vn, pass);
121
+ tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
122
+ tcg_temp_free_i32(tmp2);
123
+ break;
124
+ case 1:
125
+ tmp2 = neon_load_reg(a->vn, pass);
126
+ tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
127
+ tcg_temp_free_i32(tmp2);
128
+ break;
129
+ case 2:
130
+ break;
131
+ }
132
+ neon_store_reg(a->vn, pass, tmp);
133
+
134
+ return true;
135
+}
136
+
137
+static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
138
+{
139
+ /* VDUP (general purpose register) */
140
+ TCGv_i32 tmp;
141
+ int size, vec_size;
142
+
143
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
144
+ return false;
145
+ }
146
+
147
+ /* UNDEF accesses to D16-D31 if they don't exist */
148
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
149
+ return false;
150
+ }
151
+
152
+ if (a->b && a->e) {
153
+ return false;
154
+ }
155
+
156
+ if (a->q && (a->vn & 1)) {
157
+ return false;
158
+ }
159
+
160
+ vec_size = a->q ? 16 : 8;
161
+ if (a->b) {
162
+ size = 0;
163
+ } else if (a->e) {
164
+ size = 1;
165
+ } else {
166
+ size = 2;
167
+ }
168
+
169
+ if (!vfp_access_check(s)) {
170
+ return true;
171
+ }
172
+
173
+ tmp = load_reg(s, a->rt);
174
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
175
+ vec_size, vec_size, tmp);
176
+ tcg_temp_free_i32(tmp);
177
+
178
+ return true;
179
+}
180
diff --git a/target/arm/translate.c b/target/arm/translate.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/target/arm/translate.c
183
+++ b/target/arm/translate.c
184
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
185
/* single register transfer */
186
rd = (insn >> 12) & 0xf;
187
if (dp) {
188
- int size;
189
- int pass;
190
-
191
- VFP_DREG_N(rn, insn);
192
- if (insn & 0xf)
193
- return 1;
194
- if (insn & 0x00c00060
195
- && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
196
- return 1;
197
- }
198
-
199
- pass = (insn >> 21) & 1;
200
- if (insn & (1 << 22)) {
201
- size = 0;
202
- offset = ((insn >> 5) & 3) * 8;
203
- } else if (insn & (1 << 5)) {
204
- size = 1;
205
- offset = (insn & (1 << 6)) ? 16 : 0;
206
- } else {
207
- size = 2;
208
- offset = 0;
209
- }
210
- if (insn & ARM_CP_RW_BIT) {
211
- /* vfp->arm */
212
- tmp = neon_load_reg(rn, pass);
213
- switch (size) {
214
- case 0:
215
- if (offset)
216
- tcg_gen_shri_i32(tmp, tmp, offset);
217
- if (insn & (1 << 23))
218
- gen_uxtb(tmp);
219
- else
220
- gen_sxtb(tmp);
221
- break;
222
- case 1:
223
- if (insn & (1 << 23)) {
224
- if (offset) {
225
- tcg_gen_shri_i32(tmp, tmp, 16);
226
- } else {
227
- gen_uxth(tmp);
228
- }
229
- } else {
230
- if (offset) {
231
- tcg_gen_sari_i32(tmp, tmp, 16);
232
- } else {
233
- gen_sxth(tmp);
234
- }
235
- }
236
- break;
237
- case 2:
238
- break;
239
- }
240
- store_reg(s, rd, tmp);
241
- } else {
242
- /* arm->vfp */
243
- tmp = load_reg(s, rd);
244
- if (insn & (1 << 23)) {
245
- /* VDUP */
246
- int vec_size = pass ? 16 : 8;
247
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0),
248
- vec_size, vec_size, tmp);
249
- tcg_temp_free_i32(tmp);
250
- } else {
251
- /* VMOV */
252
- switch (size) {
253
- case 0:
254
- tmp2 = neon_load_reg(rn, pass);
255
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
256
- tcg_temp_free_i32(tmp2);
257
- break;
258
- case 1:
259
- tmp2 = neon_load_reg(rn, pass);
260
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
261
- tcg_temp_free_i32(tmp2);
262
- break;
263
- case 2:
264
- break;
265
- }
266
- neon_store_reg(rn, pass, tmp);
267
- }
268
- }
269
+ /* already handled by decodetree */
270
+ return 1;
271
} else { /* !dp */
272
bool is_sysreg;
273
274
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
275
index XXXXXXX..XXXXXXX 100644
276
--- a/target/arm/vfp.decode
277
+++ b/target/arm/vfp.decode
278
@@ -XXX,XX +XXX,XX @@
279
# 1110 1110 .... .... .... 101. .... ....
280
# (but those patterns might also cover some Neon instructions,
281
# which do not live in this file.)
282
+
283
+# VFP registers have an odd encoding with a four-bit field
284
+# and a one-bit field which are assembled in different orders
285
+# depending on whether the register is double or single precision.
286
+# Each individual instruction function must do the checks for
287
+# "double register selected but CPU does not have double support"
288
+# and "double register number has bit 4 set but CPU does not
289
+# support D16-D31" (which should UNDEF).
290
+%vm_dp 5:1 0:4
291
+%vm_sp 0:4 5:1
292
+%vn_dp 7:1 16:4
293
+%vn_sp 16:4 7:1
294
+%vd_dp 22:1 12:4
295
+%vd_sp 12:4 22:1
296
+
297
+%vmov_idx_b 21:1 5:2
298
+%vmov_idx_h 21:1 6:1
299
+
300
+# VMOV scalar to general-purpose register; note that this does
301
+# include some Neon cases.
302
+VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \
303
+ vn=%vn_dp size=0 index=%vmov_idx_b
304
+VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \
305
+ vn=%vn_dp size=1 index=%vmov_idx_h
306
+VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \
307
+ vn=%vn_dp size=2 u=0
308
+
309
+VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \
310
+ vn=%vn_dp size=0 index=%vmov_idx_b
311
+VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \
312
+ vn=%vn_dp size=1 index=%vmov_idx_h
313
+VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \
314
+ vn=%vn_dp size=2
315
+
316
+VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
317
+ vn=%vn_dp
318
--
76
--
319
2.20.1
77
2.20.1
320
78
321
79
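A worked example of the scalar addressing used by trans_VMOV_to_gp() and trans_VMOV_from_gp() above, restated as a standalone sketch:

    /* A Neon scalar index is scaled to a byte offset, then split into a
     * 32-bit "pass" (which half of the D register) and a shift within
     * that word.  E.g. byte element 5 (size == 0): offset 5, pass 1,
     * shift 8, i.e. bits [15:8] of the second word. */
    static void scalar_coords(int index, int size, int *pass, int *shift)
    {
        int offset = index << size;   /* byte offset into Dn */

        *pass  = (offset >> 2) & 1;
        *shift = (offset & 3) * 8;
    }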
1
In commit 80376c3fc2c38fdd453 in 2010 we added a workaround for
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
some qbus buses not being connected to qdev devices -- if the
3
bus has no parent object then we register a reset function which
4
resets the bus on system reset (and unregister it when the
5
bus is unparented).
6
2
7
Nearly a decade later, we now have no buses in the tree which
3
Embed the ADMAs into the SoC type.
8
are created with NULL parents, so we can remove the
9
workaround and instead just assert that if the bus has a NULL
10
parent then it is the main system bus.
11
4
12
(The absence of other parentless buses was confirmed by
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
13
code inspection of all the callsites of qbus_create() and
6
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
14
qbus_create_inplace() and cross-checked by 'make check'.)
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
10
Message-id: 20200427181649.26851-7-edgar.iglesias@gmail.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
include/hw/arm/xlnx-versal.h | 3 ++-
14
hw/arm/xlnx-versal.c | 14 +++++++-------
15
2 files changed, 9 insertions(+), 8 deletions(-)
15
16
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
17
Reviewed-by: Markus Armbruster <armbru@redhat.com>
18
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
Reviewed-by: Damien Hedde <damien.hedde@greensocs.com>
20
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
21
Message-id: 20190523150543.22676-1-peter.maydell@linaro.org
22
---
23
hw/core/bus.c | 21 +++++++++------------
24
1 file changed, 9 insertions(+), 12 deletions(-)
25
26
diff --git a/hw/core/bus.c b/hw/core/bus.c
27
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/core/bus.c
19
--- a/include/hw/arm/xlnx-versal.h
29
+++ b/hw/core/bus.c
20
+++ b/include/hw/arm/xlnx-versal.h
30
@@ -XXX,XX +XXX,XX @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
21
@@ -XXX,XX +XXX,XX @@
31
bus->parent->num_child_bus++;
22
#include "hw/arm/boot.h"
32
object_property_add_child(OBJECT(bus->parent), bus->name, OBJECT(bus), NULL);
23
#include "hw/intc/arm_gicv3.h"
33
object_unref(OBJECT(bus));
24
#include "hw/char/pl011.h"
34
- } else if (bus != sysbus_get_default()) {
25
+#include "hw/dma/xlnx-zdma.h"
35
- /* TODO: once all bus devices are qdevified,
26
#include "hw/net/cadence_gem.h"
36
- only reset handler for main_system_bus should be registered here. */
27
37
- qemu_register_reset(qbus_reset_all_fn, bus);
28
#define TYPE_XLNX_VERSAL "xlnx-versal"
38
+ } else {
29
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
39
+ /* The only bus without a parent is the main system bus */
30
struct {
40
+ assert(bus == sysbus_get_default());
31
PL011State uart[XLNX_VERSAL_NR_UARTS];
32
CadenceGEMState gem[XLNX_VERSAL_NR_GEMS];
33
- SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS];
34
+ XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS];
35
} iou;
36
} lpd;
37
38
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/hw/arm/xlnx-versal.c
41
+++ b/hw/arm/xlnx-versal.c
42
@@ -XXX,XX +XXX,XX @@ static void versal_create_admas(Versal *s, qemu_irq *pic)
43
DeviceState *dev;
44
MemoryRegion *mr;
45
46
- dev = qdev_create(NULL, "xlnx.zdma");
47
- s->lpd.iou.adma[i] = SYS_BUS_DEVICE(dev);
48
- object_property_set_int(OBJECT(s->lpd.iou.adma[i]), 128, "bus-width",
49
- &error_abort);
50
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
51
+ sysbus_init_child_obj(OBJECT(s), name,
52
+ &s->lpd.iou.adma[i], sizeof(s->lpd.iou.adma[i]),
53
+ TYPE_XLNX_ZDMA);
54
+ dev = DEVICE(&s->lpd.iou.adma[i]);
55
+ object_property_set_int(OBJECT(dev), 128, "bus-width", &error_abort);
56
qdev_init_nofail(dev);
57
58
- mr = sysbus_mmio_get_region(s->lpd.iou.adma[i], 0);
59
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
60
memory_region_add_subregion(&s->mr_ps,
61
MM_ADMA_CH0 + i * MM_ADMA_CH0_SIZE, mr);
62
63
- sysbus_connect_irq(s->lpd.iou.adma[i], 0, pic[VERSAL_ADMA_IRQ_0 + i]);
64
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_ADMA_IRQ_0 + i]);
65
g_free(name);
41
}
66
}
42
}
67
}
43
44
@@ -XXX,XX +XXX,XX @@ static void bus_unparent(Object *obj)
45
BusState *bus = BUS(obj);
46
BusChild *kid;
47
48
+ /* Only the main system bus has no parent, and that bus is never freed */
49
+ assert(bus->parent);
50
+
51
while ((kid = QTAILQ_FIRST(&bus->children)) != NULL) {
52
DeviceState *dev = kid->child;
53
object_unparent(OBJECT(dev));
54
}
55
- if (bus->parent) {
56
- QLIST_REMOVE(bus, sibling);
57
- bus->parent->num_child_bus--;
58
- bus->parent = NULL;
59
- } else {
60
- assert(bus != sysbus_get_default()); /* main_system_bus is never freed */
61
- qemu_unregister_reset(qbus_reset_all_fn, bus);
62
- }
63
+ QLIST_REMOVE(bus, sibling);
64
+ bus->parent->num_child_bus--;
65
+ bus->parent = NULL;
66
}
67
68
void qbus_create_inplace(void *bus, size_t size, const char *typename,
69
--
68
--
70
2.20.1
69
2.20.1
71
70
72
71
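The practical consequence of the bus.c change above for device code, sketched with a hypothetical bus type name:

    #include "hw/qdev-core.h"

    /* With the new assertion, any bus other than the main system bus
     * must be created with a parent device; a NULL parent would now
     * trip the assert at realize time. */
    static BusState *make_child_bus(DeviceState *parent_dev)
    {
        return qbus_create("my-bus-type" /* placeholder */, parent_dev,
                           "child-bus");
    }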
1
Convert the VRINTA/VRINTN/VRINTP/VRINTM instructions to decodetree.
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
Again, trans_VRINT() is temporarily left in translate.c.
3
2
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
Embed the APUs into the SoC type.
4
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
10
Message-id: 20200427181649.26851-8-edgar.iglesias@gmail.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
---
12
---
6
target/arm/translate.c | 60 +++++++++++++++++++++++-------------
13
include/hw/arm/xlnx-versal.h | 2 +-
7
target/arm/vfp-uncond.decode | 5 +++
14
hw/arm/xlnx-versal-virt.c | 4 ++--
8
2 files changed, 43 insertions(+), 22 deletions(-)
15
hw/arm/xlnx-versal.c | 19 +++++--------------
16
3 files changed, 8 insertions(+), 17 deletions(-)
9
17
10
diff --git a/target/arm/translate.c b/target/arm/translate.c
18
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
11
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate.c
20
--- a/include/hw/arm/xlnx-versal.h
13
+++ b/target/arm/translate.c
21
+++ b/include/hw/arm/xlnx-versal.h
14
@@ -XXX,XX +XXX,XX @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
22
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
15
return true;
23
struct {
24
struct {
25
MemoryRegion mr;
26
- ARMCPU *cpu[XLNX_VERSAL_NR_ACPUS];
27
+ ARMCPU cpu[XLNX_VERSAL_NR_ACPUS];
28
GICv3State gic;
29
} apu;
30
} fpd;
31
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/arm/xlnx-versal-virt.c
34
+++ b/hw/arm/xlnx-versal-virt.c
35
@@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine)
36
s->binfo.get_dtb = versal_virt_get_dtb;
37
s->binfo.modify_dtb = versal_virt_modify_dtb;
38
if (machine->kernel_filename) {
39
- arm_load_kernel(s->soc.fpd.apu.cpu[0], machine, &s->binfo);
40
+ arm_load_kernel(&s->soc.fpd.apu.cpu[0], machine, &s->binfo);
41
} else {
42
- AddressSpace *as = arm_boot_address_space(s->soc.fpd.apu.cpu[0],
43
+ AddressSpace *as = arm_boot_address_space(&s->soc.fpd.apu.cpu[0],
44
&s->binfo);
45
/* Some boot-loaders (e.g u-boot) don't like blobs at address 0 (NULL).
46
* Offset things by 4K. */
47
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/arm/xlnx-versal.c
50
+++ b/hw/arm/xlnx-versal.c
51
@@ -XXX,XX +XXX,XX @@ static void versal_create_apu_cpus(Versal *s)
52
53
for (i = 0; i < ARRAY_SIZE(s->fpd.apu.cpu); i++) {
54
Object *obj;
55
- char *name;
56
-
57
- obj = object_new(XLNX_VERSAL_ACPU_TYPE);
58
- if (!obj) {
59
- error_report("Unable to create apu.cpu[%d] of type %s",
60
- i, XLNX_VERSAL_ACPU_TYPE);
61
- exit(EXIT_FAILURE);
62
- }
63
-
64
- name = g_strdup_printf("apu-cpu[%d]", i);
65
- object_property_add_child(OBJECT(s), name, obj, &error_fatal);
66
- g_free(name);
67
68
+ object_initialize_child(OBJECT(s), "apu-cpu[*]",
69
+ &s->fpd.apu.cpu[i], sizeof(s->fpd.apu.cpu[i]),
70
+ XLNX_VERSAL_ACPU_TYPE, &error_abort, NULL);
71
+ obj = OBJECT(&s->fpd.apu.cpu[i]);
72
object_property_set_int(obj, s->cfg.psci_conduit,
73
"psci-conduit", &error_abort);
74
if (i) {
75
@@ -XXX,XX +XXX,XX @@ static void versal_create_apu_cpus(Versal *s)
76
object_property_set_link(obj, OBJECT(&s->fpd.apu.mr), "memory",
77
&error_abort);
78
object_property_set_bool(obj, true, "realized", &error_fatal);
79
- s->fpd.apu.cpu[i] = ARM_CPU(obj);
80
}
16
}
81
}
17
82
18
-static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
83
@@ -XXX,XX +XXX,XX @@ static void versal_create_apu_gic(Versal *s, qemu_irq *pic)
19
- int rounding)
20
+/*
21
+ * Table for converting the most common AArch32 encoding of
22
+ * rounding mode to arm_fprounding order (which matches the
23
+ * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
24
+ */
25
+static const uint8_t fp_decode_rm[] = {
26
+ FPROUNDING_TIEAWAY,
27
+ FPROUNDING_TIEEVEN,
28
+ FPROUNDING_POSINF,
29
+ FPROUNDING_NEGINF,
30
+};
31
+
32
+static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
33
{
34
- TCGv_ptr fpst = get_fpstatus_ptr(0);
35
+ uint32_t rd, rm;
36
+ bool dp = a->dp;
37
+ TCGv_ptr fpst;
38
TCGv_i32 tcg_rmode;
39
+ int rounding = fp_decode_rm[a->rm];
40
+
41
+ if (!dc_isar_feature(aa32_vrint, s)) {
42
+ return false;
43
+ }
44
+
45
+ /* UNDEF accesses to D16-D31 if they don't exist */
46
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
47
+ ((a->vm | a->vd) & 0x10)) {
48
+ return false;
49
+ }
50
+ rd = a->vd;
51
+ rm = a->vm;
52
+
53
+ if (!vfp_access_check(s)) {
54
+ return true;
55
+ }
56
+
57
+ fpst = get_fpstatus_ptr(0);
58
59
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
60
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
61
@@ -XXX,XX +XXX,XX @@ static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
62
tcg_temp_free_i32(tcg_rmode);
63
64
tcg_temp_free_ptr(fpst);
65
- return 0;
66
+ return true;
67
}
68
69
static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
70
@@ -XXX,XX +XXX,XX @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
71
return 0;
72
}
73
74
-/* Table for converting the most common AArch32 encoding of
75
- * rounding mode to arm_fprounding order (which matches the
76
- * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
77
- */
78
-static const uint8_t fp_decode_rm[] = {
79
- FPROUNDING_TIEAWAY,
80
- FPROUNDING_TIEEVEN,
81
- FPROUNDING_POSINF,
82
- FPROUNDING_NEGINF,
83
-};
84
-
85
static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
86
{
87
uint32_t rd, rm, dp = extract32(insn, 8, 1);
88
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
89
rm = VFP_SREG_M(insn);
90
}
84
}
91
85
92
- if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
86
for (i = 0; i < nr_apu_cpus; i++) {
93
- dc_isar_feature(aa32_vrint, s)) {
87
- DeviceState *cpudev = DEVICE(s->fpd.apu.cpu[i]);
94
- /* VRINTA, VRINTN, VRINTP, VRINTM */
88
+ DeviceState *cpudev = DEVICE(&s->fpd.apu.cpu[i]);
95
- int rounding = fp_decode_rm[extract32(insn, 16, 2)];
89
int ppibase = XLNX_VERSAL_NR_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
96
- return handle_vrint(insn, rd, rm, dp, rounding);
90
qemu_irq maint_irq;
97
- } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
91
int ti;
98
- dc_isar_feature(aa32_vcvt_dr, s)) {
99
+ if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
100
+ dc_isar_feature(aa32_vcvt_dr, s)) {
101
/* VCVTA, VCVTN, VCVTP, VCVTM */
102
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
103
return handle_vcvt(insn, rd, rm, dp, rounding);
104
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
105
index XXXXXXX..XXXXXXX 100644
106
--- a/target/arm/vfp-uncond.decode
107
+++ b/target/arm/vfp-uncond.decode
108
@@ -XXX,XX +XXX,XX @@ VMINMAXNM 1111 1110 1.00 .... .... 1010 . op:1 .0 .... \
109
vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
110
VMINMAXNM 1111 1110 1.00 .... .... 1011 . op:1 .0 .... \
111
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
112
+
113
+VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
114
+ vm=%vm_sp vd=%vd_sp dp=0
115
+VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
116
+ vm=%vm_dp vd=%vd_dp dp=1
117
--
92
--
118
2.20.1
93
2.20.1
119
94
120
95
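For reference, the fp_decode_rm[] table above mirrors the ARM ARM FPDecodeRM() mapping; a self-contained restatement with stand-in enum values (arm_fprounding itself lives in QEMU's headers):

    enum { TIEAWAY, TIEEVEN, POSINF, NEGINF };  /* stand-ins */

    /* Instruction RM field: 0 = VRINTA/VCVTA, 1 = VRINTN/VCVTN,
     * 2 = VRINTP/VCVTP, 3 = VRINTM/VCVTM. */
    static const unsigned char rm_table[4] = {
        TIEAWAY, TIEEVEN, POSINF, NEGINF,
    };

    static int decode_rm(unsigned int rm)
    {
        return rm_table[rm & 3];
    }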
1
Convert the VFP VABS instruction to decodetree.
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
2
3
Unlike the 3-op versions, we don't pass fpst to the VFPGen2OpSPFn or
3
Add support for SD.
4
VFPGen2OpDPFn because none of the operations which use this format
5
and support short vectors will need it.
6
4
5
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
9
Message-id: 20200427181649.26851-9-edgar.iglesias@gmail.com
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
11
---
10
target/arm/translate-vfp.inc.c | 167 +++++++++++++++++++++++++++++++++
12
include/hw/arm/xlnx-versal.h | 12 ++++++++++++
11
target/arm/translate.c | 12 ++-
13
hw/arm/xlnx-versal.c | 31 +++++++++++++++++++++++++++++++
12
target/arm/vfp.decode | 5 +
14
2 files changed, 43 insertions(+)
13
3 files changed, 180 insertions(+), 4 deletions(-)
14
15
15
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
16
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-vfp.inc.c
18
--- a/include/hw/arm/xlnx-versal.h
18
+++ b/target/arm/translate-vfp.inc.c
19
+++ b/include/hw/arm/xlnx-versal.h
19
@@ -XXX,XX +XXX,XX @@ typedef void VFPGen3OpSPFn(TCGv_i32 vd,
20
@@ -XXX,XX +XXX,XX @@
20
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
21
21
TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
22
#include "hw/sysbus.h"
22
23
#include "hw/arm/boot.h"
23
+/*
24
+#include "hw/sd/sdhci.h"
24
+ * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
25
#include "hw/intc/arm_gicv3.h"
25
+ * The callback should emit code to write a value to vd (which
26
#include "hw/char/pl011.h"
26
+ * should be written to only).
27
#include "hw/dma/xlnx-zdma.h"
27
+ */
28
@@ -XXX,XX +XXX,XX @@
28
+typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
29
#define XLNX_VERSAL_NR_UARTS 2
29
+typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
30
#define XLNX_VERSAL_NR_GEMS 2
31
#define XLNX_VERSAL_NR_ADMAS 8
32
+#define XLNX_VERSAL_NR_SDS 2
33
#define XLNX_VERSAL_NR_IRQS 192
34
35
typedef struct Versal {
36
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
37
} iou;
38
} lpd;
39
40
+ /* The Platform Management Controller subsystem. */
41
+ struct {
42
+ struct {
43
+ SDHCIState sd[XLNX_VERSAL_NR_SDS];
44
+ } iou;
45
+ } pmc;
30
+
46
+
31
/*
47
struct {
32
* Perform a 3-operand VFP data processing instruction. fn is the
48
MemoryRegion *mr_ddr;
33
* callback to do the actual operation; this function deals with the
49
uint32_t psci_conduit;
34
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
50
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
35
return true;
51
#define VERSAL_GEM1_IRQ_0 58
52
#define VERSAL_GEM1_WAKE_IRQ_0 59
53
#define VERSAL_ADMA_IRQ_0 60
54
+#define VERSAL_SD0_IRQ_0 126
55
56
/* Architecturally reserved IRQs suitable for virtualization. */
57
#define VERSAL_RSVD_IRQ_FIRST 111
58
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
59
#define MM_FPD_CRF 0xfd1a0000U
60
#define MM_FPD_CRF_SIZE 0x140000
61
62
+#define MM_PMC_SD0 0xf1040000U
63
+#define MM_PMC_SD0_SIZE 0x10000
64
#define MM_PMC_CRP 0xf1260000U
65
#define MM_PMC_CRP_SIZE 0x10000
66
#endif
67
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/hw/arm/xlnx-versal.c
70
+++ b/hw/arm/xlnx-versal.c
71
@@ -XXX,XX +XXX,XX @@ static void versal_create_admas(Versal *s, qemu_irq *pic)
72
}
36
}
73
}
37
74
38
+static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
75
+#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */
76
+static void versal_create_sds(Versal *s, qemu_irq *pic)
39
+{
77
+{
40
+ uint32_t delta_m = 0;
78
+ int i;
41
+ uint32_t delta_d = 0;
42
+ uint32_t bank_mask = 0;
43
+ int veclen = s->vec_len;
44
+ TCGv_i32 f0, fd;
45
+
79
+
46
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
80
+ for (i = 0; i < ARRAY_SIZE(s->pmc.iou.sd); i++) {
47
+ (veclen != 0 || s->vec_stride != 0)) {
81
+ DeviceState *dev;
48
+ return false;
82
+ MemoryRegion *mr;
83
+
84
+ sysbus_init_child_obj(OBJECT(s), "sd[*]",
85
+ &s->pmc.iou.sd[i], sizeof(s->pmc.iou.sd[i]),
86
+ TYPE_SYSBUS_SDHCI);
87
+ dev = DEVICE(&s->pmc.iou.sd[i]);
88
+
89
+ object_property_set_uint(OBJECT(dev),
90
+ 3, "sd-spec-version", &error_fatal);
91
+ object_property_set_uint(OBJECT(dev), SDHCI_CAPABILITIES, "capareg",
92
+ &error_fatal);
93
+ object_property_set_uint(OBJECT(dev), UHS_I, "uhs", &error_fatal);
94
+ qdev_init_nofail(dev);
95
+
96
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
97
+ memory_region_add_subregion(&s->mr_ps,
98
+ MM_PMC_SD0 + i * MM_PMC_SD0_SIZE, mr);
99
+
100
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
101
+ pic[VERSAL_SD0_IRQ_0 + i * 2]);
49
+ }
102
+ }
50
+
51
+ if (!vfp_access_check(s)) {
52
+ return true;
53
+ }
54
+
55
+ if (veclen > 0) {
56
+ bank_mask = 0x18;
57
+
58
+ /* Figure out what type of vector operation this is. */
59
+ if ((vd & bank_mask) == 0) {
60
+ /* scalar */
61
+ veclen = 0;
62
+ } else {
63
+ delta_d = s->vec_stride + 1;
64
+
65
+ if ((vm & bank_mask) == 0) {
66
+ /* mixed scalar/vector */
67
+ delta_m = 0;
68
+ } else {
69
+ /* vector */
70
+ delta_m = delta_d;
71
+ }
72
+ }
73
+ }
74
+
75
+ f0 = tcg_temp_new_i32();
76
+ fd = tcg_temp_new_i32();
77
+
78
+ neon_load_reg32(f0, vm);
79
+
80
+ for (;;) {
81
+ fn(fd, f0);
82
+ neon_store_reg32(fd, vd);
83
+
84
+ if (veclen == 0) {
85
+ break;
86
+ }
87
+
88
+ if (delta_m == 0) {
89
+ /* single source one-many */
90
+ while (veclen--) {
91
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
92
+ neon_store_reg32(fd, vd);
93
+ }
94
+ break;
95
+ }
96
+
97
+ /* Set up the operands for the next iteration */
98
+ veclen--;
99
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
100
+ vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
101
+ neon_load_reg32(f0, vm);
102
+ }
103
+
104
+ tcg_temp_free_i32(f0);
105
+ tcg_temp_free_i32(fd);
106
+
107
+ return true;
108
+}
103
+}
109
+
104
+
110
+static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
105
/* This takes the board allocated linear DDR memory and creates aliases
111
+{
106
* for each split DDR range/aperture on the Versal address map.
112
+ uint32_t delta_m = 0;
107
*/
113
+ uint32_t delta_d = 0;
108
@@ -XXX,XX +XXX,XX @@ static void versal_realize(DeviceState *dev, Error **errp)
114
+ uint32_t bank_mask = 0;
109
versal_create_uarts(s, pic);
115
+ int veclen = s->vec_len;
110
versal_create_gems(s, pic);
116
+ TCGv_i64 f0, fd;
111
versal_create_admas(s, pic);
117
+
112
+ versal_create_sds(s, pic);
118
+ /* UNDEF accesses to D16-D31 if they don't exist */
113
versal_map_ddr(s);
119
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vm) & 0x10)) {
114
versal_unimp(s);
120
+ return false;
115
121
+ }
122
+
123
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
124
+ (veclen != 0 || s->vec_stride != 0)) {
125
+ return false;
126
+ }
127
+
128
+ if (!vfp_access_check(s)) {
129
+ return true;
130
+ }
131
+
132
+ if (veclen > 0) {
133
+ bank_mask = 0xc;
134
+
135
+ /* Figure out what type of vector operation this is. */
136
+ if ((vd & bank_mask) == 0) {
137
+ /* scalar */
138
+ veclen = 0;
139
+ } else {
140
+ delta_d = (s->vec_stride >> 1) + 1;
141
+
142
+ if ((vm & bank_mask) == 0) {
143
+ /* mixed scalar/vector */
144
+ delta_m = 0;
145
+ } else {
146
+ /* vector */
147
+ delta_m = delta_d;
148
+ }
149
+ }
150
+ }
151
+
152
+ f0 = tcg_temp_new_i64();
153
+ fd = tcg_temp_new_i64();
154
+
155
+ neon_load_reg64(f0, vm);
156
+
157
+ for (;;) {
158
+ fn(fd, f0);
159
+ neon_store_reg64(fd, vd);
160
+
161
+ if (veclen == 0) {
162
+ break;
163
+ }
164
+
165
+ if (delta_m == 0) {
166
+ /* single source one-many */
167
+ while (veclen--) {
168
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
169
+ neon_store_reg64(fd, vd);
170
+ }
171
+ break;
172
+ }
173
+
174
+ /* Set up the operands for the next iteration */
175
+ veclen--;
176
+ vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
177
+ vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
178
+ neon_load_reg64(f0, vm);
179
+ }
180
+
181
+ tcg_temp_free_i64(f0);
182
+ tcg_temp_free_i64(fd);
183
+
184
+ return true;
185
+}
186
+
187
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
188
{
189
/* Note that order of inputs to the add matters for NaNs */
190
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
191
tcg_temp_free_i64(fd);
192
return true;
193
}
194
+
195
+static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
196
+{
197
+ return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
198
+}
199
+
200
+static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
201
+{
202
+ return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
203
+}
204
diff --git a/target/arm/translate.c b/target/arm/translate.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/target/arm/translate.c
207
+++ b/target/arm/translate.c
208
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
209
case 0 ... 14:
210
/* Already handled by decodetree */
211
return 1;
212
+ case 15:
213
+ switch (rn) {
214
+ case 1:
215
+ /* Already handled by decodetree */
216
+ return 1;
217
+ default:
218
+ break;
219
+ }
220
default:
221
break;
222
}
223
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
224
/* rn is opcode, encoded as per VFP_SREG_N. */
225
switch (rn) {
226
case 0x00: /* vmov */
227
- case 0x01: /* vabs */
228
case 0x02: /* vneg */
229
case 0x03: /* vsqrt */
230
break;
231
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
232
case 0: /* cpy */
233
/* no-op */
234
break;
235
- case 1: /* abs */
236
- gen_vfp_abs(dp);
237
- break;
238
case 2: /* neg */
239
gen_vfp_neg(dp);
240
break;
241
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
242
index XXXXXXX..XXXXXXX 100644
243
--- a/target/arm/vfp.decode
244
+++ b/target/arm/vfp.decode
245
@@ -XXX,XX +XXX,XX @@ VMOV_imm_sp ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
246
vd=%vd_sp
247
VMOV_imm_dp ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
248
vd=%vd_dp
249
+
250
+VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... \
251
+ vd=%vd_sp vm=%vm_sp
252
+VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... \
253
+ vd=%vd_dp vm=%vm_dp
254
--
116
--
255
2.20.1
117
2.20.1
256
118
257
119
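The register stepping in do_vfp_2op_sp()/do_vfp_2op_dp() above can be puzzling at first sight; a standalone restatement of the single-precision case (bank_mask 0x18, i.e. four banks of eight registers):

    /* Starting at s9 with delta 2 (stride 1), the walk visits
     * s9 -> s11 -> s13 -> s15, staying within the s8..s15 bank; a
     * destination in bank 0 (s0..s7) makes the operation scalar. */
    static int vfp_step_sreg(int vd, int delta)
    {
        const int bank_mask = 0x18;

        return ((vd + delta) & (bank_mask - 1)) | (vd & bank_mask);
    }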
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
2
3
The ARM pseudocode installs the error_code into the original
3
hw/arm: versal: Add support for the RTC.
4
pointer, not the encrypted pointer. The difference applies
5
within the 7 bits of pac data; the result should be the sign
6
extension of bit 55.
7
4
8
Add a testcase to that effect.
5
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
9
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
9
Message-id: 20200427181649.26851-10-edgar.iglesias@gmail.com
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
11
---
14
tests/tcg/aarch64/Makefile.target | 2 +-
12
include/hw/arm/xlnx-versal.h | 8 ++++++++
15
target/arm/pauth_helper.c | 4 +-
13
hw/arm/xlnx-versal.c | 21 +++++++++++++++++++++
16
tests/tcg/aarch64/pauth-2.c | 61 +++++++++++++++++++++++++++++++
14
2 files changed, 29 insertions(+)
17
3 files changed, 64 insertions(+), 3 deletions(-)
18
create mode 100644 tests/tcg/aarch64/pauth-2.c
19
15
20
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
16
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
21
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
22
--- a/tests/tcg/aarch64/Makefile.target
18
--- a/include/hw/arm/xlnx-versal.h
23
+++ b/tests/tcg/aarch64/Makefile.target
19
+++ b/include/hw/arm/xlnx-versal.h
24
@@ -XXX,XX +XXX,XX @@ run-fcvt: fcvt
20
@@ -XXX,XX +XXX,XX @@
25
    $(call run-test,$<,$(QEMU) $<, "$< on $(TARGET_NAME)")
21
#include "hw/char/pl011.h"
26
    $(call diff-out,$<,$(AARCH64_SRC)/fcvt.ref)
22
#include "hw/dma/xlnx-zdma.h"
27
23
#include "hw/net/cadence_gem.h"
28
-AARCH64_TESTS += pauth-1
24
+#include "hw/rtc/xlnx-zynqmp-rtc.h"
29
+AARCH64_TESTS += pauth-1 pauth-2
25
30
run-pauth-%: QEMU += -cpu max
26
#define TYPE_XLNX_VERSAL "xlnx-versal"
31
27
#define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL)
32
TESTS:=$(AARCH64_TESTS)
28
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
33
diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
29
struct {
30
SDHCIState sd[XLNX_VERSAL_NR_SDS];
31
} iou;
32
+
33
+ XlnxZynqMPRTC rtc;
34
} pmc;
35
36
struct {
37
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
38
#define VERSAL_GEM1_IRQ_0 58
39
#define VERSAL_GEM1_WAKE_IRQ_0 59
40
#define VERSAL_ADMA_IRQ_0 60
41
+#define VERSAL_RTC_APB_ERR_IRQ 121
42
#define VERSAL_SD0_IRQ_0 126
43
+#define VERSAL_RTC_ALARM_IRQ 142
44
+#define VERSAL_RTC_SECONDS_IRQ 143
45
46
/* Architecturally reserved IRQs suitable for virtualization. */
47
#define VERSAL_RSVD_IRQ_FIRST 111
48
@@ -XXX,XX +XXX,XX @@ typedef struct Versal {
49
#define MM_PMC_SD0_SIZE 0x10000
50
#define MM_PMC_CRP 0xf1260000U
51
#define MM_PMC_CRP_SIZE 0x10000
52
+#define MM_PMC_RTC 0xf12a0000
53
+#define MM_PMC_RTC_SIZE 0x10000
54
#endif
55
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
34
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/pauth_helper.c
57
--- a/hw/arm/xlnx-versal.c
36
+++ b/target/arm/pauth_helper.c
58
+++ b/hw/arm/xlnx-versal.c
37
@@ -XXX,XX +XXX,XX @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
59
@@ -XXX,XX +XXX,XX @@ static void versal_create_sds(Versal *s, qemu_irq *pic)
38
if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) {
39
int error_code = (keynumber << 1) | (keynumber ^ 1);
40
if (param.tbi) {
41
- return deposit64(ptr, 53, 2, error_code);
42
+ return deposit64(orig_ptr, 53, 2, error_code);
43
} else {
44
- return deposit64(ptr, 61, 2, error_code);
45
+ return deposit64(orig_ptr, 61, 2, error_code);
46
}
47
}
60
}
48
return orig_ptr;
61
}
49
diff --git a/tests/tcg/aarch64/pauth-2.c b/tests/tcg/aarch64/pauth-2.c
62
50
new file mode 100644
63
+static void versal_create_rtc(Versal *s, qemu_irq *pic)
51
index XXXXXXX..XXXXXXX
64
+{
52
--- /dev/null
65
+ SysBusDevice *sbd;
53
+++ b/tests/tcg/aarch64/pauth-2.c
66
+ MemoryRegion *mr;
54
@@ -XXX,XX +XXX,XX @@
55
+#include <stdint.h>
56
+#include <assert.h>
57
+
67
+
58
+asm(".arch armv8.4-a");
68
+ sysbus_init_child_obj(OBJECT(s), "rtc", &s->pmc.rtc, sizeof(s->pmc.rtc),
69
+ TYPE_XLNX_ZYNQMP_RTC);
70
+ sbd = SYS_BUS_DEVICE(&s->pmc.rtc);
71
+ qdev_init_nofail(DEVICE(sbd));
59
+
72
+
60
+void do_test(uint64_t value)
73
+ mr = sysbus_mmio_get_region(sbd, 0);
61
+{
74
+ memory_region_add_subregion(&s->mr_ps, MM_PMC_RTC, mr);
62
+ uint64_t salt1, salt2;
63
+ uint64_t encode, decode;
64
+
75
+
65
+ /*
76
+ /*
66
+ * With TBI enabled and a 48-bit VA, there are 7 bits of auth,
77
+ * TODO: Connect the ALARM and SECONDS interrupts once our RTC model
67
+ * and so a 1/128 chance of encode = pac(value,key,salt) producing
78
+ * supports them.
68
+ * an auth for which leaves value unchanged.
69
+ * Iterate until we find a salt for which encode != value.
70
+ */
79
+ */
71
+ for (salt1 = 1; ; salt1++) {
80
+ sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]);
72
+ asm volatile("pacda %0, %2" : "=r"(encode) : "0"(value), "r"(salt1));
73
+ if (encode != value) {
74
+ break;
75
+ }
76
+ }
77
+
78
+ /* A valid salt must produce a valid authorization. */
79
+ asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt1));
80
+ assert(decode == value);
81
+
82
+ /*
83
+ * An invalid salt usually fails authorization, but again there
84
+ * is a chance of choosing another salt that works.
85
+ * Iterate until we find another salt which does fail.
86
+ */
87
+ for (salt2 = salt1 + 1; ; salt2++) {
88
+ asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt2));
89
+ if (decode != value) {
90
+ break;
91
+ }
92
+ }
93
+
94
+ /* The VA bits, bit 55, and the TBI bits, should be unchanged. */
95
+ assert(((decode ^ value) & 0xff80ffffffffffffull) == 0);
96
+
97
+ /*
98
+ * Bits [54:53] are an error indicator based on the key used;
99
+ * the DA key above is keynumber 0, so error == 0b01. Otherwise
100
+ * bit 55 of the original is sign-extended into the rest of the auth.
101
+ */
102
+ if ((value >> 55) & 1) {
103
+ assert(((decode >> 48) & 0xff) == 0b10111111);
104
+ } else {
105
+ assert(((decode >> 48) & 0xff) == 0b00100000);
106
+ }
107
+}
81
+}
108
+
82
+
109
+int main()
83
/* This takes the board allocated linear DDR memory and creates aliases
110
+{
84
* for each split DDR range/aperture on the Versal address map.
111
+ do_test(0);
85
*/
112
+ do_test(-1);
86
@@ -XXX,XX +XXX,XX @@ static void versal_realize(DeviceState *dev, Error **errp)
113
+ do_test(0xda004acedeadbeefull);
87
versal_create_gems(s, pic);
114
+ return 0;
88
versal_create_admas(s, pic);
115
+}
89
versal_create_sds(s, pic);
90
+ versal_create_rtc(s, pic);
91
versal_map_ddr(s);
92
versal_unimp(s);
93
116
--
94
--
117
2.20.1
95
2.20.1
118
96
119
97
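A condensed model of the pauth fix above (bit positions from the commit message; the helper below is illustrative, not QEMU's):

    #include <stdint.h>

    /* TBI case: on authentication failure the 2-bit error code is
     * deposited into bits [54:53] of the *original* pointer, whose PAC
     * field is already the sign-extension of bit 55 -- not into the
     * encrypted pointer.  (Without TBI the code lands in bits [62:61].) */
    static uint64_t auth_fail_result(uint64_t orig_ptr, int error_code)
    {
        uint64_t mask = 3ULL << 53;

        return (orig_ptr & ~mask) | ((uint64_t)error_code << 53);
    }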
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
3
Add support for SD.
4
5
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
8
Message-id: 20200427181649.26851-11-edgar.iglesias@gmail.com
1
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
3
---
10
---
4
target/arm/translate-vfp.inc.c | 10 ++++++++++
11
hw/arm/xlnx-versal-virt.c | 46 +++++++++++++++++++++++++++++++++++++++
5
target/arm/translate.c | 8 +-------
12
1 file changed, 46 insertions(+)
6
target/arm/vfp.decode | 5 +++++
7
3 files changed, 16 insertions(+), 7 deletions(-)
8
13
9
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
14
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
10
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/translate-vfp.inc.c
16
--- a/hw/arm/xlnx-versal-virt.c
12
+++ b/target/arm/translate-vfp.inc.c
17
+++ b/hw/arm/xlnx-versal-virt.c
13
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
18
@@ -XXX,XX +XXX,XX @@
14
return true;
19
#include "hw/arm/sysbus-fdt.h"
20
#include "hw/arm/fdt.h"
21
#include "cpu.h"
22
+#include "hw/qdev-properties.h"
23
#include "hw/arm/xlnx-versal.h"
24
25
#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt")
26
@@ -XXX,XX +XXX,XX @@ static void fdt_add_zdma_nodes(VersalVirt *s)
27
}
15
}
28
}
16
29
17
+static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
30
+static void fdt_add_sd_nodes(VersalVirt *s)
18
+{
31
+{
19
+ return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
32
+ const char clocknames[] = "clk_xin\0clk_ahb";
33
+ const char compat[] = "arasan,sdhci-8.9a";
34
+ int i;
35
+
36
+ for (i = ARRAY_SIZE(s->soc.pmc.iou.sd) - 1; i >= 0; i--) {
37
+ uint64_t addr = MM_PMC_SD0 + MM_PMC_SD0_SIZE * i;
38
+ char *name = g_strdup_printf("/sdhci@%" PRIx64, addr);
39
+
40
+ qemu_fdt_add_subnode(s->fdt, name);
41
+
42
+ qemu_fdt_setprop_cells(s->fdt, name, "clocks",
43
+ s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
44
+ qemu_fdt_setprop(s->fdt, name, "clock-names",
45
+ clocknames, sizeof(clocknames));
46
+ qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
47
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_SD0_IRQ_0 + i * 2,
48
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
49
+ qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
50
+ 2, addr, 2, MM_PMC_SD0_SIZE);
51
+ qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
52
+ g_free(name);
53
+ }
20
+}
54
+}
21
+
55
+
22
+static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a)
56
static void fdt_nop_memory_nodes(void *fdt, Error **errp)
57
{
58
Error *err = NULL;
59
@@ -XXX,XX +XXX,XX @@ static void create_virtio_regions(VersalVirt *s)
60
}
61
}
62
63
+static void sd_plugin_card(SDHCIState *sd, DriveInfo *di)
23
+{
64
+{
24
+ return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm);
65
+ BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL;
66
+ DeviceState *card;
67
+
68
+ card = qdev_create(qdev_get_child_bus(DEVICE(sd), "sd-bus"), TYPE_SD_CARD);
69
+ object_property_add_child(OBJECT(sd), "card[*]", OBJECT(card),
70
+ &error_fatal);
71
+ qdev_prop_set_drive(card, "drive", blk, &error_fatal);
72
+ object_property_set_bool(OBJECT(card), true, "realized", &error_fatal);
25
+}
73
+}
26
+
74
+
27
static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
75
static void versal_virt_init(MachineState *machine)
28
{
76
{
29
return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
77
VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(machine);
30
diff --git a/target/arm/translate.c b/target/arm/translate.c
78
int psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
31
index XXXXXXX..XXXXXXX 100644
79
+ int i;
32
--- a/target/arm/translate.c
80
33
+++ b/target/arm/translate.c
81
/*
34
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
82
* If the user provides an Operating System to be loaded, we expect them
35
return 1;
83
@@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine)
36
case 15:
84
fdt_add_gic_nodes(s);
37
switch (rn) {
85
fdt_add_timer_nodes(s);
38
- case 1 ... 3:
86
fdt_add_zdma_nodes(s);
39
+ case 0 ... 3:
87
+ fdt_add_sd_nodes(s);
40
/* Already handled by decodetree */
88
fdt_add_cpu_nodes(s, psci_conduit);
41
return 1;
89
fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz);
42
default:
90
fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz);
43
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
91
@@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine)
44
if (op == 15) {
92
memory_region_add_subregion_overlap(get_system_memory(),
45
/* rn is opcode, encoded as per VFP_SREG_N. */
93
0, &s->soc.fpd.apu.mr, 0);
46
switch (rn) {
94
47
- case 0x00: /* vmov */
95
+ /* Plugin SD cards. */
48
- break;
96
+ for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) {
49
-
97
+ sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD));
50
case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
98
+ }
51
case 0x05: /* vcvtt.f64.f16, vcvtt.f32.f16 */
52
/*
53
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
54
switch (op) {
55
case 15: /* extension space */
56
switch (rn) {
57
- case 0: /* cpy */
58
- /* no-op */
59
- break;
60
case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
61
{
62
TCGv_ptr fpst = get_fpstatus_ptr(false);
63
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/vfp.decode
66
+++ b/target/arm/vfp.decode
67
@@ -XXX,XX +XXX,XX @@ VMOV_imm_sp ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
68
VMOV_imm_dp ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
69
vd=%vd_dp
70
71
+VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... \
72
+ vd=%vd_sp vm=%vm_sp
73
+VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... \
74
+ vd=%vd_dp vm=%vm_dp
75
+
99
+
76
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... \
100
s->binfo.ram_size = machine->ram_size;
77
vd=%vd_sp vm=%vm_sp
101
s->binfo.loader_start = 0x0;
78
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... \
102
s->binfo.get_dtb = versal_virt_get_dtb;
79
--
103
--
80
2.20.1
104
2.20.1
81
105
82
106
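Tying together the SD hunks above, the per-instance layout as two small helpers (constants as defined in the Versal memory map earlier in the series; the helper names here are illustrative):

    #include <stdint.h>

    static uint64_t versal_sd_base(int i)
    {
        return MM_PMC_SD0 + (uint64_t)i * MM_PMC_SD0_SIZE;
    }

    static int versal_sd_irq(int i)
    {
        /* instances are spaced two SPI numbers apart in the IRQ map */
        return VERSAL_SD0_IRQ_0 + i * 2;
    }

    /* i == 0 -> 0xf1040000, SPI 126;  i == 1 -> 0xf1050000, SPI 128 */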
1
The current VFP code has two different idioms for
1
From: "Edgar E. Iglesias" <edgar.iglesias@xilinx.com>
2
loading and storing from the VFP register file:
3
1. using the gen_mov_F0_vreg() and similar functions,
4
which load and store to a fixed set of TCG globals
5
cpu_F0s, cpu_F0d, etc.
6
2. using direct calls to tcg_gen_ld_f64() and friends
7
2
8
We want to phase out idiom 1 (because the use of the
3
Add support for the RTC.
9
fixed globals is a relic of a much older version of TCG),
10
but idiom 2 is quite longwinded:
11
tcg_gen_ld_f64(tmp, cpu_env, vfp_reg_offset(true, reg))
12
requires us to specify the 64-bitness twice, once in
13
the function name and once by passing 'true' to
14
vfp_reg_offset(). There's no guard against accidentally
15
passing the wrong flag.
16
4
17
Instead, let's move to a convention of accessing 64-bit
5
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
18
registers via the existing neon_load_reg64() and
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
19
neon_store_reg64(), and provide new neon_load_reg32()
7
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
20
and neon_store_reg32() for the 32-bit equivalents.
8
Message-id: 20200427181649.26851-12-edgar.iglesias@gmail.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
hw/arm/xlnx-versal-virt.c | 22 ++++++++++++++++++++++
12
1 file changed, 22 insertions(+)
21
13
22
Implement the new functions and use them in the code in
14
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
23
translate-vfp.inc.c. We will convert the rest of the VFP
24
code as we do the decodetree conversion in subsequent
25
commits.
26
27
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
28
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
29
---
30
target/arm/translate-vfp.inc.c | 40 +++++++++++++++++-----------------
31
target/arm/translate.c | 10 +++++++++
32
2 files changed, 30 insertions(+), 20 deletions(-)
33
34
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
35
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-vfp.inc.c
16
--- a/hw/arm/xlnx-versal-virt.c
37
+++ b/target/arm/translate-vfp.inc.c
17
+++ b/hw/arm/xlnx-versal-virt.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
18
@@ -XXX,XX +XXX,XX @@ static void fdt_add_sd_nodes(VersalVirt *s)
39
tcg_gen_ext_i32_i64(nf, cpu_NF);
40
tcg_gen_ext_i32_i64(vf, cpu_VF);
41
42
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
43
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
44
+ neon_load_reg64(frn, rn);
45
+ neon_load_reg64(frm, rm);
46
switch (a->cc) {
47
case 0: /* eq: Z */
48
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
49
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
50
tcg_temp_free_i64(tmp);
51
break;
52
}
53
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
54
+ neon_store_reg64(dest, rd);
55
tcg_temp_free_i64(frn);
56
tcg_temp_free_i64(frm);
57
tcg_temp_free_i64(dest);
58
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
59
frn = tcg_temp_new_i32();
60
frm = tcg_temp_new_i32();
61
dest = tcg_temp_new_i32();
62
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
63
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
64
+ neon_load_reg32(frn, rn);
65
+ neon_load_reg32(frm, rm);
66
switch (a->cc) {
67
case 0: /* eq: Z */
68
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
69
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
70
tcg_temp_free_i32(tmp);
71
break;
72
}
73
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
74
+ neon_store_reg32(dest, rd);
75
tcg_temp_free_i32(frn);
76
tcg_temp_free_i32(frm);
77
tcg_temp_free_i32(dest);
78
@@ -XXX,XX +XXX,XX @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
79
frm = tcg_temp_new_i64();
80
dest = tcg_temp_new_i64();
81
82
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
83
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
84
+ neon_load_reg64(frn, rn);
85
+ neon_load_reg64(frm, rm);
86
if (vmin) {
87
gen_helper_vfp_minnumd(dest, frn, frm, fpst);
88
} else {
89
gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
90
}
91
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
92
+ neon_store_reg64(dest, rd);
93
tcg_temp_free_i64(frn);
94
tcg_temp_free_i64(frm);
95
tcg_temp_free_i64(dest);
96
@@ -XXX,XX +XXX,XX @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
97
frm = tcg_temp_new_i32();
98
dest = tcg_temp_new_i32();
99
100
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
101
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
102
+ neon_load_reg32(frn, rn);
103
+ neon_load_reg32(frm, rm);
104
if (vmin) {
105
gen_helper_vfp_minnums(dest, frn, frm, fpst);
106
} else {
107
gen_helper_vfp_maxnums(dest, frn, frm, fpst);
108
}
109
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
110
+ neon_store_reg32(dest, rd);
111
tcg_temp_free_i32(frn);
112
tcg_temp_free_i32(frm);
113
tcg_temp_free_i32(dest);
114
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
115
TCGv_i64 tcg_res;
116
tcg_op = tcg_temp_new_i64();
117
tcg_res = tcg_temp_new_i64();
118
- tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
119
+ neon_load_reg64(tcg_op, rm);
120
gen_helper_rintd(tcg_res, tcg_op, fpst);
121
- tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
122
+ neon_store_reg64(tcg_res, rd);
123
tcg_temp_free_i64(tcg_op);
124
tcg_temp_free_i64(tcg_res);
125
} else {
126
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
127
TCGv_i32 tcg_res;
128
tcg_op = tcg_temp_new_i32();
129
tcg_res = tcg_temp_new_i32();
130
- tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
131
+ neon_load_reg32(tcg_op, rm);
132
gen_helper_rints(tcg_res, tcg_op, fpst);
133
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
134
+ neon_store_reg32(tcg_res, rd);
135
tcg_temp_free_i32(tcg_op);
136
tcg_temp_free_i32(tcg_res);
137
}
19
}
138
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
139
tcg_double = tcg_temp_new_i64();
140
tcg_res = tcg_temp_new_i64();
141
tcg_tmp = tcg_temp_new_i32();
142
- tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
143
+ neon_load_reg64(tcg_double, rm);
144
if (is_signed) {
145
gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
146
} else {
147
gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
148
}
149
tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
150
- tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
151
+ neon_store_reg32(tcg_tmp, rd);
152
tcg_temp_free_i32(tcg_tmp);
153
tcg_temp_free_i64(tcg_res);
154
tcg_temp_free_i64(tcg_double);
155
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
156
TCGv_i32 tcg_single, tcg_res;
157
tcg_single = tcg_temp_new_i32();
158
tcg_res = tcg_temp_new_i32();
159
- tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
160
+ neon_load_reg32(tcg_single, rm);
161
if (is_signed) {
162
gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
163
} else {
164
gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
165
}
166
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
167
+ neon_store_reg32(tcg_res, rd);
168
tcg_temp_free_i32(tcg_res);
169
tcg_temp_free_i32(tcg_single);
170
}
171
diff --git a/target/arm/translate.c b/target/arm/translate.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/target/arm/translate.c
174
+++ b/target/arm/translate.c
175
@@ -XXX,XX +XXX,XX @@ static inline void neon_store_reg64(TCGv_i64 var, int reg)
176
tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
177
}
20
}
178
21
179
+static inline void neon_load_reg32(TCGv_i32 var, int reg)
22
+static void fdt_add_rtc_node(VersalVirt *s)
180
+{
23
+{
181
+ tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
24
+ const char compat[] = "xlnx,zynqmp-rtc";
25
+ const char interrupt_names[] = "alarm\0sec";
26
+ char *name = g_strdup_printf("/rtc@%x", MM_PMC_RTC);
27
+
28
+ qemu_fdt_add_subnode(s->fdt, name);
29
+
30
+ qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
31
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_ALARM_IRQ,
32
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
33
+ GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_SECONDS_IRQ,
34
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
35
+ qemu_fdt_setprop(s->fdt, name, "interrupt-names",
36
+ interrupt_names, sizeof(interrupt_names));
37
+ qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
38
+ 2, MM_PMC_RTC, 2, MM_PMC_RTC_SIZE);
39
+ qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
40
+ g_free(name);
182
+}
41
+}
183
+
42
+
184
+static inline void neon_store_reg32(TCGv_i32 var, int reg)
43
static void fdt_nop_memory_nodes(void *fdt, Error **errp)
185
+{
186
+ tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
187
+}
188
+
189
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
190
{
44
{
191
TCGv_ptr ret = tcg_temp_new_ptr();
45
Error *err = NULL;
46
@@ -XXX,XX +XXX,XX @@ static void versal_virt_init(MachineState *machine)
47
fdt_add_timer_nodes(s);
48
fdt_add_zdma_nodes(s);
49
fdt_add_sd_nodes(s);
50
+ fdt_add_rtc_node(s);
51
fdt_add_cpu_nodes(s, psci_conduit);
52
fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz);
53
fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz);
192
--
54
--
193
2.20.1
55
2.20.1
194
56
195
57
diff view generated by jsdifflib
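To make the difference between the two idioms concrete, here is the same 64-bit register load written both ways (a fragment for illustration only, not code from the patch; 'tmp' and 'reg' are hypothetical locals and this does not compile outside QEMU's translator):

/* Idiom 2: the 64-bitness must be stated twice, with no cross-check */
TCGv_i64 tmp = tcg_temp_new_i64();
tcg_gen_ld_f64(tmp, cpu_env, vfp_reg_offset(true, reg));

/* New convention: the width is implied by the helper name alone */
neon_load_reg64(tmp, reg);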
Factor out the VFP access checking code so that we can use it in the
leaf functions of the decodetree decoder.

We call the function full_vfp_access_check() so we can keep
the more natural vfp_access_check() for a version which doesn't
have the 'ignore_vfp_enabled' flag -- that way almost all VFP
insns will be able to use vfp_access_check(s) and only the
special-register access function will have to use
full_vfp_access_check(s, ignore_vfp_enabled).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-vfp.inc.c | 100 ++++++++++++++++++++++++++++++++
target/arm/translate.c | 101 +++++----------------------------
2 files changed, 113 insertions(+), 88 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@
/* Include the generated VFP decoder */
#include "decode-vfp.inc.c"
#include "decode-vfp-uncond.inc.c"
+
+/*
+ * Check that VFP access is enabled. If it is, do the necessary
+ * M-profile lazy-FP handling and then return true.
+ * If not, emit code to generate an appropriate exception and
+ * return false.
+ * The ignore_vfp_enabled argument specifies that we should ignore
+ * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
+ * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
+ */
+static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
+{
+ if (s->fp_excp_el) {
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
+ s->fp_excp_el);
+ } else {
+ gen_exception_insn(s, 4, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false),
+ s->fp_excp_el);
+ }
+ return false;
+ }
+
+ if (!s->vfp_enabled && !ignore_vfp_enabled) {
+ assert(!arm_dc_feature(s, ARM_FEATURE_M));
+ gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
+ default_exception_el(s));
+ return false;
+ }
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* Handle M-profile lazy FP state mechanics */
+
+ /* Trigger lazy-state preservation if necessary */
+ if (s->v7m_lspact) {
+ /*
+ * Lazy state saving affects external memory and also the NVIC,
+ * so we must mark it as an IO operation for icount.
+ */
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_start();
+ }
+ gen_helper_v7m_preserve_fp_state(cpu_env);
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_end();
+ }
+ /*
+ * If the preserve_fp_state helper doesn't throw an exception
+ * then it will clear LSPACT; we don't need to repeat this for
+ * any further FP insns in this TB.
+ */
+ s->v7m_lspact = false;
+ }
+
+ /* Update ownership of FP context: set FPCCR.S to match current state */
+ if (s->v8m_fpccr_s_wrong) {
+ TCGv_i32 tmp;
+
+ tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
+ if (s->v8m_secure) {
+ tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
+ }
+ store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v8m_fpccr_s_wrong = false;
+ }
+
+ if (s->v7m_new_fp_ctxt_needed) {
+ /*
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
+ * and the FPSCR.
+ */
+ TCGv_i32 control, fpscr;
+ uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
+
+ fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
+ tcg_temp_free_i32(fpscr);
+ /*
+ * We don't need to arrange to end the TB, because the only
+ * parts of FPSCR which we cache in the TB flags are the VECLEN
+ * and VECSTRIDE, and those don't exist for M-profile.
+ */
+
+ if (s->v8m_secure) {
+ bits |= R_V7M_CONTROL_SFPA_MASK;
+ }
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_ori_i32(control, control, bits);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v7m_new_fp_ctxt_needed = false;
+ }
+ }
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
return 1;
}

-/* Disassemble a VFP instruction. Returns nonzero if an error occurred
- (ie. an undefined instruction). */
+/*
+ * Disassemble a VFP instruction. Returns nonzero if an error occurred
+ * (ie. an undefined instruction).
+ */
static int disas_vfp_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
TCGv_i32 addr;
TCGv_i32 tmp;
TCGv_i32 tmp2;
+ bool ignore_vfp_enabled = false;

if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
return 1;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
}
}

- /* FIXME: this access check should not take precedence over UNDEF
+ /*
+ * FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
*/
- if (s->fp_excp_el) {
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
- s->fp_excp_el);
- } else {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false),
- s->fp_excp_el);
- }
- return 0;
- }
-
- if (!s->vfp_enabled) {
- /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
- if ((insn & 0x0fe00fff) != 0x0ee00a10)
- return 1;
+ if ((insn & 0x0fe00fff) == 0x0ee00a10) {
rn = (insn >> 16) & 0xf;
- if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
- && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
- return 1;
+ if (rn == ARM_VFP_FPSID || rn == ARM_VFP_FPEXC || rn == ARM_VFP_MVFR2
+ || rn == ARM_VFP_MVFR1 || rn == ARM_VFP_MVFR0) {
+ ignore_vfp_enabled = true;
}
}
-
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- /* Handle M-profile lazy FP state mechanics */
-
- /* Trigger lazy-state preservation if necessary */
- if (s->v7m_lspact) {
- /*
- * Lazy state saving affects external memory and also the NVIC,
- * so we must mark it as an IO operation for icount.
- */
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_start();
- }
- gen_helper_v7m_preserve_fp_state(cpu_env);
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_end();
- }
- /*
- * If the preserve_fp_state helper doesn't throw an exception
- * then it will clear LSPACT; we don't need to repeat this for
- * any further FP insns in this TB.
- */
- s->v7m_lspact = false;
- }
-
- /* Update ownership of FP context: set FPCCR.S to match current state */
- if (s->v8m_fpccr_s_wrong) {
- TCGv_i32 tmp;
-
- tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
- if (s->v8m_secure) {
- tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
- } else {
- tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
- }
- store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
- /* Don't need to do this for any further FP insns in this TB */
- s->v8m_fpccr_s_wrong = false;
- }
-
- if (s->v7m_new_fp_ctxt_needed) {
- /*
- * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
- * and the FPSCR.
- */
- TCGv_i32 control, fpscr;
- uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
-
- fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
- gen_helper_vfp_set_fpscr(cpu_env, fpscr);
- tcg_temp_free_i32(fpscr);
- /*
- * We don't need to arrange to end the TB, because the only
- * parts of FPSCR which we cache in the TB flags are the VECLEN
- * and VECSTRIDE, and those don't exist for M-profile.
- */
-
- if (s->v8m_secure) {
- bits |= R_V7M_CONTROL_SFPA_MASK;
- }
- control = load_cpu_field(v7m.control[M_REG_S]);
- tcg_gen_ori_i32(control, control, bits);
- store_cpu_field(control, v7m.control[M_REG_S]);
- /* Don't need to do this for any further FP insns in this TB */
- s->v7m_new_fp_ctxt_needed = false;
- }
+ if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
+ return 0;
}

if (extract32(insn, 28, 4) == 0xf) {
--
2.20.1

Somewhere along the line we accidentally added a duplicate
"using D16-D31 when they don't exist" check to do_vfm_dp()
(probably an artifact of a patch series rebase). Remove it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20200430181003.21682-2-peter.maydell@linaro.org
---
target/arm/translate-vfp.inc.c | 6 ------
1 file changed, 6 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
return false;
}

- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vn | a->vm) & 0x10)) {
- return false;
- }
-
if (!vfp_access_check(s)) {
return true;
}
--
2.20.1
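For clarity, the relationship between the two checkers that the commit message describes can be sketched as a one-line wrapper (this wrapper is not part of the patch above; it is simply the shape the commit message implies, with vfp_access_check(s) meaning "the common case, with no FPEXC[EN] exemption"):

static bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}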
Convert the VMINNM and VMAXNM instructions to decodetree.
As with VSEL, we leave the trans_VMINMAXNM() function
in translate.c for the moment.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate.c | 41 ++++++++++++++++++++++++------------
target/arm/vfp-uncond.decode | 5 +++++
2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
return true;
}

-static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
- uint32_t rm, uint32_t dp)
+static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
{
- uint32_t vmin = extract32(insn, 6, 1);
- TCGv_ptr fpst = get_fpstatus_ptr(0);
+ uint32_t rd, rn, rm;
+ bool dp = a->dp;
+ bool vmin = a->op;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
+ ((a->vm | a->vn | a->vd) & 0x10)) {
+ return false;
+ }
+ rd = a->vd;
+ rn = a->vn;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(0);

if (dp) {
TCGv_i64 frn, frm, dest;
@@ -XXX,XX +XXX,XX @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
}

tcg_temp_free_ptr(fpst);
- return 0;
+ return true;
}

static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
@@ -XXX,XX +XXX,XX @@ static const uint8_t fp_decode_rm[] = {

static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
{
- uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
+ uint32_t rd, rm, dp = extract32(insn, 8, 1);

if (dp) {
VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
VFP_DREG_M(rm, insn);
} else {
rd = VFP_SREG_D(insn);
- rn = VFP_SREG_N(insn);
rm = VFP_SREG_M(insn);
}

- if ((insn & 0x0fb00e10) == 0x0e800a00 &&
- dc_isar_feature(aa32_vminmaxnm, s)) {
- return handle_vminmaxnm(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
- dc_isar_feature(aa32_vrint, s)) {
+ if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
+ dc_isar_feature(aa32_vrint, s)) {
/* VRINTA, VRINTN, VRINTP, VRINTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vrint(insn, rd, rm, dp, rounding);
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -XXX,XX +XXX,XX @@ VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
+
+VMINMAXNM 1111 1110 1.00 .... .... 1010 . op:1 .0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp dp=0
+VMINMAXNM 1111 1110 1.00 .... .... 1011 . op:1 .0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
--
2.20.1

We were accidentally permitting decode of Thumb Neon insns even if
the CPU didn't have the FEATURE_NEON bit set, because the feature
check was being done before the call to disas_neon_data_insn() and
disas_neon_ls_insn() in the Arm decoder but was omitted from the
Thumb decoder. Push the feature bit check down into the called
functions so it is done for both Arm and Thumb encodings.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20200430181003.21682-3-peter.maydell@linaro.org
---
target/arm/translate.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
TCGv_i32 tmp2;
TCGv_i64 tmp64;

+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
+
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
TCGv_ptr ptr1, ptr2, ptr3;
TCGv_i64 tmp64;

+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
+
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)

if (((insn >> 25) & 7) == 1) {
/* NEON Data processing. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
if (disas_neon_data_insn(s, insn)) {
goto illegal_op;
}
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
}
if ((insn & 0x0f100000) == 0x04000000) {
/* NEON load/store. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
if (disas_neon_ls_insn(s, insn)) {
goto illegal_op;
}
--
2.20.1
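For readers new to decodetree: the VMINMAXNM patterns added above cause scripts/decodetree.py to emit an argument struct and call trans_VMINMAXNM() with it. Roughly, the struct looks like the following (an illustrative sketch only; the exact generated layout may differ):

typedef struct {
    int vm;
    int vn;
    int vd;
    int dp; /* 0: the single-precision pattern matched, 1: double-precision */
    int op; /* 1 selects VMINNM, 0 selects VMAXNM */
} arg_VMINMAXNM;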
Add the infrastructure for building and invoking a decodetree decoder
for the AArch32 VFP encodings. At the moment the new decoder covers
nothing, so we always fall back to the existing hand-written decode.

We need to have one decoder for the unconditional insns and one for
the conditional insns, as otherwise the patterns for conditional
insns would incorrectly match against the unconditional ones too.

Since translate.c is over 14,000 lines long and we're going to be
touching pretty much every line of the VFP code as part of the
decodetree conversion, we create a new translate-vfp.inc.c to hold
the code which deals with VFP in the new scheme. It should be
possible to convert this into a standalone translation unit
eventually, but the conversion process will be much simpler if we
simply #include it midway through translate.c to start with.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/Makefile.objs | 13 +++++++++++++
target/arm/translate-vfp.inc.c | 31 +++++++++++++++++++++++++++++++
target/arm/translate.c | 19 +++++++++++++++++++
target/arm/vfp-uncond.decode | 28 ++++++++++++++++++++++++++++
target/arm/vfp.decode | 28 ++++++++++++++++++++++++++++
5 files changed, 119 insertions(+)
create mode 100644 target/arm/translate-vfp.inc.c
create mode 100644 target/arm/vfp-uncond.decode
create mode 100644 target/arm/vfp.decode

diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -XXX,XX +XXX,XX @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
     $(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
     "GEN", $(TARGET_DIR)$@)

+target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE)
+    $(call quiet-command,\
+     $(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\
+     "GEN", $(TARGET_DIR)$@)
+
+target/arm/decode-vfp-uncond.inc.c: $(SRC_PATH)/target/arm/vfp-uncond.decode $(DECODETREE)
+    $(call quiet-command,\
+     $(PYTHON) $(DECODETREE) --static-decode disas_vfp_uncond -o $@ $<,\
+     "GEN", $(TARGET_DIR)$@)
+
target/arm/translate-sve.o: target/arm/decode-sve.inc.c
+target/arm/translate.o: target/arm/decode-vfp.inc.c
+target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c
+
obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * ARM translation: AArch32 VFP instructions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ * Copyright (c) 2019 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This file is intended to be included from translate.c; it uses
+ * some macros and definitions provided by that file.
+ * It might be possible to convert it to a standalone .c file eventually.
+ */
+
+/* Include the generated VFP decoder */
+#include "decode-vfp.inc.c"
+#include "decode-vfp-uncond.inc.c"
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_mov_vreg_F0(int dp, int reg)

#define ARM_CP_RW_BIT (1 << 20)

+/* Include the VFP decoder */
+#include "translate-vfp.inc.c"
+
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 1;
}

+ /*
+ * If the decodetree decoder handles this insn it will always
+ * emit code to either execute the insn or generate an appropriate
+ * exception; so we don't need to ever return non-zero to tell
+ * the calling code to emit an UNDEF exception.
+ */
+ if (extract32(insn, 28, 4) == 0xf) {
+ if (disas_vfp_uncond(s, insn)) {
+ return 0;
+ }
+ } else {
+ if (disas_vfp(s, insn)) {
+ return 0;
+ }
+ }
+
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/vfp-uncond.decode
@@ -XXX,XX +XXX,XX @@
+# AArch32 VFP instruction descriptions (unconditional insns)
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# Encodings for the unconditional VFP instructions are here:
+# generally anything matching A32
+# 1111 1110 .... .... .... 101. ...0 ....
+# and T32
+# 1111 110. .... .... .... 101. .... ....
+# 1111 1110 .... .... .... 101. .... ....
+# (but those patterns might also cover some Neon instructions,
+# which do not live in this file.)
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@
+# AArch32 VFP instruction descriptions (conditional insns)
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# Encodings for the conditional VFP instructions are here:
+# generally anything matching A32
+# cccc 11.. .... .... .... 101. .... ....
+# and T32
+# 1110 110. .... .... .... 101. .... ....
+# 1110 1110 .... .... .... 101. .... ....
+# (but those patterns might also cover some Neon instructions,
+# which do not live in this file.)
--
2.20.1

Add the infrastructure for building and invoking a decodetree decoder
for the AArch32 Neon encodings. At the moment the new decoder covers
nothing, so we always fall back to the existing hand-written decode.

We follow the same pattern we did for the VFP decodetree conversion
(commit 78e138bc1f672c145ef6ace74617d and following): code that deals
with Neon will be moving gradually out to translate-neon.inc.c,
which we #include into translate.c.

In order to share the decode files between A32 and T32, we
split Neon into 3 parts:
 * data-processing
 * load-store
 * 'shared' encodings

The first two groups of instructions have similar but not identical
A32 and T32 encodings, so we need to manually transform the T32
encoding into the A32 one before calling the decoder; the third group
covers the Neon instructions which are identical in A32 and T32.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-4-peter.maydell@linaro.org
---
target/arm/neon-dp.decode | 29 ++++++++++++++++++++++++++
target/arm/neon-ls.decode | 29 ++++++++++++++++++++++++++
target/arm/neon-shared.decode | 27 +++++++++++++++++++++++++
target/arm/translate-neon.inc.c | 32 +++++++++++++++++++++++++++++
target/arm/translate.c | 36 +++++++++++++++++++++++++++++++--
target/arm/Makefile.objs | 18 +++++++++++++++++
6 files changed, 169 insertions(+), 2 deletions(-)
create mode 100644 target/arm/neon-dp.decode
create mode 100644 target/arm/neon-ls.decode
create mode 100644 target/arm/neon-shared.decode
create mode 100644 target/arm/translate-neon.inc.c

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@
+# AArch32 Neon data-processing instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon data processing instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# and the T32 encoding is
+# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/neon-ls.decode
@@ -XXX,XX +XXX,XX @@
+# AArch32 Neon load/store instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon load/store instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# and the T32 encoding is
+# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/neon-shared.decode
@@ -XXX,XX +XXX,XX @@
+# AArch32 Neon instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon instructions whose encoding is the same for
+# both A32 and T32.
+
+# More specifically, this covers:
+# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * ARM translation: AArch32 Neon instructions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This file is intended to be included from translate.c; it uses
+ * some macros and definitions provided by that file.
+ * It might be possible to convert it to a standalone .c file eventually.
+ */
+
+/* Include the generated Neon decoder */
+#include "decode-neon-dp.inc.c"
+#include "decode-neon-ls.inc.c"
+#include "decode-neon-shared.inc.c"
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)

#define ARM_CP_RW_BIT (1 << 20)

-/* Include the VFP decoder */
+/* Include the VFP and Neon decoders */
#include "translate-vfp.inc.c"
+#include "translate-neon.inc.c"

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
/* Unconditional instructions. */
/* TODO: Perhaps merge these into one decodetree output file. */
if (disas_a32_uncond(s, insn) ||
- disas_vfp_uncond(s, insn)) {
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_dp(s, insn) ||
+ disas_neon_ls(s, insn) ||
+ disas_neon_shared(s, insn)) {
return;
}
/* fall back to legacy decoder */
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
ARCH(6T2);
}

+ if ((insn & 0xef000000) == 0xef000000) {
+ /*
+ * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0xe2ffffff) |
+ ((insn & (1 << 28)) >> 4) | (1 << 28);
+
+ if (disas_neon_dp(s, a32_insn)) {
+ return;
+ }
+ }
+
+ if ((insn & 0xff100000) == 0xf9000000) {
+ /*
+ * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
+
+ if (disas_neon_ls(s, a32_insn)) {
+ return;
+ }
+ }
+
/*
* TODO: Perhaps merge these into one decodetree output file.
* Note disas_vfp is written for a32 with cond field in the
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
*/
if (disas_t32(s, insn) ||
disas_vfp_uncond(s, insn) ||
+ disas_neon_shared(s, insn) ||
((insn >> 28) == 0xe && disas_vfp(s, insn))) {
return;
}
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -XXX,XX +XXX,XX @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
     $(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
     "GEN", $(TARGET_DIR)$@)

+target/arm/decode-neon-shared.inc.c: $(SRC_PATH)/target/arm/neon-shared.decode $(DECODETREE)
+    $(call quiet-command,\
+     $(PYTHON) $(DECODETREE) --static-decode disas_neon_shared -o $@ $<,\
+     "GEN", $(TARGET_DIR)$@)
+
+target/arm/decode-neon-dp.inc.c: $(SRC_PATH)/target/arm/neon-dp.decode $(DECODETREE)
+    $(call quiet-command,\
+     $(PYTHON) $(DECODETREE) --static-decode disas_neon_dp -o $@ $<,\
+     "GEN", $(TARGET_DIR)$@)
+
+target/arm/decode-neon-ls.inc.c: $(SRC_PATH)/target/arm/neon-ls.decode $(DECODETREE)
+    $(call quiet-command,\
+     $(PYTHON) $(DECODETREE) --static-decode disas_neon_ls -o $@ $<,\
+     "GEN", $(TARGET_DIR)$@)
+
target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE)
    $(call quiet-command,\
     $(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\
@@ -XXX,XX +XXX,XX @@ target/arm/decode-t16.inc.c: $(SRC_PATH)/target/arm/t16.decode $(DECODETREE)
     "GEN", $(TARGET_DIR)$@)

target/arm/translate-sve.o: target/arm/decode-sve.inc.c
+target/arm/translate.o: target/arm/decode-neon-shared.inc.c
+target/arm/translate.o: target/arm/decode-neon-dp.inc.c
+target/arm/translate.o: target/arm/decode-neon-ls.inc.c
target/arm/translate.o: target/arm/decode-vfp.inc.c
target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c
target/arm/translate.o: target/arm/decode-a32.inc.c
--
2.20.1
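The T32-to-A32 transformation used for the Neon data-processing case above is easy to check in isolation; the following standalone snippet (example values only, not code from the patch) asserts that top byte 0b111p_1111 becomes 0b1111_001p for both values of p:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* p == 1: a T32 insn with top byte 0xff must map to A32 top byte 0xf3 */
    uint32_t insn = 0xff000000;
    uint32_t a32 = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
    assert((a32 >> 24) == 0xf3);

    /* p == 0: T32 top byte 0xef must map to A32 top byte 0xf2 */
    insn = 0xef000000;
    a32 = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
    assert((a32 >> 24) == 0xf2);
    return 0;
}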
Convert the VCVT double/single precision conversion insns to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-vfp.inc.c | 48 ++++++++++++++++++++++++++++++++++
target/arm/translate.c | 13 +--------
target/arm/vfp.decode | 6 +++++
3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
tcg_temp_free_i64(tmp);
return true;
}
+
+static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
+{
+ TCGv_i64 vd;
+ TCGv_i32 vm;
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i64();
+ neon_load_reg32(vm, a->vm);
+ gen_helper_vfp_fcvtds(vd, vm, cpu_env);
+ neon_store_reg64(vd, a->vd);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_i64(vd);
+ return true;
+}
+
+static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
+{
+ TCGv_i64 vm;
+ TCGv_i32 vd;
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i32();
+ vm = tcg_temp_new_i64();
+ neon_load_reg64(vm, a->vm);
+ gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
+ neon_store_reg32(vd, a->vd);
+ tcg_temp_free_i32(vd);
+ tcg_temp_free_i64(vm);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 1;
case 15:
switch (rn) {
- case 0 ... 14:
+ case 0 ... 15:
/* Already handled by decodetree */
return 1;
default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
if (op == 15) {
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
- case 0x0f: /* vcvt double<->single */
- rd_is_dp = !dp;
- break;
-
case 0x10: /* vcvt.fxx.u32 */
case 0x11: /* vcvt.fxx.s32 */
rm_is_dp = false;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
switch (op) {
case 15: /* extension space */
switch (rn) {
- case 15: /* single<->double conversion */
- if (dp) {
- gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
- } else {
- gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
- }
- break;
case 16: /* fuito */
gen_vfp_uito(dp, 0);
break;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... \
vd=%vd_sp vm=%vm_sp
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... \
vd=%vd_dp vm=%vm_dp
+
+# VCVT between single and double: Vm precision depends on size; Vd is its reverse
+VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... \
+ vd=%vd_dp vm=%vm_sp
+VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... \
+ vd=%vd_sp vm=%vm_dp
--
2.20.1

Convert the VCMLA (vector) insns in the 3same extension group to
decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-5-peter.maydell@linaro.org
---
target/arm/neon-shared.decode | 11 ++++++++++
target/arm/translate-neon.inc.c | 37 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 11 +---------
3 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -XXX,XX +XXX,XX @@
# More specifically, this covers:
# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@
#include "decode-neon-dp.inc.c"
#include "decode-neon-ls.inc.c"
#include "decode-neon-shared.inc.c"
+
+static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
+{
+ int opr_sz;
+ TCGv_ptr fpst;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fpst = get_fpstatus_ptr(1);
+ fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz, a->rot,
+ fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
bool is_long = false, q = extract32(insn, 6, 1);
bool ptr_is_env = false;

- if ((insn & 0xfe200f10) == 0xfc200800) {
- /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 23, 2); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
- } else if ((insn & 0xfea00f10) == 0xfc800800) {
+ if ((insn & 0xfea00f10) == 0xfc800800) {
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
int size = extract32(insn, 20, 1);
data = extract32(insn, 24, 1); /* rot */
--
2.20.1
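The %vm_dp/%vd_sp field definitions copied into neon-shared.decode above concatenate two bitfields, most-significant part first. In plain C the unpacking looks like this (the helper names are mine, for illustration; the generated decoder does the equivalent):

#include <stdint.h>

static inline uint32_t field(uint32_t insn, int pos, int len)
{
    return (insn >> pos) & ((1u << len) - 1);
}

/* "%vd_dp 22:1 12:4": the D bit (insn[22]) is the MSB of a D-register number */
static int vd_dp(uint32_t insn)
{
    return (int)((field(insn, 22, 1) << 4) | field(insn, 12, 4));
}

/* "%vd_sp 12:4 22:1": for S registers the D bit is the LSB instead */
static int vd_sp(uint32_t insn)
{
    return (int)((field(insn, 12, 4) << 1) | field(insn, 22, 1));
}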
Convert the VFP single load/store insns VLDR and VSTR to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-vfp.inc.c | 73 ++++++++++++++++++++++++++++++++++
target/arm/translate.c | 22 +---------
target/arm/vfp.decode | 7 ++++
3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_sp *a)
return true;
}
+
+static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << 2;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ if (s->thumb && a->rn == 15) {
+ /* This is actually UNPREDICTABLE */
+ addr = tcg_temp_new_i32();
+ tcg_gen_movi_i32(addr, s->pc & ~2);
+ } else {
+ addr = load_reg(s, a->rn);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ if (a->l) {
+ gen_vfp_ld(s, false, addr);
+ gen_mov_vreg_F0(false, a->vd);
+ } else {
+ gen_mov_F0_vreg(false, a->vd);
+ gen_vfp_st(s, false, addr);
+ }
+ tcg_temp_free_i32(addr);
+
+ return true;
+}
+
+static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << 2;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ if (s->thumb && a->rn == 15) {
+ /* This is actually UNPREDICTABLE */
+ addr = tcg_temp_new_i32();
+ tcg_gen_movi_i32(addr, s->pc & ~2);
+ } else {
+ addr = load_reg(s, a->rn);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ if (a->l) {
+ gen_vfp_ld(s, true, addr);
+ gen_mov_vreg_F0(true, a->vd);
+ } else {
+ gen_mov_F0_vreg(true, a->vd);
+ gen_vfp_st(s, true, addr);
+ }
+ tcg_temp_free_i32(addr);
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
else
rd = VFP_SREG_D(insn);
if ((insn & 0x01200000) == 0x01000000) {
- /* Single load/store */
- offset = (insn & 0xff) << 2;
- if ((insn & (1 << 23)) == 0)
- offset = -offset;
- if (s->thumb && rn == 15) {
- /* This is actually UNPREDICTABLE */
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc & ~2);
- } else {
- addr = load_reg(s, rn);
- }
- tcg_gen_addi_i32(addr, addr, offset);
- if (insn & (1 << 20)) {
- gen_vfp_ld(s, dp, addr);
- gen_mov_vreg_F0(dp, rd);
- } else {
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_st(s, dp, addr);
- }
- tcg_temp_free_i32(addr);
+ /* Already handled by decodetree */
+ return 1;
} else {
/* load/store multiple */
int w = insn & (1 << 21);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... \
vm=%vm_sp
VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... \
vm=%vm_dp
+
+# Note that the half-precision variants of VLDR and VSTR are
+# not part of this decodetree at all because they have bits [9:8] == 0b01
+VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 \
+ vd=%vd_sp
+VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 \
+ vd=%vd_dp
--
2.20.1

Convert the VCADD (vector) insns to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-6-peter.maydell@linaro.org
---
target/arm/neon-shared.decode | 3 +++
target/arm/translate-neon.inc.c | 37 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 11 +---------
3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-shared.decode
+++ b/target/arm/neon-shared.decode
@@ -XXX,XX +XXX,XX @@

VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
tcg_temp_free_ptr(fpst);
return true;
}
+
+static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
+{
+ int opr_sz;
+ TCGv_ptr fpst;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fpst = get_fpstatus_ptr(1);
+ fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz, a->rot,
+ fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
bool is_long = false, q = extract32(insn, 6, 1);
bool ptr_is_env = false;

- if ((insn & 0xfea00f10) == 0xfc800800) {
- /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 24, 1); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
- } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
+ if ((insn & 0xfeb00f00) == 0xfc200d00) {
/* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
bool u = extract32(insn, 4, 1);
if (!dc_isar_feature(aa32_dp, s)) {
--
2.20.1
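The addressing arithmetic in trans_VLDR_VSTR_sp()/trans_VLDR_VSTR_dp() above, written out as plain C (a sketch only; 'base' stands in for the value of Rn, or of the PC when the Thumb Rn==15 quirk applies):

#include <stdbool.h>
#include <stdint.h>

static uint32_t vldr_vstr_addr(uint32_t base, uint32_t imm8, bool u,
                               bool thumb_pc_base)
{
    uint32_t offset = imm8 << 2;   /* imm:8 scaled to a byte offset */
    if (!u) {
        offset = -offset;          /* the U bit selects add vs subtract */
    }
    if (thumb_pc_base) {
        base &= ~2u;               /* Thumb PC base is word-aligned first */
    }
    return base + offset;
}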
Convert the VJCVT instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 28 ++++++++++++++++++++++++++++
 target/arm/translate.c         | 12 +-----------
 target/arm/vfp.decode          |  4 ++++
 3 files changed, 33 insertions(+), 11 deletions(-)

Convert the V[US]DOT (vector) insns to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-7-peter.maydell@linaro.org
---
 target/arm/neon-shared.decode   |  4 ++++
 target/arm/translate-neon.inc.c | 32 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  9 +--------
 3 files changed, 37 insertions(+), 8 deletions(-)
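A minimal sketch of the per-element arithmetic the V[US]DOT conversion
targets, assuming the architectural definition: each 32-bit lane of Vd
accumulates the dot product of four signed (VSDOT) or unsigned (VUDOT)
bytes from the corresponding lanes of Vn and Vm. Illustrative plain C
only; the real work happens in the gvec_sdot_b/gvec_udot_b helpers.

#include <stdint.h>
#include <stdio.h>

/* One 32-bit lane of VSDOT: acc += sum of four signed byte products. */
static int32_t sdot_lane(int32_t acc, uint32_t n, uint32_t m)
{
    for (int i = 0; i < 4; i++) {
        int8_t nb = (int8_t)(n >> (8 * i));
        int8_t mb = (int8_t)(m >> (8 * i));
        acc += nb * mb;
    }
    return acc;
}

int main(void)
{
    /* bytes (1,2,3,4) . (5,6,7,8) = 5 + 12 + 21 + 32 = 70 */
    printf("%d\n", sdot_lane(0, 0x04030201u, 0x08070605u));
    return 0;
}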
11
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
12
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-vfp.inc.c
14
--- a/target/arm/neon-shared.decode
14
+++ b/target/arm/translate-vfp.inc.c
15
+++ b/target/arm/neon-shared.decode
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
16
@@ -XXX,XX +XXX,XX @@ VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
17
18
VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
19
vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
+
21
+# VUDOT and VSDOT
22
+VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
24
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/translate-neon.inc.c
27
+++ b/target/arm/translate-neon.inc.c
28
@@ -XXX,XX +XXX,XX @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
16
tcg_temp_free_ptr(fpst);
29
tcg_temp_free_ptr(fpst);
17
return true;
30
return true;
18
}
31
}
19
+
32
+
20
+static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
33
+static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
21
+{
34
+{
22
+ TCGv_i32 vd;
35
+ int opr_sz;
23
+ TCGv_i64 vm;
36
+ gen_helper_gvec_3 *fn_gvec;
24
+
37
+
25
+ if (!dc_isar_feature(aa32_jscvt, s)) {
38
+ if (!dc_isar_feature(aa32_dp, s)) {
26
+ return false;
39
+ return false;
27
+ }
40
+ }
28
+
41
+
29
+ /* UNDEF accesses to D16-D31 if they don't exist. */
42
+ /* UNDEF accesses to D16-D31 if they don't exist. */
30
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
43
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
44
+ ((a->vd | a->vn | a->vm) & 0x10)) {
45
+ return false;
46
+ }
47
+
48
+ if ((a->vn | a->vm | a->vd) & a->q) {
31
+ return false;
49
+ return false;
32
+ }
50
+ }
33
+
51
+
34
+ if (!vfp_access_check(s)) {
52
+ if (!vfp_access_check(s)) {
35
+ return true;
53
+ return true;
36
+ }
54
+ }
37
+
55
+
38
+ vm = tcg_temp_new_i64();
56
+ opr_sz = (1 + a->q) * 8;
39
+ vd = tcg_temp_new_i32();
57
+ fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
40
+ neon_load_reg64(vm, a->vm);
58
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
41
+ gen_helper_vjcvt(vd, vm, cpu_env);
59
+ vfp_reg_offset(1, a->vn),
42
+ neon_store_reg32(vd, a->vd);
60
+ vfp_reg_offset(1, a->vm),
43
+ tcg_temp_free_i64(vm);
61
+ opr_sz, opr_sz, 0, fn_gvec);
44
+ tcg_temp_free_i32(vd);
45
+ return true;
62
+ return true;
46
+}
63
+}
47
diff --git a/target/arm/translate.c b/target/arm/translate.c
64
diff --git a/target/arm/translate.c b/target/arm/translate.c
48
index XXXXXXX..XXXXXXX 100644
65
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate.c
66
--- a/target/arm/translate.c
50
+++ b/target/arm/translate.c
67
+++ b/target/arm/translate.c
51
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
68
@@ -XXX,XX +XXX,XX @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
52
return 1;
69
bool is_long = false, q = extract32(insn, 6, 1);
53
case 15:
70
bool ptr_is_env = false;
54
switch (rn) {
71
55
- case 0 ... 17:
72
- if ((insn & 0xfeb00f00) == 0xfc200d00) {
56
+ case 0 ... 19:
73
- /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
57
/* Already handled by decodetree */
74
- bool u = extract32(insn, 4, 1);
58
return 1;
75
- if (!dc_isar_feature(aa32_dp, s)) {
59
default:
76
- return 1;
60
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
77
- }
61
rm_is_dp = false;
78
- fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
62
break;
79
- } else if ((insn & 0xff300f10) == 0xfc200810) {
63
80
+ if ((insn & 0xff300f10) == 0xfc200810) {
64
- case 0x13: /* vjcvt */
81
/* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
65
- if (!dp || !dc_isar_feature(aa32_jscvt, s)) {
82
int is_s = extract32(insn, 23, 1);
66
- return 1;
83
if (!dc_isar_feature(aa32_fhm, s)) {
67
- }
68
- rd_is_dp = false;
69
- break;
70
-
71
default:
72
return 1;
73
}
74
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
75
switch (op) {
76
case 15: /* extension space */
77
switch (rn) {
78
- case 19: /* vjcvt */
79
- gen_helper_vjcvt(cpu_F0s, cpu_F0d, cpu_env);
80
- break;
81
case 20: /* fshto */
82
gen_vfp_shto(dp, 16 - rm, 0);
83
break;
84
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
85
index XXXXXXX..XXXXXXX 100644
86
--- a/target/arm/vfp.decode
87
+++ b/target/arm/vfp.decode
88
@@ -XXX,XX +XXX,XX @@ VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
89
vd=%vd_sp vm=%vm_sp
90
VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
91
vd=%vd_dp vm=%vm_sp
92
+
93
+# VJCVT is always dp to sp
94
+VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... \
95
+ vd=%vd_sp vm=%vm_dp
96
--
84
--
97
2.20.1
85
2.20.1
98
86
99
87
Move the trans_*() functions we've just created from translate.c
to translate-vfp.inc.c. This is pure code motion with no textual
changes (this can be checked with 'git show --color-moved').

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 337 +++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 337 ---------------------------------
 2 files changed, 337 insertions(+), 337 deletions(-)

Convert the VFM[AS]L (vector) insns to decodetree. This is the last
insn in the legacy decoder for the 3same_ext group, so we can
delete the legacy decoder function for the group entirely.

Note that in disas_thumb2_insn() the parts of this encoding space
where the decodetree decoder returns false will correctly be directed
to illegal_op by the "(insn & (1 << 28))" check so they won't fall
into disas_coproc_insn() by mistake.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-8-peter.maydell@linaro.org
---
 target/arm/neon-shared.decode   |  6 +++
 target/arm/translate-neon.inc.c | 31 +++++++++++
 target/arm/translate.c          | 92 +--------------------------------
 3 files changed, 38 insertions(+), 91 deletions(-)
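Among the trans functions being moved is the VSEL implementation, which
selects between Vn and Vm on the cached NZCV flags via movcond. A rough
standalone model of that selection logic, assuming architectural flag
values rather than the TCG flag encoding (hypothetical code, invented
names):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* cc selects EQ/VS/GE/GT; result is vn when the condition holds. */
static uint64_t vsel(int cc, bool n, bool z, bool v,
                     uint64_t vn, uint64_t vm)
{
    bool take;
    switch (cc) {
    case 0:  take = z; break;               /* eq: Z set */
    case 1:  take = v; break;               /* vs: V set */
    case 2:  take = n == v; break;          /* ge: N == V */
    default: take = !z && (n == v); break;  /* gt: !Z && N == V */
    }
    return take ? vn : vm;
}

int main(void)
{
    /* ge with N == V == 1 takes the first operand */
    printf("%llu\n", (unsigned long long)vsel(2, true, false, true, 1, 2));
    return 0;
}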
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
19
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
13
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/translate-vfp.inc.c
21
--- a/target/arm/neon-shared.decode
15
+++ b/target/arm/translate-vfp.inc.c
22
+++ b/target/arm/neon-shared.decode
16
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check(DisasContext *s)
23
@@ -XXX,XX +XXX,XX @@ VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
17
{
24
# VUDOT and VSDOT
18
return full_vfp_access_check(s, false);
25
VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \
26
vm=%vm_dp vn=%vn_dp vd=%vd_dp
27
+
28
+# VFM[AS]L
29
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
30
+ vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
31
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
32
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
33
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/translate-neon.inc.c
36
+++ b/target/arm/translate-neon.inc.c
37
@@ -XXX,XX +XXX,XX @@ static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
38
opr_sz, opr_sz, 0, fn_gvec);
39
return true;
19
}
40
}
20
+
41
+
21
+static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
42
+static bool trans_VFML(DisasContext *s, arg_VFML *a)
22
+{
43
+{
23
+ uint32_t rd, rn, rm;
44
+ int opr_sz;
24
+ bool dp = a->dp;
25
+
45
+
26
+ if (!dc_isar_feature(aa32_vsel, s)) {
46
+ if (!dc_isar_feature(aa32_fhm, s)) {
27
+ return false;
47
+ return false;
28
+ }
48
+ }
29
+
49
+
30
+ /* UNDEF accesses to D16-D31 if they don't exist */
50
+ /* UNDEF accesses to D16-D31 if they don't exist. */
31
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
51
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
32
+ ((a->vm | a->vn | a->vd) & 0x10)) {
52
+ (a->vd & 0x10)) {
33
+ return false;
53
+ return false;
34
+ }
54
+ }
35
+ rd = a->vd;
55
+
36
+ rn = a->vn;
56
+ if (a->vd & a->q) {
37
+ rm = a->vm;
57
+ return false;
58
+ }
38
+
59
+
39
+ if (!vfp_access_check(s)) {
60
+ if (!vfp_access_check(s)) {
40
+ return true;
61
+ return true;
41
+ }
62
+ }
42
+
63
+
43
+ if (dp) {
64
+ opr_sz = (1 + a->q) * 8;
44
+ TCGv_i64 frn, frm, dest;
65
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
45
+ TCGv_i64 tmp, zero, zf, nf, vf;
66
+ vfp_reg_offset(a->q, a->vn),
46
+
67
+ vfp_reg_offset(a->q, a->vm),
47
+ zero = tcg_const_i64(0);
68
+ cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
48
+
69
+ gen_helper_gvec_fmlal_a32);
49
+ frn = tcg_temp_new_i64();
50
+ frm = tcg_temp_new_i64();
51
+ dest = tcg_temp_new_i64();
52
+
53
+ zf = tcg_temp_new_i64();
54
+ nf = tcg_temp_new_i64();
55
+ vf = tcg_temp_new_i64();
56
+
57
+ tcg_gen_extu_i32_i64(zf, cpu_ZF);
58
+ tcg_gen_ext_i32_i64(nf, cpu_NF);
59
+ tcg_gen_ext_i32_i64(vf, cpu_VF);
60
+
61
+ tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
62
+ tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
63
+ switch (a->cc) {
64
+ case 0: /* eq: Z */
65
+ tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
66
+ frn, frm);
67
+ break;
68
+ case 1: /* vs: V */
69
+ tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
70
+ frn, frm);
71
+ break;
72
+ case 2: /* ge: N == V -> N ^ V == 0 */
73
+ tmp = tcg_temp_new_i64();
74
+ tcg_gen_xor_i64(tmp, vf, nf);
75
+ tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
76
+ frn, frm);
77
+ tcg_temp_free_i64(tmp);
78
+ break;
79
+ case 3: /* gt: !Z && N == V */
80
+ tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
81
+ frn, frm);
82
+ tmp = tcg_temp_new_i64();
83
+ tcg_gen_xor_i64(tmp, vf, nf);
84
+ tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
85
+ dest, frm);
86
+ tcg_temp_free_i64(tmp);
87
+ break;
88
+ }
89
+ tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
90
+ tcg_temp_free_i64(frn);
91
+ tcg_temp_free_i64(frm);
92
+ tcg_temp_free_i64(dest);
93
+
94
+ tcg_temp_free_i64(zf);
95
+ tcg_temp_free_i64(nf);
96
+ tcg_temp_free_i64(vf);
97
+
98
+ tcg_temp_free_i64(zero);
99
+ } else {
100
+ TCGv_i32 frn, frm, dest;
101
+ TCGv_i32 tmp, zero;
102
+
103
+ zero = tcg_const_i32(0);
104
+
105
+ frn = tcg_temp_new_i32();
106
+ frm = tcg_temp_new_i32();
107
+ dest = tcg_temp_new_i32();
108
+ tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
109
+ tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
110
+ switch (a->cc) {
111
+ case 0: /* eq: Z */
112
+ tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
113
+ frn, frm);
114
+ break;
115
+ case 1: /* vs: V */
116
+ tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
117
+ frn, frm);
118
+ break;
119
+ case 2: /* ge: N == V -> N ^ V == 0 */
120
+ tmp = tcg_temp_new_i32();
121
+ tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
122
+ tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
123
+ frn, frm);
124
+ tcg_temp_free_i32(tmp);
125
+ break;
126
+ case 3: /* gt: !Z && N == V */
127
+ tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
128
+ frn, frm);
129
+ tmp = tcg_temp_new_i32();
130
+ tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
131
+ tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
132
+ dest, frm);
133
+ tcg_temp_free_i32(tmp);
134
+ break;
135
+ }
136
+ tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
137
+ tcg_temp_free_i32(frn);
138
+ tcg_temp_free_i32(frm);
139
+ tcg_temp_free_i32(dest);
140
+
141
+ tcg_temp_free_i32(zero);
142
+ }
143
+
144
+ return true;
145
+}
146
+
147
+static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
148
+{
149
+ uint32_t rd, rn, rm;
150
+ bool dp = a->dp;
151
+ bool vmin = a->op;
152
+ TCGv_ptr fpst;
153
+
154
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
155
+ return false;
156
+ }
157
+
158
+ /* UNDEF accesses to D16-D31 if they don't exist */
159
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
160
+ ((a->vm | a->vn | a->vd) & 0x10)) {
161
+ return false;
162
+ }
163
+ rd = a->vd;
164
+ rn = a->vn;
165
+ rm = a->vm;
166
+
167
+ if (!vfp_access_check(s)) {
168
+ return true;
169
+ }
170
+
171
+ fpst = get_fpstatus_ptr(0);
172
+
173
+ if (dp) {
174
+ TCGv_i64 frn, frm, dest;
175
+
176
+ frn = tcg_temp_new_i64();
177
+ frm = tcg_temp_new_i64();
178
+ dest = tcg_temp_new_i64();
179
+
180
+ tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
181
+ tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
182
+ if (vmin) {
183
+ gen_helper_vfp_minnumd(dest, frn, frm, fpst);
184
+ } else {
185
+ gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
186
+ }
187
+ tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
188
+ tcg_temp_free_i64(frn);
189
+ tcg_temp_free_i64(frm);
190
+ tcg_temp_free_i64(dest);
191
+ } else {
192
+ TCGv_i32 frn, frm, dest;
193
+
194
+ frn = tcg_temp_new_i32();
195
+ frm = tcg_temp_new_i32();
196
+ dest = tcg_temp_new_i32();
197
+
198
+ tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
199
+ tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
200
+ if (vmin) {
201
+ gen_helper_vfp_minnums(dest, frn, frm, fpst);
202
+ } else {
203
+ gen_helper_vfp_maxnums(dest, frn, frm, fpst);
204
+ }
205
+ tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
206
+ tcg_temp_free_i32(frn);
207
+ tcg_temp_free_i32(frm);
208
+ tcg_temp_free_i32(dest);
209
+ }
210
+
211
+ tcg_temp_free_ptr(fpst);
212
+ return true;
213
+}
214
+
215
+/*
216
+ * Table for converting the most common AArch32 encoding of
217
+ * rounding mode to arm_fprounding order (which matches the
218
+ * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
219
+ */
220
+static const uint8_t fp_decode_rm[] = {
221
+ FPROUNDING_TIEAWAY,
222
+ FPROUNDING_TIEEVEN,
223
+ FPROUNDING_POSINF,
224
+ FPROUNDING_NEGINF,
225
+};
226
+
227
+static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
228
+{
229
+ uint32_t rd, rm;
230
+ bool dp = a->dp;
231
+ TCGv_ptr fpst;
232
+ TCGv_i32 tcg_rmode;
233
+ int rounding = fp_decode_rm[a->rm];
234
+
235
+ if (!dc_isar_feature(aa32_vrint, s)) {
236
+ return false;
237
+ }
238
+
239
+ /* UNDEF accesses to D16-D31 if they don't exist */
240
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
241
+ ((a->vm | a->vd) & 0x10)) {
242
+ return false;
243
+ }
244
+ rd = a->vd;
245
+ rm = a->vm;
246
+
247
+ if (!vfp_access_check(s)) {
248
+ return true;
249
+ }
250
+
251
+ fpst = get_fpstatus_ptr(0);
252
+
253
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
254
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
255
+
256
+ if (dp) {
257
+ TCGv_i64 tcg_op;
258
+ TCGv_i64 tcg_res;
259
+ tcg_op = tcg_temp_new_i64();
260
+ tcg_res = tcg_temp_new_i64();
261
+ tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
262
+ gen_helper_rintd(tcg_res, tcg_op, fpst);
263
+ tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
264
+ tcg_temp_free_i64(tcg_op);
265
+ tcg_temp_free_i64(tcg_res);
266
+ } else {
267
+ TCGv_i32 tcg_op;
268
+ TCGv_i32 tcg_res;
269
+ tcg_op = tcg_temp_new_i32();
270
+ tcg_res = tcg_temp_new_i32();
271
+ tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
272
+ gen_helper_rints(tcg_res, tcg_op, fpst);
273
+ tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
274
+ tcg_temp_free_i32(tcg_op);
275
+ tcg_temp_free_i32(tcg_res);
276
+ }
277
+
278
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
279
+ tcg_temp_free_i32(tcg_rmode);
280
+
281
+ tcg_temp_free_ptr(fpst);
282
+ return true;
283
+}
284
+
285
+static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
286
+{
287
+ uint32_t rd, rm;
288
+ bool dp = a->dp;
289
+ TCGv_ptr fpst;
290
+ TCGv_i32 tcg_rmode, tcg_shift;
291
+ int rounding = fp_decode_rm[a->rm];
292
+ bool is_signed = a->op;
293
+
294
+ if (!dc_isar_feature(aa32_vcvt_dr, s)) {
295
+ return false;
296
+ }
297
+
298
+ /* UNDEF accesses to D16-D31 if they don't exist */
299
+ if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
300
+ return false;
301
+ }
302
+ rd = a->vd;
303
+ rm = a->vm;
304
+
305
+ if (!vfp_access_check(s)) {
306
+ return true;
307
+ }
308
+
309
+ fpst = get_fpstatus_ptr(0);
310
+
311
+ tcg_shift = tcg_const_i32(0);
312
+
313
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
314
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
315
+
316
+ if (dp) {
317
+ TCGv_i64 tcg_double, tcg_res;
318
+ TCGv_i32 tcg_tmp;
319
+ tcg_double = tcg_temp_new_i64();
320
+ tcg_res = tcg_temp_new_i64();
321
+ tcg_tmp = tcg_temp_new_i32();
322
+ tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
323
+ if (is_signed) {
324
+ gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
325
+ } else {
326
+ gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
327
+ }
328
+ tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
329
+ tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
330
+ tcg_temp_free_i32(tcg_tmp);
331
+ tcg_temp_free_i64(tcg_res);
332
+ tcg_temp_free_i64(tcg_double);
333
+ } else {
334
+ TCGv_i32 tcg_single, tcg_res;
335
+ tcg_single = tcg_temp_new_i32();
336
+ tcg_res = tcg_temp_new_i32();
337
+ tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
338
+ if (is_signed) {
339
+ gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
340
+ } else {
341
+ gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
342
+ }
343
+ tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
344
+ tcg_temp_free_i32(tcg_res);
345
+ tcg_temp_free_i32(tcg_single);
346
+ }
347
+
348
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
349
+ tcg_temp_free_i32(tcg_rmode);
350
+
351
+ tcg_temp_free_i32(tcg_shift);
352
+
353
+ tcg_temp_free_ptr(fpst);
354
+
355
+ return true;
70
+ return true;
356
+}
71
+}
357
diff --git a/target/arm/translate.c b/target/arm/translate.c
72
diff --git a/target/arm/translate.c b/target/arm/translate.c
358
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
359
--- a/target/arm/translate.c
74
--- a/target/arm/translate.c
360
+++ b/target/arm/translate.c
75
+++ b/target/arm/translate.c
361
@@ -XXX,XX +XXX,XX @@ static void gen_neon_dup_high16(TCGv_i32 var)
76
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
362
tcg_temp_free_i32(tmp);
77
return 0;
363
}
78
}
364
79
365
-static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
80
-/* Advanced SIMD three registers of the same length extension.
81
- * 31 25 23 22 20 16 12 11 10 9 8 3 0
82
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
83
- * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
84
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
85
- */
86
-static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
366
-{
87
-{
367
- uint32_t rd, rn, rm;
88
- gen_helper_gvec_3 *fn_gvec = NULL;
368
- bool dp = a->dp;
89
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
90
- int rd, rn, rm, opr_sz;
91
- int data = 0;
92
- int off_rn, off_rm;
93
- bool is_long = false, q = extract32(insn, 6, 1);
94
- bool ptr_is_env = false;
369
-
95
-
370
- if (!dc_isar_feature(aa32_vsel, s)) {
96
- if ((insn & 0xff300f10) == 0xfc200810) {
371
- return false;
97
- /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
98
- int is_s = extract32(insn, 23, 1);
99
- if (!dc_isar_feature(aa32_fhm, s)) {
100
- return 1;
101
- }
102
- is_long = true;
103
- data = is_s; /* is_2 == 0 */
104
- fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
105
- ptr_is_env = true;
106
- } else {
107
- return 1;
372
- }
108
- }
373
-
109
-
374
- /* UNDEF accesses to D16-D31 if they don't exist */
110
- VFP_DREG_D(rd, insn);
375
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
111
- if (rd & q) {
376
- ((a->vm | a->vn | a->vd) & 0x10)) {
112
- return 1;
377
- return false;
378
- }
113
- }
379
- rd = a->vd;
114
- if (q || !is_long) {
380
- rn = a->vn;
115
- VFP_DREG_N(rn, insn);
381
- rm = a->vm;
116
- VFP_DREG_M(rm, insn);
382
-
117
- if ((rn | rm) & q & !is_long) {
383
- if (!vfp_access_check(s)) {
118
- return 1;
384
- return true;
119
- }
120
- off_rn = vfp_reg_offset(1, rn);
121
- off_rm = vfp_reg_offset(1, rm);
122
- } else {
123
- rn = VFP_SREG_N(insn);
124
- rm = VFP_SREG_M(insn);
125
- off_rn = vfp_reg_offset(0, rn);
126
- off_rm = vfp_reg_offset(0, rm);
385
- }
127
- }
386
-
128
-
387
- if (dp) {
129
- if (s->fp_excp_el) {
388
- TCGv_i64 frn, frm, dest;
130
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
389
- TCGv_i64 tmp, zero, zf, nf, vf;
131
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
390
-
132
- return 0;
391
- zero = tcg_const_i64(0);
133
- }
392
-
134
- if (!s->vfp_enabled) {
393
- frn = tcg_temp_new_i64();
135
- return 1;
394
- frm = tcg_temp_new_i64();
395
- dest = tcg_temp_new_i64();
396
-
397
- zf = tcg_temp_new_i64();
398
- nf = tcg_temp_new_i64();
399
- vf = tcg_temp_new_i64();
400
-
401
- tcg_gen_extu_i32_i64(zf, cpu_ZF);
402
- tcg_gen_ext_i32_i64(nf, cpu_NF);
403
- tcg_gen_ext_i32_i64(vf, cpu_VF);
404
-
405
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
406
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
407
- switch (a->cc) {
408
- case 0: /* eq: Z */
409
- tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
410
- frn, frm);
411
- break;
412
- case 1: /* vs: V */
413
- tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
414
- frn, frm);
415
- break;
416
- case 2: /* ge: N == V -> N ^ V == 0 */
417
- tmp = tcg_temp_new_i64();
418
- tcg_gen_xor_i64(tmp, vf, nf);
419
- tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
420
- frn, frm);
421
- tcg_temp_free_i64(tmp);
422
- break;
423
- case 3: /* gt: !Z && N == V */
424
- tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
425
- frn, frm);
426
- tmp = tcg_temp_new_i64();
427
- tcg_gen_xor_i64(tmp, vf, nf);
428
- tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
429
- dest, frm);
430
- tcg_temp_free_i64(tmp);
431
- break;
432
- }
433
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
434
- tcg_temp_free_i64(frn);
435
- tcg_temp_free_i64(frm);
436
- tcg_temp_free_i64(dest);
437
-
438
- tcg_temp_free_i64(zf);
439
- tcg_temp_free_i64(nf);
440
- tcg_temp_free_i64(vf);
441
-
442
- tcg_temp_free_i64(zero);
443
- } else {
444
- TCGv_i32 frn, frm, dest;
445
- TCGv_i32 tmp, zero;
446
-
447
- zero = tcg_const_i32(0);
448
-
449
- frn = tcg_temp_new_i32();
450
- frm = tcg_temp_new_i32();
451
- dest = tcg_temp_new_i32();
452
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
453
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
454
- switch (a->cc) {
455
- case 0: /* eq: Z */
456
- tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
457
- frn, frm);
458
- break;
459
- case 1: /* vs: V */
460
- tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
461
- frn, frm);
462
- break;
463
- case 2: /* ge: N == V -> N ^ V == 0 */
464
- tmp = tcg_temp_new_i32();
465
- tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
466
- tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
467
- frn, frm);
468
- tcg_temp_free_i32(tmp);
469
- break;
470
- case 3: /* gt: !Z && N == V */
471
- tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
472
- frn, frm);
473
- tmp = tcg_temp_new_i32();
474
- tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
475
- tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
476
- dest, frm);
477
- tcg_temp_free_i32(tmp);
478
- break;
479
- }
480
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
481
- tcg_temp_free_i32(frn);
482
- tcg_temp_free_i32(frm);
483
- tcg_temp_free_i32(dest);
484
-
485
- tcg_temp_free_i32(zero);
486
- }
136
- }
487
-
137
-
488
- return true;
138
- opr_sz = (1 + q) * 8;
139
- if (fn_gvec_ptr) {
140
- TCGv_ptr ptr;
141
- if (ptr_is_env) {
142
- ptr = cpu_env;
143
- } else {
144
- ptr = get_fpstatus_ptr(1);
145
- }
146
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
147
- opr_sz, opr_sz, data, fn_gvec_ptr);
148
- if (!ptr_is_env) {
149
- tcg_temp_free_ptr(ptr);
150
- }
151
- } else {
152
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
153
- opr_sz, opr_sz, data, fn_gvec);
154
- }
155
- return 0;
489
-}
156
-}
490
-
157
-
491
-static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
158
/* Advanced SIMD two registers and a scalar extension.
492
-{
159
* 31 24 23 22 20 16 12 11 10 9 8 3 0
493
- uint32_t rd, rn, rm;
160
* +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
494
- bool dp = a->dp;
161
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
495
- bool vmin = a->op;
162
}
496
- TCGv_ptr fpst;
163
}
497
-
164
}
498
- if (!dc_isar_feature(aa32_vminmaxnm, s)) {
165
- } else if ((insn & 0x0e000a00) == 0x0c000800
499
- return false;
166
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
500
- }
167
- if (disas_neon_insn_3same_ext(s, insn)) {
501
-
168
- goto illegal_op;
502
- /* UNDEF accesses to D16-D31 if they don't exist */
169
- }
503
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
170
- return;
504
- ((a->vm | a->vn | a->vd) & 0x10)) {
171
} else if ((insn & 0x0f000a00) == 0x0e000800
505
- return false;
172
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
506
- }
173
if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
507
- rd = a->vd;
174
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
508
- rn = a->vn;
175
}
509
- rm = a->vm;
176
break;
510
-
177
}
511
- if (!vfp_access_check(s)) {
178
- if ((insn & 0xfe000a00) == 0xfc000800
512
- return true;
179
+ if ((insn & 0xff000a00) == 0xfe000800
513
- }
180
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
514
-
181
/* The Thumb2 and ARM encodings are identical. */
515
- fpst = get_fpstatus_ptr(0);
182
- if (disas_neon_insn_3same_ext(s, insn)) {
516
-
183
- goto illegal_op;
517
- if (dp) {
184
- }
518
- TCGv_i64 frn, frm, dest;
185
- } else if ((insn & 0xff000a00) == 0xfe000800
519
-
186
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
520
- frn = tcg_temp_new_i64();
187
- /* The Thumb2 and ARM encodings are identical. */
521
- frm = tcg_temp_new_i64();
188
if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
522
- dest = tcg_temp_new_i64();
189
goto illegal_op;
523
-
190
}
524
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
525
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
526
- if (vmin) {
527
- gen_helper_vfp_minnumd(dest, frn, frm, fpst);
528
- } else {
529
- gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
530
- }
531
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
532
- tcg_temp_free_i64(frn);
533
- tcg_temp_free_i64(frm);
534
- tcg_temp_free_i64(dest);
535
- } else {
536
- TCGv_i32 frn, frm, dest;
537
-
538
- frn = tcg_temp_new_i32();
539
- frm = tcg_temp_new_i32();
540
- dest = tcg_temp_new_i32();
541
-
542
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
543
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
544
- if (vmin) {
545
- gen_helper_vfp_minnums(dest, frn, frm, fpst);
546
- } else {
547
- gen_helper_vfp_maxnums(dest, frn, frm, fpst);
548
- }
549
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
550
- tcg_temp_free_i32(frn);
551
- tcg_temp_free_i32(frm);
552
- tcg_temp_free_i32(dest);
553
- }
554
-
555
- tcg_temp_free_ptr(fpst);
556
- return true;
557
-}
558
-
559
-/*
560
- * Table for converting the most common AArch32 encoding of
561
- * rounding mode to arm_fprounding order (which matches the
562
- * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
563
- */
564
-static const uint8_t fp_decode_rm[] = {
565
- FPROUNDING_TIEAWAY,
566
- FPROUNDING_TIEEVEN,
567
- FPROUNDING_POSINF,
568
- FPROUNDING_NEGINF,
569
-};
570
-
571
-static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
572
-{
573
- uint32_t rd, rm;
574
- bool dp = a->dp;
575
- TCGv_ptr fpst;
576
- TCGv_i32 tcg_rmode;
577
- int rounding = fp_decode_rm[a->rm];
578
-
579
- if (!dc_isar_feature(aa32_vrint, s)) {
580
- return false;
581
- }
582
-
583
- /* UNDEF accesses to D16-D31 if they don't exist */
584
- if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
585
- ((a->vm | a->vd) & 0x10)) {
586
- return false;
587
- }
588
- rd = a->vd;
589
- rm = a->vm;
590
-
591
- if (!vfp_access_check(s)) {
592
- return true;
593
- }
594
-
595
- fpst = get_fpstatus_ptr(0);
596
-
597
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
598
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
599
-
600
- if (dp) {
601
- TCGv_i64 tcg_op;
602
- TCGv_i64 tcg_res;
603
- tcg_op = tcg_temp_new_i64();
604
- tcg_res = tcg_temp_new_i64();
605
- tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
606
- gen_helper_rintd(tcg_res, tcg_op, fpst);
607
- tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
608
- tcg_temp_free_i64(tcg_op);
609
- tcg_temp_free_i64(tcg_res);
610
- } else {
611
- TCGv_i32 tcg_op;
612
- TCGv_i32 tcg_res;
613
- tcg_op = tcg_temp_new_i32();
614
- tcg_res = tcg_temp_new_i32();
615
- tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
616
- gen_helper_rints(tcg_res, tcg_op, fpst);
617
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
618
- tcg_temp_free_i32(tcg_op);
619
- tcg_temp_free_i32(tcg_res);
620
- }
621
-
622
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
623
- tcg_temp_free_i32(tcg_rmode);
624
-
625
- tcg_temp_free_ptr(fpst);
626
- return true;
627
-}
628
-
629
-static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
630
-{
631
- uint32_t rd, rm;
632
- bool dp = a->dp;
633
- TCGv_ptr fpst;
634
- TCGv_i32 tcg_rmode, tcg_shift;
635
- int rounding = fp_decode_rm[a->rm];
636
- bool is_signed = a->op;
637
-
638
- if (!dc_isar_feature(aa32_vcvt_dr, s)) {
639
- return false;
640
- }
641
-
642
- /* UNDEF accesses to D16-D31 if they don't exist */
643
- if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
644
- return false;
645
- }
646
- rd = a->vd;
647
- rm = a->vm;
648
-
649
- if (!vfp_access_check(s)) {
650
- return true;
651
- }
652
-
653
- fpst = get_fpstatus_ptr(0);
654
-
655
- tcg_shift = tcg_const_i32(0);
656
-
657
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
658
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
659
-
660
- if (dp) {
661
- TCGv_i64 tcg_double, tcg_res;
662
- TCGv_i32 tcg_tmp;
663
- tcg_double = tcg_temp_new_i64();
664
- tcg_res = tcg_temp_new_i64();
665
- tcg_tmp = tcg_temp_new_i32();
666
- tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
667
- if (is_signed) {
668
- gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
669
- } else {
670
- gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
671
- }
672
- tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
673
- tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
674
- tcg_temp_free_i32(tcg_tmp);
675
- tcg_temp_free_i64(tcg_res);
676
- tcg_temp_free_i64(tcg_double);
677
- } else {
678
- TCGv_i32 tcg_single, tcg_res;
679
- tcg_single = tcg_temp_new_i32();
680
- tcg_res = tcg_temp_new_i32();
681
- tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
682
- if (is_signed) {
683
- gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
684
- } else {
685
- gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
686
- }
687
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
688
- tcg_temp_free_i32(tcg_res);
689
- tcg_temp_free_i32(tcg_single);
690
- }
691
-
692
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
693
- tcg_temp_free_i32(tcg_rmode);
694
-
695
- tcg_temp_free_i32(tcg_shift);
696
-
697
- tcg_temp_free_ptr(fpst);
698
-
699
- return true;
700
-}
701
-
702
/*
703
* Disassemble a VFP instruction. Returns nonzero if an error occurred
704
* (ie. an undefined instruction).
705
--
191
--
706
2.20.1
192
2.20.1
707
193
708
194
Convert the VCVT integer-to-float instructions to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 58 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 12 +------
 target/arm/vfp.decode          |  6 ++++
 3 files changed, 65 insertions(+), 11 deletions(-)

Convert VCMLA (scalar) in the 2reg-scalar-ext group to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-9-peter.maydell@linaro.org
---
 target/arm/neon-shared.decode   |  5 +++++
 target/arm/translate-neon.inc.c | 40 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 26 +--------------------
 3 files changed, 46 insertions(+), 25 deletions(-)
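For reference, the integer-to-float patterns being converted read the
source as a 32-bit integer from a single-precision register (signed
when s=1) and write a result whose width follows the size bit. A plain
C stand-in for the sitos/uitos/sitod/uitod helpers, ignoring the guest
FPSCR rounding mode that the real helpers honour (hypothetical names):

#include <stdint.h>
#include <stdio.h>

static float vcvt_int_sp(uint32_t bits, int is_signed)
{
    /* i32 -> f32 or u32 -> f32 */
    return is_signed ? (float)(int32_t)bits : (float)bits;
}

static double vcvt_int_dp(uint32_t bits, int is_signed)
{
    /* i32 -> f64 or u32 -> f64 */
    return is_signed ? (double)(int32_t)bits : (double)bits;
}

int main(void)
{
    /* the same bit pattern gives -1.0 signed but ~4.29e9 unsigned */
    printf("%f %f\n", vcvt_int_sp(0xffffffffu, 1),
           vcvt_int_dp(0xffffffffu, 0));
    return 0;
}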
11
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
12
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-vfp.inc.c
14
--- a/target/arm/neon-shared.decode
14
+++ b/target/arm/translate-vfp.inc.c
15
+++ b/target/arm/neon-shared.decode
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
16
@@ -XXX,XX +XXX,XX @@ VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
16
tcg_temp_free_i64(vm);
17
vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
18
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
19
vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
20
+
21
+VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
22
+ vn=%vn_dp vd=%vd_dp size=0
23
+VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
24
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-neon.inc.c
28
+++ b/target/arm/translate-neon.inc.c
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VFML(DisasContext *s, arg_VFML *a)
30
gen_helper_gvec_fmlal_a32);
17
return true;
31
return true;
18
}
32
}
19
+
33
+
20
+static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
34
+static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
21
+{
35
+{
22
+ TCGv_i32 vm;
36
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
37
+ int opr_sz;
23
+ TCGv_ptr fpst;
38
+ TCGv_ptr fpst;
24
+
39
+
25
+ if (!vfp_access_check(s)) {
40
+ if (!dc_isar_feature(aa32_vcma, s)) {
26
+ return true;
41
+ return false;
42
+ }
43
+ if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
44
+ return false;
27
+ }
45
+ }
28
+
46
+
29
+ vm = tcg_temp_new_i32();
47
+ /* UNDEF accesses to D16-D31 if they don't exist. */
30
+ neon_load_reg32(vm, a->vm);
48
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
31
+ fpst = get_fpstatus_ptr(false);
49
+ ((a->vd | a->vn | a->vm) & 0x10)) {
32
+ if (a->s) {
50
+ return false;
33
+ /* i32 -> f32 */
34
+ gen_helper_vfp_sitos(vm, vm, fpst);
35
+ } else {
36
+ /* u32 -> f32 */
37
+ gen_helper_vfp_uitos(vm, vm, fpst);
38
+ }
51
+ }
39
+ neon_store_reg32(vm, a->vd);
40
+ tcg_temp_free_i32(vm);
41
+ tcg_temp_free_ptr(fpst);
42
+ return true;
43
+}
44
+
52
+
45
+static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
53
+ if ((a->vd | a->vn) & a->q) {
46
+{
47
+ TCGv_i32 vm;
48
+ TCGv_i64 vd;
49
+ TCGv_ptr fpst;
50
+
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
52
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
53
+ return false;
54
+ return false;
54
+ }
55
+ }
55
+
56
+
56
+ if (!vfp_access_check(s)) {
57
+ if (!vfp_access_check(s)) {
57
+ return true;
58
+ return true;
58
+ }
59
+ }
59
+
60
+
60
+ vm = tcg_temp_new_i32();
61
+ fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
61
+ vd = tcg_temp_new_i64();
62
+ : gen_helper_gvec_fcmlah_idx);
62
+ neon_load_reg32(vm, a->vm);
63
+ opr_sz = (1 + a->q) * 8;
63
+ fpst = get_fpstatus_ptr(false);
64
+ fpst = get_fpstatus_ptr(1);
64
+ if (a->s) {
65
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
65
+ /* i32 -> f64 */
66
+ vfp_reg_offset(1, a->vn),
66
+ gen_helper_vfp_sitod(vd, vm, fpst);
67
+ vfp_reg_offset(1, a->vm),
67
+ } else {
68
+ fpst, opr_sz, opr_sz,
68
+ /* u32 -> f64 */
69
+ (a->index << 2) | a->rot, fn_gvec_ptr);
69
+ gen_helper_vfp_uitod(vd, vm, fpst);
70
+ }
71
+ neon_store_reg64(vd, a->vd);
72
+ tcg_temp_free_i32(vm);
73
+ tcg_temp_free_i64(vd);
74
+ tcg_temp_free_ptr(fpst);
70
+ tcg_temp_free_ptr(fpst);
75
+ return true;
71
+ return true;
76
+}
72
+}
77
diff --git a/target/arm/translate.c b/target/arm/translate.c
73
diff --git a/target/arm/translate.c b/target/arm/translate.c
78
index XXXXXXX..XXXXXXX 100644
74
index XXXXXXX..XXXXXXX 100644
79
--- a/target/arm/translate.c
75
--- a/target/arm/translate.c
80
+++ b/target/arm/translate.c
76
+++ b/target/arm/translate.c
81
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
77
@@ -XXX,XX +XXX,XX @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
82
return 1;
78
bool is_long = false, q = extract32(insn, 6, 1);
83
case 15:
79
bool ptr_is_env = false;
84
switch (rn) {
80
85
- case 0 ... 15:
81
- if ((insn & 0xff000f10) == 0xfe000800) {
86
+ case 0 ... 17:
82
- /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
87
/* Already handled by decodetree */
83
- int rot = extract32(insn, 20, 2);
88
return 1;
84
- int size = extract32(insn, 23, 1);
89
default:
85
- int index;
90
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
86
-
91
if (op == 15) {
87
- if (!dc_isar_feature(aa32_vcma, s)) {
92
/* rn is opcode, encoded as per VFP_SREG_N. */
88
- return 1;
93
switch (rn) {
89
- }
94
- case 0x10: /* vcvt.fxx.u32 */
90
- if (size == 0) {
95
- case 0x11: /* vcvt.fxx.s32 */
91
- if (!dc_isar_feature(aa32_fp16_arith, s)) {
96
- rm_is_dp = false;
92
- return 1;
97
- break;
93
- }
98
case 0x18: /* vcvtr.u32.fxx */
94
- /* For fp16, rm is just Vm, and index is M. */
99
case 0x19: /* vcvtz.u32.fxx */
95
- rm = extract32(insn, 0, 4);
100
case 0x1a: /* vcvtr.s32.fxx */
96
- index = extract32(insn, 5, 1);
101
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
97
- } else {
102
switch (op) {
98
- /* For fp32, rm is the usual M:Vm, and index is 0. */
103
case 15: /* extension space */
99
- VFP_DREG_M(rm, insn);
104
switch (rn) {
100
- index = 0;
105
- case 16: /* fuito */
101
- }
106
- gen_vfp_uito(dp, 0);
102
- data = (index << 2) | rot;
107
- break;
103
- fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
108
- case 17: /* fsito */
104
- : gen_helper_gvec_fcmlah_idx);
109
- gen_vfp_sito(dp, 0);
105
- } else if ((insn & 0xffb00f00) == 0xfe200d00) {
110
- break;
106
+ if ((insn & 0xffb00f00) == 0xfe200d00) {
111
case 19: /* vjcvt */
107
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
112
gen_helper_vjcvt(cpu_F0s, cpu_F0d, cpu_env);
108
int u = extract32(insn, 4, 1);
113
break;
109
114
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
115
index XXXXXXX..XXXXXXX 100644
116
--- a/target/arm/vfp.decode
117
+++ b/target/arm/vfp.decode
118
@@ -XXX,XX +XXX,XX @@ VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... \
119
vd=%vd_dp vm=%vm_sp
120
VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... \
121
vd=%vd_sp vm=%vm_dp
122
+
123
+# VCVT from integer to floating point: Vm always single; Vd depends on size
124
+VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
125
+ vd=%vd_sp vm=%vm_sp
126
+VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
127
+ vd=%vd_dp vm=%vm_sp
128
--
110
--
129
2.20.1
111
2.20.1
130
112
131
113
Convert the float-to-integer VCVT instructions to decodetree.
Since these are the last unconverted instructions, we can
delete the old decoder structure entirely now.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c |  72 ++++++++++
 target/arm/translate.c         | 241 +--------------------------------
 target/arm/vfp.decode          |   6 +
 3 files changed, 80 insertions(+), 239 deletions(-)

Convert the V[US]DOT (scalar) insns in the 2reg-scalar-ext group
to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-10-peter.maydell@linaro.org
---
 target/arm/neon-shared.decode   |  3 +++
 target/arm/translate-neon.inc.c | 35 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 13 +-----------
 3 files changed, 39 insertions(+), 12 deletions(-)
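The float-to-integer patterns distinguish VCVT (rz=1, always round
towards zero) from VCVTR (rz=0, round under the current FPSCR mode). A
rough model using the host rounding mode as a stand-in; the QEMU
helpers additionally saturate and raise the proper FP exceptions
(hypothetical code, link with -lm):

#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static int32_t vcvt_f32_to_s32(float f, int rz)
{
    if (rz) {
        return (int32_t)truncf(f);   /* VCVT: round to zero */
    }
    return (int32_t)nearbyintf(f);   /* VCVTR: current rounding mode */
}

int main(void)
{
    fesetround(FE_TONEAREST);
    /* 2.7 converts to 2 with VCVT but 3 with VCVTR under round-to-nearest */
    printf("%d %d\n", vcvt_f32_to_s32(2.7f, 1), vcvt_f32_to_s32(2.7f, 0));
    return 0;
}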
13
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
13
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/translate-vfp.inc.c
15
--- a/target/arm/neon-shared.decode
16
+++ b/target/arm/translate-vfp.inc.c
16
+++ b/target/arm/neon-shared.decode
17
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
17
@@ -XXX,XX +XXX,XX @@ VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
18
vn=%vn_dp vd=%vd_dp size=0
19
VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0
21
+
22
+VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
24
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/translate-neon.inc.c
27
+++ b/target/arm/translate-neon.inc.c
28
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
18
tcg_temp_free_ptr(fpst);
29
tcg_temp_free_ptr(fpst);
19
return true;
30
return true;
20
}
31
}
21
+
32
+
22
+static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
33
+static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
23
+{
34
+{
24
+ TCGv_i32 vm;
35
+ gen_helper_gvec_3 *fn_gvec;
36
+ int opr_sz;
25
+ TCGv_ptr fpst;
37
+ TCGv_ptr fpst;
26
+
38
+
27
+ if (!vfp_access_check(s)) {
39
+ if (!dc_isar_feature(aa32_dp, s)) {
28
+ return true;
40
+ return false;
29
+ }
41
+ }
30
+
42
+
31
+ fpst = get_fpstatus_ptr(false);
43
+ /* UNDEF accesses to D16-D31 if they don't exist. */
32
+ vm = tcg_temp_new_i32();
44
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
33
+ neon_load_reg32(vm, a->vm);
45
+ ((a->vd | a->vn) & 0x10)) {
46
+ return false;
47
+ }
34
+
48
+
35
+ if (a->s) {
49
+ if ((a->vd | a->vn) & a->q) {
36
+ if (a->rz) {
37
+ gen_helper_vfp_tosizs(vm, vm, fpst);
38
+ } else {
39
+ gen_helper_vfp_tosis(vm, vm, fpst);
40
+ }
41
+ } else {
42
+ if (a->rz) {
43
+ gen_helper_vfp_touizs(vm, vm, fpst);
44
+ } else {
45
+ gen_helper_vfp_touis(vm, vm, fpst);
46
+ }
47
+ }
48
+ neon_store_reg32(vm, a->vd);
49
+ tcg_temp_free_i32(vm);
50
+ tcg_temp_free_ptr(fpst);
51
+ return true;
52
+}
53
+
54
+static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
55
+{
56
+ TCGv_i32 vd;
57
+ TCGv_i64 vm;
58
+ TCGv_ptr fpst;
59
+
60
+ /* UNDEF accesses to D16-D31 if they don't exist. */
61
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
62
+ return false;
50
+ return false;
63
+ }
51
+ }
64
+
52
+
65
+ if (!vfp_access_check(s)) {
53
+ if (!vfp_access_check(s)) {
66
+ return true;
54
+ return true;
67
+ }
55
+ }
68
+
56
+
69
+ fpst = get_fpstatus_ptr(false);
57
+ fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
70
+ vm = tcg_temp_new_i64();
58
+ opr_sz = (1 + a->q) * 8;
71
+ vd = tcg_temp_new_i32();
59
+ fpst = get_fpstatus_ptr(1);
72
+ neon_load_reg64(vm, a->vm);
60
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
73
+
61
+ vfp_reg_offset(1, a->vn),
74
+ if (a->s) {
62
+ vfp_reg_offset(1, a->rm),
75
+ if (a->rz) {
63
+ opr_sz, opr_sz, a->index, fn_gvec);
76
+ gen_helper_vfp_tosizd(vd, vm, fpst);
77
+ } else {
78
+ gen_helper_vfp_tosid(vd, vm, fpst);
79
+ }
80
+ } else {
81
+ if (a->rz) {
82
+ gen_helper_vfp_touizd(vd, vm, fpst);
83
+ } else {
84
+ gen_helper_vfp_touid(vd, vm, fpst);
85
+ }
86
+ }
87
+ neon_store_reg32(vd, a->vd);
88
+ tcg_temp_free_i32(vd);
89
+ tcg_temp_free_i64(vm);
90
+ tcg_temp_free_ptr(fpst);
64
+ tcg_temp_free_ptr(fpst);
91
+ return true;
65
+ return true;
92
+}
66
+}
93
diff --git a/target/arm/translate.c b/target/arm/translate.c
67
diff --git a/target/arm/translate.c b/target/arm/translate.c
94
index XXXXXXX..XXXXXXX 100644
68
index XXXXXXX..XXXXXXX 100644
95
--- a/target/arm/translate.c
69
--- a/target/arm/translate.c
96
+++ b/target/arm/translate.c
70
+++ b/target/arm/translate.c
97
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_##name(int dp, int neon) \
71
@@ -XXX,XX +XXX,XX @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
98
tcg_temp_free_ptr(statusptr); \
72
bool is_long = false, q = extract32(insn, 6, 1);
99
}
73
bool ptr_is_env = false;
100
74
101
-VFP_GEN_FTOI(toui)
75
- if ((insn & 0xffb00f00) == 0xfe200d00) {
102
VFP_GEN_FTOI(touiz)
76
- /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
103
-VFP_GEN_FTOI(tosi)
77
- int u = extract32(insn, 4, 1);
104
VFP_GEN_FTOI(tosiz)
105
#undef VFP_GEN_FTOI
106
107
@@ -XXX,XX +XXX,XX @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
108
}
109
110
#define tcg_gen_ld_f32 tcg_gen_ld_i32
111
-#define tcg_gen_ld_f64 tcg_gen_ld_i64
112
#define tcg_gen_st_f32 tcg_gen_st_i32
113
-#define tcg_gen_st_f64 tcg_gen_st_i64
114
-
78
-
115
-static inline void gen_mov_F0_vreg(int dp, int reg)
79
- if (!dc_isar_feature(aa32_dp, s)) {
116
-{
117
- if (dp)
118
- tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
119
- else
120
- tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
121
-}
122
-
123
-static inline void gen_mov_F1_vreg(int dp, int reg)
124
-{
125
- if (dp)
126
- tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
127
- else
128
- tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
129
-}
130
-
131
-static inline void gen_mov_vreg_F0(int dp, int reg)
132
-{
133
- if (dp)
134
- tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
135
- else
136
- tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
137
-}
138
139
#define ARM_CP_RW_BIT (1 << 20)
140
141
@@ -XXX,XX +XXX,XX @@ static void gen_neon_dup_high16(TCGv_i32 var)
142
*/
143
static int disas_vfp_insn(DisasContext *s, uint32_t insn)
144
{
145
- uint32_t rd, rn, rm, op, delta_d, delta_m, bank_mask;
146
- int dp, veclen;
147
-
148
if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
149
return 1;
150
}
151
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
152
return 0;
153
}
154
}
155
-
156
- if (extract32(insn, 28, 4) == 0xf) {
157
- /*
158
- * Encodings with T=1 (Thumb) or unconditional (ARM): these
159
- * were all handled by the decodetree decoder, so any insn
160
- * patterns which get here must be UNDEF.
161
- */
162
- return 1;
163
- }
164
-
165
- /*
166
- * FIXME: this access check should not take precedence over UNDEF
167
- * for invalid encodings; we will generate incorrect syndrome information
168
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
169
- */
170
- if (!vfp_access_check(s)) {
171
- return 0;
172
- }
173
-
174
- dp = ((insn & 0xf00) == 0xb00);
175
- switch ((insn >> 24) & 0xf) {
176
- case 0xe:
177
- if (insn & (1 << 4)) {
178
- /* already handled by decodetree */
179
- return 1;
80
- return 1;
180
- } else {
181
- /* data processing */
182
- bool rd_is_dp = dp;
183
- bool rm_is_dp = dp;
184
- bool no_output = false;
185
-
186
- /* The opcode is in bits 23, 21, 20 and 6. */
187
- op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
188
- rn = VFP_SREG_N(insn);
189
-
190
- switch (op) {
191
- case 0 ... 14:
192
- /* Already handled by decodetree */
193
- return 1;
194
- case 15:
195
- switch (rn) {
196
- case 0 ... 23:
197
- case 28 ... 31:
198
- /* Already handled by decodetree */
199
- return 1;
200
- default:
201
- break;
202
- }
203
- default:
204
- break;
205
- }
206
-
207
- if (op == 15) {
208
- /* rn is opcode, encoded as per VFP_SREG_N. */
209
- switch (rn) {
210
- case 0x18: /* vcvtr.u32.fxx */
211
- case 0x19: /* vcvtz.u32.fxx */
212
- case 0x1a: /* vcvtr.s32.fxx */
213
- case 0x1b: /* vcvtz.s32.fxx */
214
- rd_is_dp = false;
215
- break;
216
-
217
- default:
218
- return 1;
219
- }
220
- } else if (dp) {
221
- /* rn is register number */
222
- VFP_DREG_N(rn, insn);
223
- }
224
-
225
- if (rd_is_dp) {
226
- VFP_DREG_D(rd, insn);
227
- } else {
228
- rd = VFP_SREG_D(insn);
229
- }
230
- if (rm_is_dp) {
231
- VFP_DREG_M(rm, insn);
232
- } else {
233
- rm = VFP_SREG_M(insn);
234
- }
235
-
236
- veclen = s->vec_len;
237
- if (op == 15 && rn > 3) {
238
- veclen = 0;
239
- }
240
-
241
- /* Shut up compiler warnings. */
242
- delta_m = 0;
243
- delta_d = 0;
244
- bank_mask = 0;
245
-
246
- if (veclen > 0) {
247
- if (dp)
248
- bank_mask = 0xc;
249
- else
250
- bank_mask = 0x18;
251
-
252
- /* Figure out what type of vector operation this is. */
253
- if ((rd & bank_mask) == 0) {
254
- /* scalar */
255
- veclen = 0;
256
- } else {
257
- if (dp)
258
- delta_d = (s->vec_stride >> 1) + 1;
259
- else
260
- delta_d = s->vec_stride + 1;
261
-
262
- if ((rm & bank_mask) == 0) {
263
- /* mixed scalar/vector */
264
- delta_m = 0;
265
- } else {
266
- /* vector */
267
- delta_m = delta_d;
268
- }
269
- }
270
- }
271
-
272
- /* Load the initial operands. */
273
- if (op == 15) {
274
- switch (rn) {
275
- default:
276
- /* One source operand. */
277
- gen_mov_F0_vreg(rm_is_dp, rm);
278
- break;
279
- }
280
- } else {
281
- /* Two source operands. */
282
- gen_mov_F0_vreg(dp, rn);
283
- gen_mov_F1_vreg(dp, rm);
284
- }
285
-
286
- for (;;) {
287
- /* Perform the calculation. */
288
- switch (op) {
289
- case 15: /* extension space */
290
- switch (rn) {
291
- case 24: /* ftoui */
292
- gen_vfp_toui(dp, 0);
293
- break;
294
- case 25: /* ftouiz */
295
- gen_vfp_touiz(dp, 0);
296
- break;
297
- case 26: /* ftosi */
298
- gen_vfp_tosi(dp, 0);
299
- break;
300
- case 27: /* ftosiz */
301
- gen_vfp_tosiz(dp, 0);
302
- break;
303
- default: /* undefined */
304
- g_assert_not_reached();
305
- }
306
- break;
307
- default: /* undefined */
308
- return 1;
309
- }
310
-
311
- /* Write back the result, if any. */
312
- if (!no_output) {
313
- gen_mov_vreg_F0(rd_is_dp, rd);
314
- }
315
-
316
- /* break out of the loop if we have finished */
317
- if (veclen == 0) {
318
- break;
319
- }
320
-
321
- if (op == 15 && delta_m == 0) {
322
- /* single source one-many */
323
- while (veclen--) {
324
- rd = ((rd + delta_d) & (bank_mask - 1))
325
- | (rd & bank_mask);
326
- gen_mov_vreg_F0(dp, rd);
327
- }
328
- break;
329
- }
330
- /* Setup the next operands. */
331
- veclen--;
332
- rd = ((rd + delta_d) & (bank_mask - 1))
333
- | (rd & bank_mask);
334
-
335
- if (op == 15) {
336
- /* One source operand. */
337
- rm = ((rm + delta_m) & (bank_mask - 1))
338
- | (rm & bank_mask);
339
- gen_mov_F0_vreg(dp, rm);
340
- } else {
341
- /* Two source operands. */
342
- rn = ((rn + delta_d) & (bank_mask - 1))
343
- | (rn & bank_mask);
344
- gen_mov_F0_vreg(dp, rn);
345
- if (delta_m) {
346
- rm = ((rm + delta_m) & (bank_mask - 1))
347
- | (rm & bank_mask);
348
- gen_mov_F1_vreg(dp, rm);
349
- }
350
- }
351
- }
352
- }
81
- }
353
- break;
82
- fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
354
- case 0xc:
83
- /* rm is just Vm, and index is M. */
355
- case 0xd:
84
- data = extract32(insn, 5, 1); /* index */
356
- /* Already handled by decodetree */
85
- rm = extract32(insn, 0, 4);
357
- return 1;
86
- } else if ((insn & 0xffa00f10) == 0xfe000810) {
358
- default:
87
+ if ((insn & 0xffa00f10) == 0xfe000810) {
359
- /* Should never happen. */
88
/* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
360
- return 1;
89
int is_s = extract32(insn, 20, 1);
361
- }
90
int vm20 = extract32(insn, 0, 3);
362
- return 0;
363
+ /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
364
+ return 1;
365
}
366
367
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
368
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
369
index XXXXXXX..XXXXXXX 100644
370
--- a/target/arm/vfp.decode
371
+++ b/target/arm/vfp.decode
372
@@ -XXX,XX +XXX,XX @@ VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
373
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
374
VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
375
vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
376
+
377
+# VCVT float to integer (VCVT and VCVTR): Vd always single; Vd depends on size
378
+VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
379
+ vd=%vd_sp vm=%vm_sp
380
+VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
381
+ vd=%vd_sp vm=%vm_dp
382
--
91
--
383
2.20.1
92
2.20.1
384
93
385
94
Convert the VCVTT and VCVTB instructions which convert from
f32 and f64 to f16 to decodetree.

Since we're no longer constrained to the old decoder's style
using cpu_F0s and cpu_F0d we can perform a direct 16 bit
store of the right half of the input single-precision register
rather than doing a load/modify/store sequence on the full
32 bits.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c |  62 ++++++++++++++++++++++++++
 target/arm/translate.c         |  79 +---------------------------------
 target/arm/vfp.decode          |   6 +++
 3 files changed, 69 insertions(+), 78 deletions(-)

Convert the VFM[AS]L (scalar) insns in the 2reg-scalar-ext group
to decodetree. These are the last ones in the group so we can remove
all the legacy decode for the group.

Note that in disas_thumb2_insn() the parts of this encoding space
where the decodetree decoder returns false will correctly be directed
to illegal_op by the "(insn & (1 << 28))" check so they won't fall
into disas_coproc_insn() by mistake.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-11-peter.maydell@linaro.org
---
 target/arm/neon-shared.decode   |   7 +++
 target/arm/translate-neon.inc.c |  32 ++++
 target/arm/translate.c          | 107 +-------------------------------
 3 files changed, 40 insertions(+), 106 deletions(-)
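The direct 16-bit store works because a VCVTT/VCVTB result only ever
touches one half of the 32-bit Sd register, leaving the other half
untouched. A hypothetical standalone illustration of that write-back
(invented helper, not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Insert an f16 bit pattern into the top (t=1) or bottom (t=0) half. */
static uint32_t write_f16(uint32_t sd, uint16_t h, int t)
{
    if (t) {
        return (sd & 0x0000ffffu) | ((uint32_t)h << 16); /* VCVTT */
    }
    return (sd & 0xffff0000u) | h;                       /* VCVTB */
}

int main(void)
{
    /* writing f16 1.0 (0x3c00) to the top half preserves the bottom */
    printf("0x%08x\n", (unsigned)write_f16(0xdeadbeefu, 0x3c00, 1));
    return 0;
}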
18
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
19
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
19
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate-vfp.inc.c
21
--- a/target/arm/neon-shared.decode
21
+++ b/target/arm/translate-vfp.inc.c
22
+++ b/target/arm/neon-shared.decode
22
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
23
@@ -XXX,XX +XXX,XX @@ VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
23
tcg_temp_free_i64(vd);
24
25
VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \
26
vm=%vm_dp vn=%vn_dp vd=%vd_dp
27
+
28
+%vfml_scalar_q0_rm 0:3 5:1
29
+%vfml_scalar_q1_index 5:1 3:1
30
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
31
+ rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
32
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
33
+ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-neon.inc.c
37
+++ b/target/arm/translate-neon.inc.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
39
tcg_temp_free_ptr(fpst);
24
return true;
40
return true;
25
}
41
}
26
+
42
+
27
+static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
43
+static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
28
+{
44
+{
29
+ TCGv_ptr fpst;
45
+ int opr_sz;
30
+ TCGv_i32 ahp_mode;
46
+
31
+ TCGv_i32 tmp;
47
+ if (!dc_isar_feature(aa32_fhm, s)) {
32
+
48
+ return false;
33
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
49
+ }
50
+
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
52
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
53
+ ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
54
+ return false;
55
+ }
56
+
57
+ if (a->vd & a->q) {
34
+ return false;
58
+ return false;
35
+ }
59
+ }
36
+
60
+
37
+ if (!vfp_access_check(s)) {
61
+ if (!vfp_access_check(s)) {
38
+ return true;
62
+ return true;
39
+ }
63
+ }
40
+
64
+
41
+ fpst = get_fpstatus_ptr(false);
65
+ opr_sz = (1 + a->q) * 8;
42
+ ahp_mode = get_ahp_flag();
66
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
43
+ tmp = tcg_temp_new_i32();
67
+ vfp_reg_offset(a->q, a->vn),
44
+
68
+ vfp_reg_offset(a->q, a->rm),
45
+ neon_load_reg32(tmp, a->vm);
69
+ cpu_env, opr_sz, opr_sz,
46
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
70
+ (a->index << 2) | a->s, /* is_2 == 0 */
47
+ tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
71
+ gen_helper_gvec_fmlal_idx_a32);
48
+ tcg_temp_free_i32(ahp_mode);
49
+ tcg_temp_free_ptr(fpst);
50
+ tcg_temp_free_i32(tmp);
51
+ return true;
52
+}
53
+
54
+static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
55
+{
56
+ TCGv_ptr fpst;
57
+ TCGv_i32 ahp_mode;
58
+ TCGv_i32 tmp;
59
+ TCGv_i64 vm;
60
+
61
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
62
+ return false;
63
+ }
64
+
65
+ /* UNDEF accesses to D16-D31 if they don't exist. */
66
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
67
+ return false;
68
+ }
69
+
70
+ if (!vfp_access_check(s)) {
71
+ return true;
72
+ }
73
+
74
+ fpst = get_fpstatus_ptr(false);
75
+ ahp_mode = get_ahp_flag();
76
+ tmp = tcg_temp_new_i32();
77
+ vm = tcg_temp_new_i64();
78
+
79
+ neon_load_reg64(vm, a->vm);
80
+ gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
81
+ tcg_temp_free_i64(vm);
82
+ tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
83
+ tcg_temp_free_i32(ahp_mode);
84
+ tcg_temp_free_ptr(fpst);
85
+ tcg_temp_free_i32(tmp);
86
+ return true;
72
+ return true;
87
+}
73
+}
88
diff --git a/target/arm/translate.c b/target/arm/translate.c
74
diff --git a/target/arm/translate.c b/target/arm/translate.c
89
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
90
--- a/target/arm/translate.c
76
--- a/target/arm/translate.c
91
+++ b/target/arm/translate.c
77
+++ b/target/arm/translate.c
92
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
78
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
93
#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
79
}
80
81
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
82
-#define VFP_SREG(insn, bigbit, smallbit) \
83
- ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
84
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
85
if (dc_isar_feature(aa32_simd_r32, s)) { \
86
reg = (((insn) >> (bigbit)) & 0x0f) \
87
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
88
reg = ((insn) >> (bigbit)) & 0x0f; \
89
}} while (0)
90
91
-#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
92
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
93
-#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
94
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
95
-#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
94
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
96
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
95
97
96
-/* Move between integer and VFP cores. */
98
static void gen_neon_dup_low16(TCGv_i32 var)
97
-static TCGv_i32 gen_vfp_mrs(void)
99
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
100
return 0;
101
}
102
103
-/* Advanced SIMD two registers and a scalar extension.
104
- * 31 24 23 22 20 16 12 11 10 9 8 3 0
105
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
106
- * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
107
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
108
- *
109
- */
110
-
111
-static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
98
-{
112
-{
99
- TCGv_i32 tmp = tcg_temp_new_i32();
113
- gen_helper_gvec_3 *fn_gvec = NULL;
100
- tcg_gen_mov_i32(tmp, cpu_F0s);
114
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
101
- return tmp;
115
- int rd, rn, rm, opr_sz, data;
116
- int off_rn, off_rm;
117
- bool is_long = false, q = extract32(insn, 6, 1);
118
- bool ptr_is_env = false;
119
-
120
- if ((insn & 0xffa00f10) == 0xfe000810) {
121
- /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
122
- int is_s = extract32(insn, 20, 1);
123
- int vm20 = extract32(insn, 0, 3);
124
- int vm3 = extract32(insn, 3, 1);
125
- int m = extract32(insn, 5, 1);
126
- int index;
127
-
128
- if (!dc_isar_feature(aa32_fhm, s)) {
129
- return 1;
130
- }
131
- if (q) {
132
- rm = vm20;
133
- index = m * 2 + vm3;
134
- } else {
135
- rm = vm20 * 2 + m;
136
- index = vm3;
137
- }
138
- is_long = true;
139
- data = (index << 2) | is_s; /* is_2 == 0 */
140
- fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
141
- ptr_is_env = true;
142
- } else {
143
- return 1;
144
- }
145
-
146
- VFP_DREG_D(rd, insn);
147
- if (rd & q) {
148
- return 1;
149
- }
150
- if (q || !is_long) {
151
- VFP_DREG_N(rn, insn);
152
- if (rn & q & !is_long) {
153
- return 1;
154
- }
155
- off_rn = vfp_reg_offset(1, rn);
156
- off_rm = vfp_reg_offset(1, rm);
157
- } else {
158
- rn = VFP_SREG_N(insn);
159
- off_rn = vfp_reg_offset(0, rn);
160
- off_rm = vfp_reg_offset(0, rm);
161
- }
162
- if (s->fp_excp_el) {
163
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
164
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
165
- return 0;
166
- }
167
- if (!s->vfp_enabled) {
168
- return 1;
169
- }
170
-
171
- opr_sz = (1 + q) * 8;
172
- if (fn_gvec_ptr) {
173
- TCGv_ptr ptr;
174
- if (ptr_is_env) {
175
- ptr = cpu_env;
176
- } else {
177
- ptr = get_fpstatus_ptr(1);
178
- }
179
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
180
- opr_sz, opr_sz, data, fn_gvec_ptr);
181
- if (!ptr_is_env) {
182
- tcg_temp_free_ptr(ptr);
183
- }
184
- } else {
185
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
186
- opr_sz, opr_sz, data, fn_gvec);
187
- }
188
- return 0;
102
-}
189
-}
103
-
190
-
104
-static void gen_vfp_msr(TCGv_i32 tmp)
191
static int disas_coproc_insn(DisasContext *s, uint32_t insn)
105
-{
106
- tcg_gen_mov_i32(cpu_F0s, tmp);
107
- tcg_temp_free_i32(tmp);
108
-}
109
-
110
static void gen_neon_dup_low16(TCGv_i32 var)
111
{
192
{
112
TCGv_i32 tmp = tcg_temp_new_i32();
193
int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
113
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
194
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
114
{
195
}
115
uint32_t rd, rn, rm, op, delta_d, delta_m, bank_mask;
196
}
116
int dp, veclen;
197
}
117
- TCGv_i32 tmp;
198
- } else if ((insn & 0x0f000a00) == 0x0e000800
118
- TCGv_i32 tmp2;
199
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
119
200
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
120
if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
201
- goto illegal_op;
121
return 1;
202
- }
122
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
203
- return;
123
return 1;
204
}
124
case 15:
205
goto illegal_op;
125
switch (rn) {
206
}
126
- case 0 ... 5:
207
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
127
- case 8 ... 11:
208
}
128
+ case 0 ... 11:
209
break;
129
/* Already handled by decodetree */
210
}
130
return 1;
211
- if ((insn & 0xff000a00) == 0xfe000800
131
default:
212
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
132
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
213
- /* The Thumb2 and ARM encodings are identical. */
133
if (op == 15) {
214
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
134
/* rn is opcode, encoded as per VFP_SREG_N. */
215
- goto illegal_op;
135
switch (rn) {
216
- }
136
- case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
217
- } else if (((insn >> 24) & 3) == 3) {
137
- case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
218
+ if (((insn >> 24) & 3) == 3) {
138
- if (dp) {
219
/* Translate into the equivalent ARM encoding. */
139
- if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
220
insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
140
- return 1;
221
if (disas_neon_data_insn(s, insn)) {
141
- }
142
- } else {
143
- if (!dc_isar_feature(aa32_fp16_spconv, s)) {
144
- return 1;
145
- }
146
- }
147
- rd_is_dp = false;
148
- break;
149
-
150
case 0x0c: /* vrintr */
151
case 0x0d: /* vrintz */
152
case 0x0e: /* vrintx */
153
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
154
switch (op) {
155
case 15: /* extension space */
156
switch (rn) {
157
- case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
158
- {
159
- TCGv_ptr fpst = get_fpstatus_ptr(false);
160
- TCGv_i32 ahp = get_ahp_flag();
161
- tmp = tcg_temp_new_i32();
162
-
163
- if (dp) {
164
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
165
- fpst, ahp);
166
- } else {
167
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
168
- fpst, ahp);
169
- }
170
- tcg_temp_free_i32(ahp);
171
- tcg_temp_free_ptr(fpst);
172
- gen_mov_F0_vreg(0, rd);
173
- tmp2 = gen_vfp_mrs();
174
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
175
- tcg_gen_or_i32(tmp, tmp, tmp2);
176
- tcg_temp_free_i32(tmp2);
177
- gen_vfp_msr(tmp);
178
- break;
179
- }
180
- case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
181
- {
182
- TCGv_ptr fpst = get_fpstatus_ptr(false);
183
- TCGv_i32 ahp = get_ahp_flag();
184
- tmp = tcg_temp_new_i32();
185
- if (dp) {
186
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
187
- fpst, ahp);
188
- } else {
189
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
190
- fpst, ahp);
191
- }
192
- tcg_temp_free_i32(ahp);
193
- tcg_temp_free_ptr(fpst);
194
- tcg_gen_shli_i32(tmp, tmp, 16);
195
- gen_mov_F0_vreg(0, rd);
196
- tmp2 = gen_vfp_mrs();
197
- tcg_gen_ext16u_i32(tmp2, tmp2);
198
- tcg_gen_or_i32(tmp, tmp, tmp2);
199
- tcg_temp_free_i32(tmp2);
200
- gen_vfp_msr(tmp);
201
- break;
202
- }
203
case 12: /* vrintr */
204
{
205
TCGv_ptr fpst = get_fpstatus_ptr(0);
206
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
207
index XXXXXXX..XXXXXXX 100644
208
--- a/target/arm/vfp.decode
209
+++ b/target/arm/vfp.decode
210
@@ -XXX,XX +XXX,XX @@ VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
211
vd=%vd_sp vm=%vm_sp
212
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
213
vd=%vd_dp vm=%vm_sp
214
+
215
+# VCVTB and VCVTT to f16: Vd format is always vd_sp; Vm format depends on size bit
216
+VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
217
+ vd=%vd_sp vm=%vm_sp
218
+VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
219
+ vd=%vd_sp vm=%vm_dp
220
--
222
--
221
2.20.1
223
2.20.1
222
224
223
225
diff view generated by jsdifflib
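
A note on the decodetree syntax above: a %field line concatenates its
listed bit ranges with the leftmost subfield most significant, so the
two VFML_scalar fields reproduce exactly the bit arithmetic of the
legacy decoder this patch deletes. The sketch below is illustrative
only (these helper names are invented; the real extractors are
generated by scripts/decodetree.py):

    /* uses extract32() from include/qemu/bitops.h */

    /* %vfml_scalar_q0_rm 0:3 5:1 -- the old "rm = vm20 * 2 + m" */
    static int vfml_scalar_q0_rm(uint32_t insn)
    {
        return (extract32(insn, 0, 3) << 1) | extract32(insn, 5, 1);
    }

    /* %vfml_scalar_q1_index 5:1 3:1 -- the old "index = m * 2 + vm3" */
    static int vfml_scalar_q1_index(uint32_t insn)
    {
        return (extract32(insn, 5, 1) << 1) | extract32(insn, 3, 1);
    }
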
Convert the Neon "load/store multiple structures" insns to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-12-peter.maydell@linaro.org
---
 target/arm/neon-ls.decode       |   7 ++
 target/arm/translate-neon.inc.c | 124 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          |  91 +----------------------
 3 files changed, 133 insertions(+), 89 deletions(-)

diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@@ -XXX,XX +XXX,XX @@
 # 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
 # This file works on the A32 encoding only; calling code for T32 has to
 # transform the insn into the A32 version first.
+
+%vd_dp  22:1 12:4
+
+# Neon load/store multiple structures
+
+VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
+               vd=%vd_dp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
                        gen_helper_gvec_fmlal_idx_a32);
     return true;
 }
+
+static struct {
+    int nregs;
+    int interleave;
+    int spacing;
+} const neon_ls_element_type[11] = {
+    {1, 4, 1},
+    {1, 4, 2},
+    {4, 1, 1},
+    {2, 2, 2},
+    {1, 3, 1},
+    {1, 3, 2},
+    {3, 1, 1},
+    {1, 1, 1},
+    {1, 2, 1},
+    {1, 2, 2},
+    {2, 1, 1}
+};
+
+static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
+                                      int stride)
+{
+    if (rm != 15) {
+        TCGv_i32 base;
+
+        base = load_reg(s, rn);
+        if (rm == 13) {
+            tcg_gen_addi_i32(base, base, stride);
+        } else {
+            TCGv_i32 index;
+            index = load_reg(s, rm);
+            tcg_gen_add_i32(base, base, index);
+            tcg_temp_free_i32(index);
+        }
+        store_reg(s, rn, base);
+    }
+}
+
+static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
+{
+    /* Neon load/store multiple structures */
+    int nregs, interleave, spacing, reg, n;
+    MemOp endian = s->be_data;
+    int mmu_idx = get_mem_index(s);
+    int size = a->size;
+    TCGv_i64 tmp64;
+    TCGv_i32 addr, tmp;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist */
+    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+        return false;
+    }
+    if (a->itype > 10) {
+        return false;
+    }
+    /* Catch UNDEF cases for bad values of align field */
+    switch (a->itype & 0xc) {
+    case 4:
+        if (a->align >= 2) {
+            return false;
+        }
+        break;
+    case 8:
+        if (a->align == 3) {
+            return false;
+        }
+        break;
+    default:
+        break;
+    }
+    nregs = neon_ls_element_type[a->itype].nregs;
+    interleave = neon_ls_element_type[a->itype].interleave;
+    spacing = neon_ls_element_type[a->itype].spacing;
+    if (size == 3 && (interleave | spacing) != 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /* For our purposes, bytes are always little-endian. */
+    if (size == 0) {
+        endian = MO_LE;
+    }
+    /*
+     * Consecutive little-endian elements from a single register
+     * can be promoted to a larger little-endian operation.
+     */
+    if (interleave == 1 && endian == MO_LE) {
+        size = 3;
+    }
+    tmp64 = tcg_temp_new_i64();
+    addr = tcg_temp_new_i32();
+    tmp = tcg_const_i32(1 << size);
+    load_reg_var(s, addr, a->rn);
+    for (reg = 0; reg < nregs; reg++) {
+        for (n = 0; n < 8 >> size; n++) {
+            int xs;
+            for (xs = 0; xs < interleave; xs++) {
+                int tt = a->vd + reg + spacing * xs;
+
+                if (a->l) {
+                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+                    neon_store_element64(tt, n, size, tmp64);
+                } else {
+                    neon_load_element64(tmp64, tt, n, size);
+                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
+                }
+                tcg_gen_add_i32(addr, addr, tmp);
+            }
+        }
+    }
+    tcg_temp_free_i32(addr);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i64(tmp64);
+
+    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
 }

-static struct {
-    int nregs;
-    int interleave;
-    int spacing;
-} const neon_ls_element_type[11] = {
-    {1, 4, 1},
-    {1, 4, 2},
-    {4, 1, 1},
-    {2, 2, 2},
-    {1, 3, 1},
-    {1, 3, 2},
-    {3, 1, 1},
-    {1, 1, 1},
-    {1, 2, 1},
-    {1, 2, 2},
-    {2, 1, 1}
-};
-
 /* Translate a NEON load/store element instruction.  Return nonzero if the
    instruction is invalid.  */
 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
 {
     int rd, rn, rm;
-    int op;
     int nregs;
-    int interleave;
-    int spacing;
     int stride;
     int size;
     int reg;
     int load;
-    int n;
     int vec_size;
-    int mmu_idx;
-    MemOp endian;
     TCGv_i32 addr;
     TCGv_i32 tmp;
-    TCGv_i32 tmp2;
-    TCGv_i64 tmp64;

     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return 1;
@@ -XXX,XX +XXX,XX @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     rn = (insn >> 16) & 0xf;
     rm = insn & 0xf;
     load = (insn & (1 << 21)) != 0;
-    endian = s->be_data;
-    mmu_idx = get_mem_index(s);
     if ((insn & (1 << 23)) == 0) {
-        /* Load store all elements.  */
-        op = (insn >> 8) & 0xf;
-        size = (insn >> 6) & 3;
-        if (op > 10)
-            return 1;
-        /* Catch UNDEF cases for bad values of align field */
-        switch (op & 0xc) {
-        case 4:
-            if (((insn >> 5) & 1) == 1) {
-                return 1;
-            }
-            break;
-        case 8:
-            if (((insn >> 4) & 3) == 3) {
-                return 1;
-            }
-            break;
-        default:
-            break;
-        }
-        nregs = neon_ls_element_type[op].nregs;
-        interleave = neon_ls_element_type[op].interleave;
-        spacing = neon_ls_element_type[op].spacing;
-        if (size == 3 && (interleave | spacing) != 1) {
-            return 1;
-        }
-        /* For our purposes, bytes are always little-endian.  */
-        if (size == 0) {
-            endian = MO_LE;
-        }
-        /* Consecutive little-endian elements from a single register
-         * can be promoted to a larger little-endian operation.
-         */
-        if (interleave == 1 && endian == MO_LE) {
-            size = 3;
-        }
-        tmp64 = tcg_temp_new_i64();
-        addr = tcg_temp_new_i32();
-        tmp2 = tcg_const_i32(1 << size);
-        load_reg_var(s, addr, rn);
-        for (reg = 0; reg < nregs; reg++) {
-            for (n = 0; n < 8 >> size; n++) {
-                int xs;
-                for (xs = 0; xs < interleave; xs++) {
-                    int tt = rd + reg + spacing * xs;
-
-                    if (load) {
-                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
-                        neon_store_element64(tt, n, size, tmp64);
-                    } else {
-                        neon_load_element64(tmp64, tt, n, size);
-                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
-                    }
-                    tcg_gen_add_i32(addr, addr, tmp2);
-                }
-            }
-        }
-        tcg_temp_free_i32(addr);
-        tcg_temp_free_i32(tmp2);
-        tcg_temp_free_i64(tmp64);
-        stride = nregs * interleave * 8;
+        /* Load store all elements -- handled already by decodetree */
+        return 1;
     } else {
         size = (insn >> 10) & 3;
         if (size == 3) {
--
2.20.1

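
Since gen_neon_ldst_base_update() introduced above is reused by all of
the remaining Neon load/store conversions, its post-indexing rule is
worth restating in isolation. This is a standalone plain-C sketch, not
QEMU code (the helper name and flat integer arguments are invented for
illustration):

    /*
     * Post-indexing for Neon load/store: Rm == 15 means no writeback,
     * Rm == 13 means "advance by the transfer size", and any other Rm
     * means "advance by the value of general-purpose register Rm".
     */
    static int neon_base_after(int rm, int base, int stride, int rm_value)
    {
        if (rm == 15) {
            return base;            /* leave Rn unchanged */
        } else if (rm == 13) {
            return base + stride;   /* post-increment by transfer size */
        } else {
            return base + rm_value; /* post-index by Rm */
        }
    }
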
Convert the Neon "load single structure to all lanes" insns to
decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-13-peter.maydell@linaro.org
---
 target/arm/neon-ls.decode       |  5 +++
 target/arm/translate-neon.inc.c | 73 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 55 +------------------------
 3 files changed, 80 insertions(+), 53 deletions(-)

diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@@ -XXX,XX +XXX,XX @@

 VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
                vd=%vd_dp
+
+# Neon load single element to all lanes
+
+VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
+              vd=%vd_dp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
     gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
     return true;
 }
+
+static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
+{
+    /* Neon load single structure to all lanes */
+    int reg, stride, vec_size;
+    int vd = a->vd;
+    int size = a->size;
+    int nregs = a->n + 1;
+    TCGv_i32 addr, tmp;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist */
+    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+        return false;
+    }
+
+    if (size == 3) {
+        if (nregs != 4 || a->a == 0) {
+            return false;
+        }
+        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
+        size = 2;
+    }
+    if (nregs == 1 && a->a == 1 && size == 0) {
+        return false;
+    }
+    if (nregs == 3 && a->a == 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * VLD1 to all lanes: T bit indicates how many Dregs to write.
+     * VLD2/3/4 to all lanes: T bit indicates register stride.
+     */
+    stride = a->t ? 2 : 1;
+    vec_size = nregs == 1 ? stride * 8 : 8;
+
+    tmp = tcg_temp_new_i32();
+    addr = tcg_temp_new_i32();
+    load_reg_var(s, addr, a->rn);
+    for (reg = 0; reg < nregs; reg++) {
+        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+                        s->be_data | size);
+        if ((vd & 1) && vec_size == 16) {
+            /*
+             * We cannot write 16 bytes at once because the
+             * destination is unaligned.
+             */
+            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+                                 8, 8, tmp);
+            tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
+                             neon_reg_offset(vd, 0), 8, 8);
+        } else {
+            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+                                 vec_size, vec_size, tmp);
+        }
+        tcg_gen_addi_i32(addr, addr, 1 << size);
+        vd += stride;
+    }
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(addr);
+
+    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
+
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     int size;
     int reg;
     int load;
-    int vec_size;
     TCGv_i32 addr;
     TCGv_i32 tmp;

@@ -XXX,XX +XXX,XX @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
     } else {
         size = (insn >> 10) & 3;
         if (size == 3) {
-            /* Load single element to all lanes.  */
-            int a = (insn >> 4) & 1;
-            if (!load) {
-                return 1;
-            }
-            size = (insn >> 6) & 3;
-            nregs = ((insn >> 8) & 3) + 1;
-
-            if (size == 3) {
-                if (nregs != 4 || a == 0) {
-                    return 1;
-                }
-                /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
-                size = 2;
-            }
-            if (nregs == 1 && a == 1 && size == 0) {
-                return 1;
-            }
-            if (nregs == 3 && a == 1) {
-                return 1;
-            }
-            addr = tcg_temp_new_i32();
-            load_reg_var(s, addr, rn);
-
-            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
-             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
-             */
-            stride = (insn & (1 << 5)) ? 2 : 1;
-            vec_size = nregs == 1 ? stride * 8 : 8;
-
-            tmp = tcg_temp_new_i32();
-            for (reg = 0; reg < nregs; reg++) {
-                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
-                                s->be_data | size);
-                if ((rd & 1) && vec_size == 16) {
-                    /* We cannot write 16 bytes at once because the
-                     * destination is unaligned.
-                     */
-                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
-                                         8, 8, tmp);
-                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
-                                     neon_reg_offset(rd, 0), 8, 8);
-                } else {
-                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
-                                         vec_size, vec_size, tmp);
-                }
-                tcg_gen_addi_i32(addr, addr, 1 << size);
-                rd += stride;
-            }
-            tcg_temp_free_i32(tmp);
-            tcg_temp_free_i32(addr);
-            stride = (1 << size) * nregs;
+            /* Load single element to all lanes -- handled by decodetree */
+            return 1;
         } else {
             /* Single element.  */
             int idx = (insn >> 4) & 0xf;
--
2.20.1

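
One subtlety in trans_VLD_all_lanes() above deserves a gloss: a VLD1
to-all-lanes that writes two D registers wants a single 16-byte vector
dup, but when vd is odd the destination pair does not sit at a
16-byte-aligned offset in the register file, so the code falls back to
an 8-byte dup plus an 8-byte copy. A minimal sketch of the guard
condition (standalone illustration; the predicate name is invented):

    #include <stdbool.h>

    /* vd is the D-register index, vec_size the intended write in bytes */
    static bool can_write_16_bytes_at_once(int vd, int vec_size)
    {
        /* 16-byte gvec writes must start on an even D register */
        return !(vec_size == 16 && (vd & 1));
    }
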
1
Convert the "single-precision" register moves to decodetree:
1
Convert the Neon "load/store single structure to one lane" insns to
2
* VMSR
2
decodetree.
3
* VMRS
3
4
* VMOV between general purpose register and single precision
4
As this is the last set of insns in the neon load/store group,
5
5
we can remove the whole disas_neon_ls_insn() function.
6
Note that the VMSR/VMRS conversions make our handling of
7
the "should this UNDEF?" checks consistent between the two
8
instructions:
9
* VMSR to MVFR0, MVFR1, MVFR2 now UNDEF from EL0
10
(previously was a nop)
11
* VMSR to FPSID now UNDEFs from EL0 or if VFPv3 or better
12
(previously was a nop)
13
* VMSR to FPINST and FPINST2 now UNDEF if VFPv3 or better
14
(previously would write to the register, which had no
15
guest-visible effect because we always UNDEF reads)
16
17
We also tighten up the decode: we were previously underdecoding
18
some SBZ or SBO bits.
19
20
The conversion of VMOV_single includes the expansion out of the
21
gen_mov_F0_vreg()/gen_vfp_mrs() and gen_mov_vreg_F0()/gen_vfp_msr()
22
sequences into the simpler direct load/store of the TCG temp via
23
neon_{load,store}_reg32(): we know in the new function that we're
24
always single-precision, we don't need to use the old-and-deprecated
25
cpu_F0* TCG globals, and we don't happen to have the declaration of
26
gen_vfp_msr() and gen_vfp_mrs() at the point in the file where the
27
new function is.
28
6
29
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
30
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20200430181003.21682-14-peter.maydell@linaro.org
31
---
10
---
32
target/arm/translate-vfp.inc.c | 161 +++++++++++++++++++++++++++++++++
11
target/arm/neon-ls.decode | 11 +++
33
target/arm/translate.c | 148 +-----------------------------
12
target/arm/translate-neon.inc.c | 89 +++++++++++++++++++
34
target/arm/vfp.decode | 4 +
13
target/arm/translate.c | 147 --------------------------------
35
3 files changed, 168 insertions(+), 145 deletions(-)
14
3 files changed, 100 insertions(+), 147 deletions(-)
36
15
37
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
16
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
38
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/translate-vfp.inc.c
18
--- a/target/arm/neon-ls.decode
40
+++ b/target/arm/translate-vfp.inc.c
19
+++ b/target/arm/neon-ls.decode
41
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
20
@@ -XXX,XX +XXX,XX @@ VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
21
22
VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
23
vd=%vd_dp
24
+
25
+# Neon load/store single structure to one lane
26
+%imm1_5_p1 5:1 !function=plus1
27
+%imm1_6_p1 6:1 !function=plus1
28
+
29
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \
30
+ vd=%vd_dp size=0 stride=1
31
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2 rm:4 \
32
+ vd=%vd_dp size=1 stride=%imm1_5_p1
33
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3 rm:4 \
34
+ vd=%vd_dp size=2 stride=%imm1_6_p1
35
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/translate-neon.inc.c
38
+++ b/target/arm/translate-neon.inc.c
39
@@ -XXX,XX +XXX,XX @@
40
* It might be possible to convert it to a standalone .c file eventually.
41
*/
42
43
+static inline int plus1(DisasContext *s, int x)
44
+{
45
+ return x + 1;
46
+}
47
+
48
/* Include the generated Neon decoder */
49
#include "decode-neon-dp.inc.c"
50
#include "decode-neon-ls.inc.c"
51
@@ -XXX,XX +XXX,XX @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
42
52
43
return true;
53
return true;
44
}
54
}
45
+
55
+
46
+static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
56
+static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
47
+{
57
+{
48
+ TCGv_i32 tmp;
58
+ /* Neon load/store single structure to one lane */
49
+ bool ignore_vfp_enabled = false;
59
+ int reg;
50
+
60
+ int nregs = a->n + 1;
51
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
61
+ int vd = a->vd;
52
+ /*
62
+ TCGv_i32 addr, tmp;
53
+ * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
63
+
54
+ * Writes to R15 are UNPREDICTABLE; we choose to undef.
64
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
55
+ */
65
+ return false;
56
+ if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) {
66
+ }
67
+
68
+ /* UNDEF accesses to D16-D31 if they don't exist */
69
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
70
+ return false;
71
+ }
72
+
73
+ /* Catch the UNDEF cases. This is unavoidably a bit messy. */
74
+ switch (nregs) {
75
+ case 1:
76
+ if (((a->align & (1 << a->size)) != 0) ||
77
+ (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
57
+ return false;
78
+ return false;
58
+ }
79
+ }
59
+ }
80
+ break;
60
+
81
+ case 3:
61
+ switch (a->reg) {
82
+ if ((a->align & 1) != 0) {
62
+ case ARM_VFP_FPSID:
63
+ /*
64
+ * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
65
+ * all ID registers to privileged access only.
66
+ */
67
+ if (IS_USER(s) && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
68
+ return false;
83
+ return false;
69
+ }
84
+ }
70
+ ignore_vfp_enabled = true;
85
+ /* fall through */
86
+ case 2:
87
+ if (a->size == 2 && (a->align & 2) != 0) {
88
+ return false;
89
+ }
71
+ break;
90
+ break;
72
+ case ARM_VFP_MVFR0:
91
+ case 4:
73
+ case ARM_VFP_MVFR1:
92
+ if ((a->size == 2) && ((a->align & 3) == 3)) {
74
+ if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
75
+ return false;
76
+ }
77
+ ignore_vfp_enabled = true;
78
+ break;
79
+ case ARM_VFP_MVFR2:
80
+ if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
81
+ return false;
82
+ }
83
+ ignore_vfp_enabled = true;
84
+ break;
85
+ case ARM_VFP_FPSCR:
86
+ break;
87
+ case ARM_VFP_FPEXC:
88
+ if (IS_USER(s)) {
89
+ return false;
90
+ }
91
+ ignore_vfp_enabled = true;
92
+ break;
93
+ case ARM_VFP_FPINST:
94
+ case ARM_VFP_FPINST2:
95
+ /* Not present in VFPv3 */
96
+ if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
97
+ return false;
93
+ return false;
98
+ }
94
+ }
99
+ break;
95
+ break;
100
+ default:
96
+ default:
97
+ abort();
98
+ }
99
+ if ((vd + a->stride * (nregs - 1)) > 31) {
100
+ /*
101
+ * Attempts to write off the end of the register file are
102
+ * UNPREDICTABLE; we choose to UNDEF because otherwise we would
103
+ * access off the end of the array that holds the register data.
104
+ */
101
+ return false;
105
+ return false;
102
+ }
106
+ }
103
+
104
+ if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
105
+ return true;
106
+ }
107
+
108
+ if (a->l) {
109
+ /* VMRS, move VFP special register to gp register */
110
+ switch (a->reg) {
111
+ case ARM_VFP_FPSID:
112
+ case ARM_VFP_FPEXC:
113
+ case ARM_VFP_FPINST:
114
+ case ARM_VFP_FPINST2:
115
+ case ARM_VFP_MVFR0:
116
+ case ARM_VFP_MVFR1:
117
+ case ARM_VFP_MVFR2:
118
+ tmp = load_cpu_field(vfp.xregs[a->reg]);
119
+ break;
120
+ case ARM_VFP_FPSCR:
121
+ if (a->rt == 15) {
122
+ tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
123
+ tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
124
+ } else {
125
+ tmp = tcg_temp_new_i32();
126
+ gen_helper_vfp_get_fpscr(tmp, cpu_env);
127
+ }
128
+ break;
129
+ default:
130
+ g_assert_not_reached();
131
+ }
132
+
133
+ if (a->rt == 15) {
134
+ /* Set the 4 flag bits in the CPSR. */
135
+ gen_set_nzcv(tmp);
136
+ tcg_temp_free_i32(tmp);
137
+ } else {
138
+ store_reg(s, a->rt, tmp);
139
+ }
140
+ } else {
141
+ /* VMSR, move gp register to VFP special register */
142
+ switch (a->reg) {
143
+ case ARM_VFP_FPSID:
144
+ case ARM_VFP_MVFR0:
145
+ case ARM_VFP_MVFR1:
146
+ case ARM_VFP_MVFR2:
147
+ /* Writes are ignored. */
148
+ break;
149
+ case ARM_VFP_FPSCR:
150
+ tmp = load_reg(s, a->rt);
151
+ gen_helper_vfp_set_fpscr(cpu_env, tmp);
152
+ tcg_temp_free_i32(tmp);
153
+ gen_lookup_tb(s);
154
+ break;
155
+ case ARM_VFP_FPEXC:
156
+ /*
157
+ * TODO: VFP subarchitecture support.
158
+ * For now, keep the EN bit only
159
+ */
160
+ tmp = load_reg(s, a->rt);
161
+ tcg_gen_andi_i32(tmp, tmp, 1 << 30);
162
+ store_cpu_field(tmp, vfp.xregs[a->reg]);
163
+ gen_lookup_tb(s);
164
+ break;
165
+ case ARM_VFP_FPINST:
166
+ case ARM_VFP_FPINST2:
167
+ tmp = load_reg(s, a->rt);
168
+ store_cpu_field(tmp, vfp.xregs[a->reg]);
169
+ break;
170
+ default:
171
+ g_assert_not_reached();
172
+ }
173
+ }
174
+
175
+ return true;
176
+}
177
+
178
+static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
179
+{
180
+ TCGv_i32 tmp;
181
+
107
+
182
+ if (!vfp_access_check(s)) {
108
+ if (!vfp_access_check(s)) {
183
+ return true;
109
+ return true;
184
+ }
110
+ }
185
+
111
+
186
+ if (a->l) {
112
+ tmp = tcg_temp_new_i32();
187
+ /* VFP to general purpose register */
113
+ addr = tcg_temp_new_i32();
188
+ tmp = tcg_temp_new_i32();
114
+ load_reg_var(s, addr, a->rn);
189
+ neon_load_reg32(tmp, a->vn);
115
+ /*
190
+ if (a->rt == 15) {
116
+ * TODO: if we implemented alignment exceptions, we should check
191
+ /* Set the 4 flag bits in the CPSR. */
117
+ * addr against the alignment encoded in a->align here.
192
+ gen_set_nzcv(tmp);
118
+ */
193
+ tcg_temp_free_i32(tmp);
119
+ for (reg = 0; reg < nregs; reg++) {
194
+ } else {
120
+ if (a->l) {
195
+ store_reg(s, a->rt, tmp);
121
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
196
+ }
122
+ s->be_data | a->size);
197
+ } else {
123
+ neon_store_element(vd, a->reg_idx, a->size, tmp);
198
+ /* general purpose register to VFP */
124
+ } else { /* Store */
199
+ tmp = load_reg(s, a->rt);
125
+ neon_load_element(tmp, vd, a->reg_idx, a->size);
200
+ neon_store_reg32(tmp, a->vn);
126
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
201
+ tcg_temp_free_i32(tmp);
127
+ s->be_data | a->size);
202
+ }
128
+ }
129
+ vd += a->stride;
130
+ tcg_gen_addi_i32(addr, addr, 1 << a->size);
131
+ }
132
+ tcg_temp_free_i32(addr);
133
+ tcg_temp_free_i32(tmp);
134
+
135
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
203
+
136
+
204
+ return true;
137
+ return true;
205
+}
138
+}
206
diff --git a/target/arm/translate.c b/target/arm/translate.c
139
diff --git a/target/arm/translate.c b/target/arm/translate.c
207
index XXXXXXX..XXXXXXX 100644
140
index XXXXXXX..XXXXXXX 100644
208
--- a/target/arm/translate.c
141
--- a/target/arm/translate.c
209
+++ b/target/arm/translate.c
142
+++ b/target/arm/translate.c
210
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
143
@@ -XXX,XX +XXX,XX @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
211
TCGv_i32 addr;
144
tcg_temp_free_i32(rd);
212
TCGv_i32 tmp;
145
}
213
TCGv_i32 tmp2;
146
214
- bool ignore_vfp_enabled = false;
147
-
215
148
-/* Translate a NEON load/store element instruction. Return nonzero if the
216
if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
149
- instruction is invalid. */
217
return 1;
150
-static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
218
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
151
-{
219
* for invalid encodings; we will generate incorrect syndrome information
152
- int rd, rn, rm;
220
* for attempts to execute invalid vfp/neon encodings with FP disabled.
153
- int nregs;
221
*/
154
- int stride;
222
- if ((insn & 0x0fe00fff) == 0x0ee00a10) {
155
- int size;
223
- rn = (insn >> 16) & 0xf;
156
- int reg;
224
- if (rn == ARM_VFP_FPSID || rn == ARM_VFP_FPEXC || rn == ARM_VFP_MVFR2
157
- int load;
225
- || rn == ARM_VFP_MVFR1 || rn == ARM_VFP_MVFR0) {
158
- TCGv_i32 addr;
226
- ignore_vfp_enabled = true;
159
- TCGv_i32 tmp;
160
-
161
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
162
- return 1;
163
- }
164
-
165
- /* FIXME: this access check should not take precedence over UNDEF
166
- * for invalid encodings; we will generate incorrect syndrome information
167
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
168
- */
169
- if (s->fp_excp_el) {
170
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
171
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
172
- return 0;
173
- }
174
-
175
- if (!s->vfp_enabled)
176
- return 1;
177
- VFP_DREG_D(rd, insn);
178
- rn = (insn >> 16) & 0xf;
179
- rm = insn & 0xf;
180
- load = (insn & (1 << 21)) != 0;
181
- if ((insn & (1 << 23)) == 0) {
182
- /* Load store all elements -- handled already by decodetree */
183
- return 1;
184
- } else {
185
- size = (insn >> 10) & 3;
186
- if (size == 3) {
187
- /* Load single element to all lanes -- handled by decodetree */
188
- return 1;
189
- } else {
190
- /* Single element. */
191
- int idx = (insn >> 4) & 0xf;
192
- int reg_idx;
193
- switch (size) {
194
- case 0:
195
- reg_idx = (insn >> 5) & 7;
196
- stride = 1;
197
- break;
198
- case 1:
199
- reg_idx = (insn >> 6) & 3;
200
- stride = (insn & (1 << 5)) ? 2 : 1;
201
- break;
202
- case 2:
203
- reg_idx = (insn >> 7) & 1;
204
- stride = (insn & (1 << 6)) ? 2 : 1;
205
- break;
206
- default:
207
- abort();
208
- }
209
- nregs = ((insn >> 8) & 3) + 1;
210
- /* Catch the UNDEF cases. This is unavoidably a bit messy. */
211
- switch (nregs) {
212
- case 1:
213
- if (((idx & (1 << size)) != 0) ||
214
- (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
215
- return 1;
216
- }
217
- break;
218
- case 3:
219
- if ((idx & 1) != 0) {
220
- return 1;
221
- }
222
- /* fall through */
223
- case 2:
224
- if (size == 2 && (idx & 2) != 0) {
225
- return 1;
226
- }
227
- break;
228
- case 4:
229
- if ((size == 2) && ((idx & 3) == 3)) {
230
- return 1;
231
- }
232
- break;
233
- default:
234
- abort();
235
- }
236
- if ((rd + stride * (nregs - 1)) > 31) {
237
- /* Attempts to write off the end of the register file
238
- * are UNPREDICTABLE; we choose to UNDEF because otherwise
239
- * the neon_load_reg() would write off the end of the array.
240
- */
241
- return 1;
242
- }
243
- tmp = tcg_temp_new_i32();
244
- addr = tcg_temp_new_i32();
245
- load_reg_var(s, addr, rn);
246
- for (reg = 0; reg < nregs; reg++) {
247
- if (load) {
248
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
249
- s->be_data | size);
250
- neon_store_element(rd, reg_idx, size, tmp);
251
- } else { /* Store */
252
- neon_load_element(tmp, rd, reg_idx, size);
253
- gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
254
- s->be_data | size);
255
- }
256
- rd += stride;
257
- tcg_gen_addi_i32(addr, addr, 1 << size);
258
- }
259
- tcg_temp_free_i32(addr);
260
- tcg_temp_free_i32(tmp);
261
- stride = nregs * (1 << size);
227
- }
262
- }
228
- }
263
- }
229
- if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
264
- if (rm != 15) {
230
+    if (!vfp_access_check(s)) {
         return 0;
     }

@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     switch ((insn >> 24) & 0xf) {
     case 0xe:
         if (insn & (1 << 4)) {
-            /* single register transfer */
-            rd = (insn >> 12) & 0xf;
-            if (dp) {
-                /* already handled by decodetree */
-                return 1;
-            } else { /* !dp */
-                bool is_sysreg;
-
-                if ((insn & 0x6f) != 0x00)
-                    return 1;
-                rn = VFP_SREG_N(insn);
-
-                is_sysreg = extract32(insn, 21, 1);
-
-                if (arm_dc_feature(s, ARM_FEATURE_M)) {
-                    /*
-                     * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
-                     * Writes to R15 are UNPREDICTABLE; we choose to undef.
-                     */
-                    if (is_sysreg && (rd == 15 || (rn >> 1) != ARM_VFP_FPSCR)) {
-                        return 1;
-                    }
-                }
-
-                if (insn & ARM_CP_RW_BIT) {
-                    /* vfp->arm */
-                    if (is_sysreg) {
-                        /* system register */
-                        rn >>= 1;
-
-                        switch (rn) {
-                        case ARM_VFP_FPSID:
-                            /* VFP2 allows access to FSID from userspace.
-                               VFP3 restricts all id registers to privileged
-                               accesses.  */
-                            if (IS_USER(s)
-                                && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
-                                return 1;
-                            }
-                            tmp = load_cpu_field(vfp.xregs[rn]);
-                            break;
-                        case ARM_VFP_FPEXC:
-                            if (IS_USER(s))
-                                return 1;
-                            tmp = load_cpu_field(vfp.xregs[rn]);
-                            break;
-                        case ARM_VFP_FPINST:
-                        case ARM_VFP_FPINST2:
-                            /* Not present in VFP3.  */
-                            if (IS_USER(s)
-                                || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
-                                return 1;
-                            }
-                            tmp = load_cpu_field(vfp.xregs[rn]);
-                            break;
-                        case ARM_VFP_FPSCR:
-                            if (rd == 15) {
-                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
-                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
-                            } else {
-                                tmp = tcg_temp_new_i32();
-                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
-                            }
-                            break;
-                        case ARM_VFP_MVFR2:
-                            if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
-                                return 1;
-                            }
-                            /* fall through */
-                        case ARM_VFP_MVFR0:
-                        case ARM_VFP_MVFR1:
-                            if (IS_USER(s)
-                                || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
-                                return 1;
-                            }
-                            tmp = load_cpu_field(vfp.xregs[rn]);
-                            break;
-                        default:
-                            return 1;
-                        }
-                    } else {
-                        gen_mov_F0_vreg(0, rn);
-                        tmp = gen_vfp_mrs();
-                    }
-                    if (rd == 15) {
-                        /* Set the 4 flag bits in the CPSR.  */
-                        gen_set_nzcv(tmp);
-                        tcg_temp_free_i32(tmp);
-                    } else {
-                        store_reg(s, rd, tmp);
-                    }
-                } else {
-                    /* arm->vfp */
-                    if (is_sysreg) {
-                        rn >>= 1;
-                        /* system register */
-                        switch (rn) {
-                        case ARM_VFP_FPSID:
-                        case ARM_VFP_MVFR0:
-                        case ARM_VFP_MVFR1:
-                            /* Writes are ignored.  */
-                            break;
-                        case ARM_VFP_FPSCR:
-                            tmp = load_reg(s, rd);
-                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
-                            tcg_temp_free_i32(tmp);
-                            gen_lookup_tb(s);
-                            break;
-                        case ARM_VFP_FPEXC:
-                            if (IS_USER(s))
-                                return 1;
-                            /* TODO: VFP subarchitecture support.
-                             * For now, keep the EN bit only */
-                            tmp = load_reg(s, rd);
-                            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
-                            store_cpu_field(tmp, vfp.xregs[rn]);
-                            gen_lookup_tb(s);
-                            break;
-                        case ARM_VFP_FPINST:
-                        case ARM_VFP_FPINST2:
-                            if (IS_USER(s)) {
-                                return 1;
-                            }
-                            tmp = load_reg(s, rd);
-                            store_cpu_field(tmp, vfp.xregs[rn]);
-                            break;
-                        default:
-                            return 1;
-                        }
-                    } else {
-                        tmp = load_reg(s, rd);
-                        gen_vfp_msr(tmp);
-                        gen_mov_vreg_F0(0, rn);
-                    }
-                }
-            }
+            /* already handled by decodetree */
+            return 1;
         } else {
             /* data processing */
             bool rd_is_dp = dp;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \

 VDUP         ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
              vn=%vn_dp
+
+VMSR_VMRS    ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
+VMOV_single  ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 \
+             vn=%vn_sp
--
2.20.1

-        TCGv_i32 base;
-
-        base = load_reg(s, rn);
-        if (rm == 13) {
-            tcg_gen_addi_i32(base, base, stride);
-        } else {
-            TCGv_i32 index;
-            index = load_reg(s, rm);
-            tcg_gen_add_i32(base, base, index);
-            tcg_temp_free_i32(index);
-        }
-        store_reg(s, rn, base);
-    }
-    return 0;
-}
-
 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
 {
     switch (size) {
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
         }
         return;
     }
-    if ((insn & 0x0f100000) == 0x04000000) {
-        /* NEON load/store.  */
-        if (disas_neon_ls_insn(s, insn)) {
-            goto illegal_op;
-        }
-        return;
-    }
     if ((insn & 0x0e000f00) == 0x0c000100) {
         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
             /* iWMMXt register transfer.  */
@@ -XXX,XX +XXX,XX @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
         }
         break;
     case 12:
-        if ((insn & 0x01100000) == 0x01000000) {
-            if (disas_neon_ls_insn(s, insn)) {
-                goto illegal_op;
-            }
-            break;
-        }
         goto illegal_op;
     default:
     illegal_op:
--
2.20.1
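The Rt == 15 case removed above is the "VMRS APSR_nzcv, FPSCR" form: only
FPSCR[31:28] (the N, Z, C and V flags) are transferred into the CPSR, which
is what the 0xf0000000 mask implements. A minimal host-side sketch of that
masking semantics, with an illustrative name rather than anything from the
QEMU API:

    #include <stdint.h>

    /* Model of "VMRS APSR_nzcv, FPSCR": only bits [31:28] reach the flags. */
    uint32_t fpscr_to_apsr_nzcv(uint32_t fpscr)
    {
        return fpscr & 0xf0000000;
    }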
Convert the VFP VMOV (immediate) instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 129 +++++++++++++++++++++++++++++++++
 target/arm/translate.c         |  27 +------
 target/arm/vfp.decode          |   5 ++
 3 files changed, 136 insertions(+), 25 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)

     return true;
 }
+
+static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
+{
+    uint32_t delta_d = 0;
+    uint32_t bank_mask = 0;
+    int veclen = s->vec_len;
+    TCGv_i32 fd;
+    uint32_t n, i, vd;
+
+    vd = a->vd;
+
+    if (!dc_isar_feature(aa32_fpshvec, s) &&
+        (veclen != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    if (veclen > 0) {
+        bank_mask = 0x18;
+        /* Figure out what type of vector operation this is. */
+        if ((vd & bank_mask) == 0) {
+            /* scalar */
+            veclen = 0;
+        } else {
+            delta_d = s->vec_stride + 1;
+        }
+    }
+
+    n = (a->imm4h << 28) & 0x80000000;
+    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
+    if (i & 0x40) {
+        i |= 0x780;
+    } else {
+        i |= 0x800;
+    }
+    n |= i << 19;
+
+    fd = tcg_temp_new_i32();
+    tcg_gen_movi_i32(fd, n);
+
+    for (;;) {
+        neon_store_reg32(fd, vd);
+
+        if (veclen == 0) {
+            break;
+        }
+
+        /* Set up the operands for the next iteration */
+        veclen--;
+        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+    }
+
+    tcg_temp_free_i32(fd);
+    return true;
+}
+
+static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
+{
+    uint32_t delta_d = 0;
+    uint32_t bank_mask = 0;
+    int veclen = s->vec_len;
+    TCGv_i64 fd;
+    uint32_t n, i, vd;
+
+    vd = a->vd;
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && (vd & 0x10)) {
+        return false;
+    }
+
+    if (!dc_isar_feature(aa32_fpshvec, s) &&
+        (veclen != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    if (veclen > 0) {
+        bank_mask = 0xc;
+        /* Figure out what type of vector operation this is. */
+        if ((vd & bank_mask) == 0) {
+            /* scalar */
+            veclen = 0;
+        } else {
+            delta_d = (s->vec_stride >> 1) + 1;
+        }
+    }
+
+    n = (a->imm4h << 28) & 0x80000000;
+    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
+    if (i & 0x40) {
+        i |= 0x3f80;
+    } else {
+        i |= 0x4000;
+    }
+    n |= i << 16;
+
+    fd = tcg_temp_new_i64();
+    tcg_gen_movi_i64(fd, ((uint64_t)n) << 32);
+
+    for (;;) {
+        neon_store_reg64(fd, vd);
+
+        if (veclen == 0) {
+            break;
+        }
+
+        /* Set up the operands for the next iteration */
+        veclen--;
+        vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
+    }
+
+    tcg_temp_free_i64(fd);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_neon_dup_high16(TCGv_i32 var)
  */
 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
 {
-    uint32_t rd, rn, rm, op, i, n, delta_d, delta_m, bank_mask;
+    uint32_t rd, rn, rm, op, delta_d, delta_m, bank_mask;
     int dp, veclen;
     TCGv_i32 tmp;
     TCGv_i32 tmp2;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);

     switch (op) {
-    case 0 ... 13:
+    case 0 ... 14:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation. */
         switch (op) {
-        case 14: /* fconst */
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
-                return 1;
-            }
-
-            n = (insn << 12) & 0x80000000;
-            i = ((insn >> 12) & 0x70) | (insn & 0xf);
-            if (dp) {
-                if (i & 0x40)
-                    i |= 0x3f80;
-                else
-                    i |= 0x4000;
-                n |= i << 16;
-                tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
-            } else {
-                if (i & 0x40)
-                    i |= 0x780;
-                else
-                    i |= 0x800;
-                n |= i << 19;
-                tcg_gen_movi_i32(cpu_F0s, n);
-            }
-            break;
         case 15: /* extension space */
             switch (rn) {
             case 0: /* cpy */
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VFM_sp ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
                vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=2
 VFM_dp ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2
+
+VMOV_imm_sp ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
+               vd=%vd_sp
+VMOV_imm_dp ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
+               vd=%vd_dp
--
2.20.1

Convert the Neon 3-reg-same VADD and VSUB insns to decodetree.

Note that we don't need the neon_3r_sizes[op] check here because all
size values are OK for VADD and VSUB; we'll add this when we convert
the first insn that has size restrictions.

For this we need one of the GVecGen*Fn typedefs currently in
translate-a64.h; move them all to translate.h as a block so they
are visible to the 32-bit decoder.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-15-peter.maydell@linaro.org
---
 target/arm/translate-a64.h      |  9 --------
 target/arm/translate.h          |  9 ++++++++
 target/arm/neon-dp.decode       | 17 +++++++++++++++
 target/arm/translate-neon.inc.c | 38 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 14 ++++--------
 5 files changed, 68 insertions(+), 19 deletions(-)

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)

 bool disas_sve(DisasContext *, uint32_t);

-/* Note that the gvec expanders operate on offsets + sizes. */
-typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
-typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
-                         uint32_t, uint32_t);
-typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
-                        uint32_t, uint32_t, uint32_t);
-typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
-                        uint32_t, uint32_t, uint32_t);
-
 #endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 #define dc_isar_feature(name, ctx) \
     ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })

+/* Note that the gvec expanders operate on offsets + sizes. */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
+                         uint32_t, uint32_t);
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+                        uint32_t, uint32_t, uint32_t);
+typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
+                        uint32_t, uint32_t, uint32_t);
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@
 #
 # This file is processed by scripts/decodetree.py
 #
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp  5:1 0:4
+%vn_dp  7:1 16:4
+%vd_dp  22:1 12:4

 # Encodings for Neon data processing instructions where the T32 encoding
 # is a simple transformation of the A32 encoding.
@@ -XXX,XX +XXX,XX @@
 # 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
 # This file works on the A32 encoding only; calling code for T32 has to
 # transform the insn into the A32 version first.
+
+######################################################################
+# 3-reg-same grouping:
+# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4
+######################################################################
+
+&3same vm vn vd q size
+
+@3same  .... ... . . . size:2 .... .... .... . q:1 . . .... \
+        &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VADD_3s  1111 001 0 0 . .. .... .... 1000 . . . 0 ....  @3same
+VSUB_3s  1111 001 1 0 . .. .... .... 1000 . . . 0 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)

     return true;
 }
+
+static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
+{
+    int vec_size = a->q ? 16 : 8;
+    int rd_ofs = neon_reg_offset(a->vd, 0);
+    int rn_ofs = neon_reg_offset(a->vn, 0);
+    int rm_ofs = neon_reg_offset(a->vm, 0);
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vn | a->vm | a->vd) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+    return true;
+}
+
+#define DO_3SAME(INSN, FUNC)                                            \
+    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
+    {                                                                   \
+        return do_3same(s, a, FUNC);                                    \
+    }
+
+DO_3SAME(VADD, tcg_gen_gvec_add)
+DO_3SAME(VSUB, tcg_gen_gvec_sub)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         return 0;

-    case NEON_3R_VADD_VSUB:
-        if (u) {
-            tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-        } else {
-            tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-        }
-        return 0;
-
     case NEON_3R_VQADD:
         tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                        rn_ofs, rm_ofs, vec_size, vec_size,
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
                        u ? &ushl_op[size] : &sshl_op[size]);
         return 0;
+
+    case NEON_3R_VADD_VSUB:
+        /* Already handled by decodetree */
+        return 1;
     }

     if (size == 3) {
--
2.20.1
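For readers unfamiliar with the gvec expander convention the commit message
above refers to: a GVecGen3Fn works on register-file byte offsets plus an
operation size (oprsz) and a maximum vector size (maxsz), clearing the tail
in between. A toy model, assuming a flat byte-array register file; the names
here are illustrative, not QEMU's:

    #include <stdint.h>
    #include <string.h>

    uint8_t regfile[64];   /* stand-in for the CPU vector register file */

    /* Shaped like a GVecGen3Fn: offsets in, element-wise op, tail cleared. */
    void toy_gvec_add8(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                       uint32_t oprsz, uint32_t maxsz)
    {
        for (uint32_t i = 0; i < oprsz; i++) {
            regfile[dofs + i] = regfile[aofs + i] + regfile[bofs + i];
        }
        memset(regfile + dofs + oprsz, 0, maxsz - oprsz);
    }

In the patch above, do_3same() passes vec_size (8 bytes for a D-reg
operation, 16 for a Q-reg operation) as both oprsz and maxsz.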
Convert the VFP fused multiply-add instructions (VFNMA, VFNMS,
VFMA, VFMS) to decodetree.

Note that in the old decode structure we were implementing
these to honour the VFP vector stride/length. These instructions
were introduced in VFPv4, and in the v7A architecture they
are UNPREDICTABLE if the vector stride or length are non-zero.
In v8A they must UNDEF if stride or length are non-zero, like
all VFP instructions; we choose to UNDEF always.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 121 +++++++++++++++++++++++++++++++++
 target/arm/translate.c         |  53 +--------------
 target/arm/vfp.decode          |   9 +++
 3 files changed, 131 insertions(+), 52 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
 }
+
+static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
+{
+    /*
+     * VFNMA : fd = muladd(-fd,  fn, fm)
+     * VFNMS : fd = muladd(-fd, -fn, fm)
+     * VFMA  : fd = muladd( fd,  fn, fm)
+     * VFMS  : fd = muladd( fd, -fn, fm)
+     *
+     * These are fused multiply-add, and must be done as one floating
+     * point operation with no rounding between the multiplication and
+     * addition steps.  NB that doing the negations here as separate
+     * steps is correct : an input NaN should come out with its sign
+     * bit flipped if it is a negated-input.
+     */
+    TCGv_ptr fpst;
+    TCGv_i32 vn, vm, vd;
+
+    /*
+     * Present in VFPv4 only.
+     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     */
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+        (s->vec_len != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vn = tcg_temp_new_i32();
+    vm = tcg_temp_new_i32();
+    vd = tcg_temp_new_i32();
+
+    neon_load_reg32(vn, a->vn);
+    neon_load_reg32(vm, a->vm);
+    if (a->o2) {
+        /* VFNMS, VFMS */
+        gen_helper_vfp_negs(vn, vn);
+    }
+    neon_load_reg32(vd, a->vd);
+    if (a->o1 & 1) {
+        /* VFNMA, VFNMS */
+        gen_helper_vfp_negs(vd, vd);
+    }
+    fpst = get_fpstatus_ptr(0);
+    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
+    neon_store_reg32(vd, a->vd);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(vn);
+    tcg_temp_free_i32(vm);
+    tcg_temp_free_i32(vd);
+
+    return true;
+}
+
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+{
+    /*
+     * VFNMA : fd = muladd(-fd,  fn, fm)
+     * VFNMS : fd = muladd(-fd, -fn, fm)
+     * VFMA  : fd = muladd( fd,  fn, fm)
+     * VFMS  : fd = muladd( fd, -fn, fm)
+     *
+     * These are fused multiply-add, and must be done as one floating
+     * point operation with no rounding between the multiplication and
+     * addition steps.  NB that doing the negations here as separate
+     * steps is correct : an input NaN should come out with its sign
+     * bit flipped if it is a negated-input.
+     */
+    TCGv_ptr fpst;
+    TCGv_i64 vn, vm, vd;
+
+    /*
+     * Present in VFPv4 only.
+     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     */
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+        (s->vec_len != 0 || s->vec_stride != 0)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vn = tcg_temp_new_i64();
+    vm = tcg_temp_new_i64();
+    vd = tcg_temp_new_i64();
+
+    neon_load_reg64(vn, a->vn);
+    neon_load_reg64(vm, a->vm);
+    if (a->o2) {
+        /* VFNMS, VFMS */
+        gen_helper_vfp_negd(vn, vn);
+    }
+    neon_load_reg64(vd, a->vd);
+    if (a->o1 & 1) {
+        /* VFNMA, VFNMS */
+        gen_helper_vfp_negd(vd, vd);
+    }
+    fpst = get_fpstatus_ptr(0);
+    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
+    neon_store_reg64(vd, a->vd);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i64(vn);
+    tcg_temp_free_i64(vm);
+    tcg_temp_free_i64(vd);
+
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);

     switch (op) {
-    case 0 ... 8:
+    case 0 ... 13:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation. */
         switch (op) {
-        case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
-        case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
-        case 12: /* VFMA  : fd = muladd( fd, fn, fm) */
-        case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
-            /* These are fused multiply-add, and must be done as one
-             * floating point operation with no rounding between the
-             * multiplication and addition steps.
-             * NB that doing the negations here as separate steps is
-             * correct : an input NaN should come out with its sign bit
-             * flipped if it is a negated-input.
-             */
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
-                return 1;
-            }
-            if (dp) {
-                TCGv_ptr fpst;
-                TCGv_i64 frd;
-                if (op & 1) {
-                    /* VFNMS, VFMS */
-                    gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
-                }
-                frd = tcg_temp_new_i64();
-                tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
-                if (op & 2) {
-                    /* VFNMA, VFNMS */
-                    gen_helper_vfp_negd(frd, frd);
-                }
-                fpst = get_fpstatus_ptr(0);
-                gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
-                                       cpu_F1d, frd, fpst);
-                tcg_temp_free_ptr(fpst);
-                tcg_temp_free_i64(frd);
-            } else {
-                TCGv_ptr fpst;
-                TCGv_i32 frd;
-                if (op & 1) {
-                    /* VFNMS, VFMS */
-                    gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
-                }
-                frd = tcg_temp_new_i32();
-                tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
-                if (op & 2) {
-                    gen_helper_vfp_negs(frd, frd);
-                }
-                fpst = get_fpstatus_ptr(0);
-                gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
-                                       cpu_F1s, frd, fpst);
-                tcg_temp_free_ptr(fpst);
-                tcg_temp_free_i32(frd);
-            }
-            break;
         case 14: /* fconst */
             if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
                 return 1;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... \
                vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VFM_sp ---- 1110 1.01 .... .... 1010 . o2:1 . 0 .... \
+               vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=1
+VFM_dp ---- 1110 1.01 .... .... 1011 . o2:1 . 0 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=1
+VFM_sp ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
+               vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=2
+VFM_dp ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2
--
2.20.1

Convert the Neon logic ops in the 3-reg-same grouping to decodetree.
Note that for the logic ops the 'size' field forms part of their
decode and the actual operations are always bitwise.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-16-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       | 12 +++++++++++
 target/arm/translate-neon.inc.c | 19 +++++++++++++++++
 target/arm/translate.c          | 38 +--------------------------------
 3 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@
 @3same  .... ... . . . size:2 .... .... .... . q:1 . . .... \
         &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp

+@3same_logic  .... ... . . . .. .... .... .... . q:1 .. .... \
+              &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
+
+VAND_3s  1111 001 0 0 . 00 .... .... 0001 ... 1 ....  @3same_logic
+VBIC_3s  1111 001 0 0 . 01 .... .... 0001 ... 1 ....  @3same_logic
+VORR_3s  1111 001 0 0 . 10 .... .... 0001 ... 1 ....  @3same_logic
+VORN_3s  1111 001 0 0 . 11 .... .... 0001 ... 1 ....  @3same_logic
+VEOR_3s  1111 001 1 0 . 00 .... .... 0001 ... 1 ....  @3same_logic
+VBSL_3s  1111 001 1 0 . 01 .... .... 0001 ... 1 ....  @3same_logic
+VBIT_3s  1111 001 1 0 . 10 .... .... 0001 ... 1 ....  @3same_logic
+VBIF_3s  1111 001 1 0 . 11 .... .... 0001 ... 1 ....  @3same_logic
+
 VADD_3s  1111 001 0 0 . .. .... .... 1000 . . . 0 ....  @3same
 VSUB_3s  1111 001 1 0 . .. .... .... 1000 . . . 0 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)

 DO_3SAME(VADD, tcg_gen_gvec_add)
 DO_3SAME(VSUB, tcg_gen_gvec_sub)
+DO_3SAME(VAND, tcg_gen_gvec_and)
+DO_3SAME(VBIC, tcg_gen_gvec_andc)
+DO_3SAME(VORR, tcg_gen_gvec_or)
+DO_3SAME(VORN, tcg_gen_gvec_orc)
+DO_3SAME(VEOR, tcg_gen_gvec_xor)
+
+/* These insns are all gvec_bitsel but with the inputs in various orders. */
+#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
+    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
+    }                                                                   \
+    DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
+DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
+DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         return 1;

-    case NEON_3R_LOGIC: /* Logic ops.  */
-        switch ((u << 2) | size) {
-        case 0: /* VAND */
-            tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-            break;
-        case 1: /* VBIC */
-            tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
-                              vec_size, vec_size);
-            break;
-        case 2: /* VORR */
-            tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
-                            vec_size, vec_size);
-            break;
-        case 3: /* VORN */
-            tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-            break;
-        case 4: /* VEOR */
-            tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-            break;
-        case 5: /* VBSL */
-            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
-                                vec_size, vec_size);
-            break;
-        case 6: /* VBIT */
-            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
-                                vec_size, vec_size);
-            break;
-        case 7: /* VBIF */
-            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
-                                vec_size, vec_size);
-            break;
-        }
-        return 0;
-
     case NEON_3R_VQADD:
         tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                        rn_ofs, rm_ofs, vec_size, vec_size,
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         return 0;

     case NEON_3R_VADD_VSUB:
+    case NEON_3R_LOGIC:
         /* Already handled by decodetree */
         return 1;
     }
--
2.20.1
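The dataflow described in the VFM commit message above can be sanity-checked
against the C99 fused multiply-add, which (like the vfp_muladd helpers)
rounds only once. A sketch of the four variants, negating the inputs before
the single fused operation; a host-side model only, not QEMU code:

    #include <math.h>

    /* o1_neg_d: VFNMA/VFNMS negate fd; o2_neg_n: VFMS/VFNMS negate fn. */
    float vfp_vfm_model(float fd, float fn, float fm,
                        int o1_neg_d, int o2_neg_n)
    {
        if (o2_neg_n) {
            fn = -fn;
        }
        if (o1_neg_d) {
            fd = -fd;
        }
        return fmaf(fn, fm, fd);  /* one fused op, no intermediate rounding */
    }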
Convert the VFP comparison instructions to decodetree.

Note that comparison instructions should not honour the VFP
short-vector length and stride information: they are scalar-only
operations. This applies to all the 2-operand instructions except
for VMOV, VABS, VNEG and VSQRT. (In the old decoder this is
implemented via the "if (op == 15 && rn > 3) { veclen = 0; }" check.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 75 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 51 +----------------------
 target/arm/vfp.decode          |  5 +++
 3 files changed, 81 insertions(+), 50 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
 {
     return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
 }
+
+static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
+{
+    TCGv_i32 vd, vm;
+
+    /* Vm/M bits must be zero for the Z variant */
+    if (a->z && a->vm != 0) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vd = tcg_temp_new_i32();
+    vm = tcg_temp_new_i32();
+
+    neon_load_reg32(vd, a->vd);
+    if (a->z) {
+        tcg_gen_movi_i32(vm, 0);
+    } else {
+        neon_load_reg32(vm, a->vm);
+    }
+
+    if (a->e) {
+        gen_helper_vfp_cmpes(vd, vm, cpu_env);
+    } else {
+        gen_helper_vfp_cmps(vd, vm, cpu_env);
+    }
+
+    tcg_temp_free_i32(vd);
+    tcg_temp_free_i32(vm);
+
+    return true;
+}
+
+static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
+{
+    TCGv_i64 vd, vm;
+
+    /* Vm/M bits must be zero for the Z variant */
+    if (a->z && a->vm != 0) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vd = tcg_temp_new_i64();
+    vm = tcg_temp_new_i64();
+
+    neon_load_reg64(vd, a->vd);
+    if (a->z) {
+        tcg_gen_movi_i64(vm, 0);
+    } else {
+        neon_load_reg64(vm, a->vm);
+    }
+
+    if (a->e) {
+        gen_helper_vfp_cmped(vd, vm, cpu_env);
+    } else {
+        gen_helper_vfp_cmpd(vd, vm, cpu_env);
+    }
+
+    tcg_temp_free_i64(vd);
+    tcg_temp_free_i64(vm);
+
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_neg(int dp)
     gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
 }

-static inline void gen_vfp_cmp(int dp)
-{
-    if (dp)
-        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
-    else
-        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
-}
-
-static inline void gen_vfp_cmpe(int dp)
-{
-    if (dp)
-        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
-    else
-        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
-}
-
-static inline void gen_vfp_F1_ld0(int dp)
-{
-    if (dp)
-        tcg_gen_movi_i64(cpu_F1d, 0);
-    else
-        tcg_gen_movi_i32(cpu_F1s, 0);
-}
-
 #define VFP_GEN_ITOF(name) \
 static inline void gen_vfp_##name(int dp, int neon) \
 { \
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     case 15:
         switch (rn) {
         case 0 ... 3:
+        case 8 ... 11:
            /* Already handled by decodetree */
            return 1;
        default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
            rd_is_dp = false;
            break;

-        case 0x08: case 0x0a: /* vcmp, vcmpz */
-        case 0x09: case 0x0b: /* vcmpe, vcmpez */
-            no_output = true;
-            break;
-
         case 0x0c: /* vrintr */
         case 0x0d: /* vrintz */
         case 0x0e: /* vrintx */
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     /* Load the initial operands. */
     if (op == 15) {
         switch (rn) {
-        case 0x08: case 0x09: /* Compare */
-            gen_mov_F0_vreg(dp, rd);
-            gen_mov_F1_vreg(dp, rm);
-            break;
-        case 0x0a: case 0x0b: /* Compare with zero */
-            gen_mov_F0_vreg(dp, rd);
-            gen_vfp_F1_ld0(dp);
-            break;
         case 0x14: /* vcvt fp <-> fixed */
         case 0x15:
         case 0x16:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
             gen_vfp_msr(tmp);
             break;
         }
-        case 8: /* cmp */
-            gen_vfp_cmp(dp);
-            break;
-        case 9: /* cmpe */
-            gen_vfp_cmpe(dp);
-            break;
-        case 10: /* cmpz */
-            gen_vfp_cmp(dp);
-            break;
-        case 11: /* cmpez */
-            gen_vfp_F1_ld0(dp);
-            gen_vfp_cmpe(dp);
-            break;
         case 12: /* vrintr */
         {
             TCGv_ptr fpst = get_fpstatus_ptr(0);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... \
                vd=%vd_sp vm=%vm_sp
 VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... \
                vd=%vd_dp vm=%vm_dp
+
+VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
+               vd=%vd_sp vm=%vm_sp
+VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
+               vd=%vd_dp vm=%vm_dp
--
2.20.1

Convert the Neon 3-reg-same VMAX and VMIN insns to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-17-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       |  5 +++++
 target/arm/translate-neon.inc.c | 14 ++++++++++++++
 target/arm/translate.c          | 21 ++-------------------
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
 VBIT_3s  1111 001 1 0 . 10 .... .... 0001 ... 1 ....  @3same_logic
 VBIF_3s  1111 001 1 0 . 11 .... .... 0001 ... 1 ....  @3same_logic

+VMAX_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 0 ....  @3same
+VMAX_U_3s  1111 001 1 0 . .. .... .... 0110 . . . 0 ....  @3same
+VMIN_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 1 ....  @3same
+VMIN_U_3s  1111 001 1 0 . .. .... .... 0110 . . . 1 ....  @3same
+
 VADD_3s  1111 001 0 0 . .. .... .... 1000 . . . 0 ....  @3same
 VSUB_3s  1111 001 1 0 . .. .... .... 1000 . . . 0 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ DO_3SAME(VEOR, tcg_gen_gvec_xor)
 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
+
+#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
+    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
+    {                                                                   \
+        if (a->size == 3) {                                             \
+            return false;                                               \
+        }                                                               \
+        return do_3same(s, a, FUNC);                                    \
+    }
+
+DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
+DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
+DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
+DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
         return 0;

-    case NEON_3R_VMAX:
-        if (u) {
-            tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
-                              vec_size, vec_size);
-        } else {
-            tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
-                              vec_size, vec_size);
-        }
-        return 0;
-    case NEON_3R_VMIN:
-        if (u) {
-            tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
-                              vec_size, vec_size);
-        } else {
-            tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
-                              vec_size, vec_size);
-        }
-        return 0;
-
     case NEON_3R_VSHL:
         /* Note the operation is vshl vd,vm,vn */
         tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

     case NEON_3R_VADD_VSUB:
     case NEON_3R_LOGIC:
+    case NEON_3R_VMAX:
+    case NEON_3R_VMIN:
         /* Already handled by decodetree */
         return 1;
     }
--
2.20.1
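A lane-level model of what tcg_gen_gvec_smax generates for the VMAX_S case
above: 'size' selects the lane width, and the size == 3 encodings are UNDEF,
which is what the DO_3SAME_NO_SZ_3 wrapper rejects. Illustrative only:

    #include <stdint.h>

    /* One lane width (S8); the other sizes differ only in element type. */
    void vmax_s8_model(int8_t *d, const int8_t *n, const int8_t *m, int lanes)
    {
        for (int i = 0; i < lanes; i++) {
            d[i] = n[i] > m[i] ? n[i] : m[i];
        }
    }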
Convert the VDIV instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 10 ++++++++++
 target/arm/translate.c         | 21 +--------------------
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_sp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
 }
+
+static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static TCGv_ptr get_fpstatus_ptr(int neon)
     return statusptr;
 }

-#define VFP_OP2(name) \
-static inline void gen_vfp_##name(int dp) \
-{ \
-    TCGv_ptr fpst = get_fpstatus_ptr(0); \
-    if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
-    } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
-    } \
-    tcg_temp_free_ptr(fpst); \
-}
-
-VFP_OP2(div)
-
-#undef VFP_OP2
-
 static inline void gen_vfp_abs(int dp)
 {
     if (dp)
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);

     switch (op) {
-    case 0 ... 7:
+    case 0 ... 8:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation. */
         switch (op) {
-        case 8: /* div: fn / fm */
-            gen_vfp_div(dp);
-            break;
         case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
         case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
         case 12: /* VFMA  : fd = muladd( fd, fn, fm) */
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... \
                vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... \
                vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... \
+               vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... \
+               vm=%vm_dp vn=%vn_dp vd=%vd_dp
--
2.20.1

Convert the Neon comparison ops in the 3-reg-same grouping
to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-18-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       |  8 ++++++++
 target/arm/translate-neon.inc.c | 22 ++++++++++++++++++++++
 target/arm/translate.c          | 23 +++--------------------
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
 VBIT_3s  1111 001 1 0 . 10 .... .... 0001 ... 1 ....  @3same_logic
 VBIF_3s  1111 001 1 0 . 11 .... .... 0001 ... 1 ....  @3same_logic

+VCGT_S_3s  1111 001 0 0 . .. .... .... 0011 . . . 0 ....  @3same
+VCGT_U_3s  1111 001 1 0 . .. .... .... 0011 . . . 0 ....  @3same
+VCGE_S_3s  1111 001 0 0 . .. .... .... 0011 . . . 1 ....  @3same
+VCGE_U_3s  1111 001 1 0 . .. .... .... 0011 . . . 1 ....  @3same
+
 VMAX_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 0 ....  @3same
 VMAX_U_3s  1111 001 1 0 . .. .... .... 0110 . . . 0 ....  @3same
 VMIN_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 1 ....  @3same
@@ -XXX,XX +XXX,XX @@ VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same

 VADD_3s  1111 001 0 0 . .. .... .... 1000 . . . 0 ....  @3same
 VSUB_3s  1111 001 1 0 . .. .... .... 1000 . . . 0 ....  @3same
+
+VTST_3s  1111 001 0 0 . .. .... .... 1000 . . . 1 ....  @3same
+VCEQ_3s  1111 001 1 0 . .. .... .... 1000 . . . 1 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
+
+#define DO_3SAME_CMP(INSN, COND)                                        \
+    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
+    }                                                                   \
+    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+
+DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
+DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
+DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
+DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
+DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
+
+static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                        uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
+}
+DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        u ? &mls_op[size] : &mla_op[size]);
         return 0;

-    case NEON_3R_VTST_VCEQ:
-        if (u) { /* VCEQ */
-            tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
-                             vec_size, vec_size);
-        } else { /* VTST */
-            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
-                           vec_size, vec_size, &cmtst_op[size]);
-        }
-        return 0;
-
-    case NEON_3R_VCGT:
-        tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
-                         rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
-        return 0;
-
-    case NEON_3R_VCGE:
-        tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
-                         rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
-        return 0;
-
     case NEON_3R_VSHL:
         /* Note the operation is vshl vd,vm,vn */
         tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

     case NEON_3R_VADD_VSUB:
     case NEON_3R_LOGIC:
     case NEON_3R_VMAX:
     case NEON_3R_VMIN:
+    case NEON_3R_VTST_VCEQ:
+    case NEON_3R_VCGT:
+    case NEON_3R_VCGE:
         /* Already handled by decodetree */
         return 1;
     }
--
2.20.1
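Neon compares produce masks rather than flag bits: all-ones per lane for
true, all-zeros for false, and VTST is "any common bit set". A scalar sketch
of the per-lane semantics behind tcg_gen_gvec_cmp() and the cmtst_op
expander used above (illustrative names, not QEMU API):

    #include <stdint.h>

    uint32_t vcge_s32_model(int32_t n, int32_t m)
    {
        return n >= m ? 0xffffffffu : 0;
    }

    uint32_t vtst32_model(uint32_t n, uint32_t m)
    {
        return (n & m) != 0 ? 0xffffffffu : 0;
    }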
Convert the VCVT (between floating-point and fixed-point) instructions
to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 124 +++++++++++++++++++++++++++++++++
 target/arm/translate.c         |  57 +--------------
 target/arm/vfp.decode          |  10 +++
 3 files changed, 136 insertions(+), 55 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
     tcg_temp_free_i32(vd);
     return true;
 }
+
+static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
+{
+    TCGv_i32 vd, shift;
+    TCGv_ptr fpst;
+    int frac_bits;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
+
+    vd = tcg_temp_new_i32();
+    neon_load_reg32(vd, a->vd);
+
+    fpst = get_fpstatus_ptr(false);
+    shift = tcg_const_i32(frac_bits);
+
+    /* Switch on op:U:sx bits */
+    switch (a->opc) {
+    case 0:
+        gen_helper_vfp_shtos(vd, vd, shift, fpst);
+        break;
+    case 1:
+        gen_helper_vfp_sltos(vd, vd, shift, fpst);
+        break;
+    case 2:
+        gen_helper_vfp_uhtos(vd, vd, shift, fpst);
+        break;
+    case 3:
+        gen_helper_vfp_ultos(vd, vd, shift, fpst);
+        break;
+    case 4:
+        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 5:
+        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 6:
+        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 7:
+        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    neon_store_reg32(vd, a->vd);
+    tcg_temp_free_i32(vd);
+    tcg_temp_free_i32(shift);
+    tcg_temp_free_ptr(fpst);
+    return true;
+}
+
+static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
+{
+    TCGv_i64 vd;
+    TCGv_i32 shift;
+    TCGv_ptr fpst;
+    int frac_bits;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
+
+    vd = tcg_temp_new_i64();
+    neon_load_reg64(vd, a->vd);
+
+    fpst = get_fpstatus_ptr(false);
+    shift = tcg_const_i32(frac_bits);
+
+    /* Switch on op:U:sx bits */
+    switch (a->opc) {
+    case 0:
+        gen_helper_vfp_shtod(vd, vd, shift, fpst);
+        break;
+    case 1:
+        gen_helper_vfp_sltod(vd, vd, shift, fpst);
+        break;
+    case 2:
+        gen_helper_vfp_uhtod(vd, vd, shift, fpst);
+        break;
+    case 3:
+        gen_helper_vfp_ultod(vd, vd, shift, fpst);
+        break;
+    case 4:
+        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 5:
+        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 6:
+        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
+        break;
+    case 7:
+        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    neon_store_reg64(vd, a->vd);
+    tcg_temp_free_i64(vd);
+    tcg_temp_free_i32(shift);
+    tcg_temp_free_ptr(fpst);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_##name(int dp, int shift, int neon) \
     tcg_temp_free_i32(tmp_shift); \
     tcg_temp_free_ptr(statusptr); \
 }
-VFP_GEN_FIX(tosh, _round_to_zero)
 VFP_GEN_FIX(tosl, _round_to_zero)
-VFP_GEN_FIX(touh, _round_to_zero)
 VFP_GEN_FIX(toul, _round_to_zero)
-VFP_GEN_FIX(shto, )
 VFP_GEN_FIX(slto, )
-VFP_GEN_FIX(uhto, )
 VFP_GEN_FIX(ulto, )
 #undef VFP_GEN_FIX

@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
         return 1;
     case 15:
         switch (rn) {
-        case 0 ... 19:
+        case 0 ... 23:
+        case 28 ... 31:
             /* Already handled by decodetree */
             return 1;
         default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
             rd_is_dp = false;
             break;

-        case 0x14: /* vcvt fp <-> fixed */
-        case 0x15:
-        case 0x16:
-        case 0x17:
-        case 0x1c:
-        case 0x1d:
-        case 0x1e:
-        case 0x1f:
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
-                return 1;
-            }
-            /* Immediate frac_bits has same format as SREG_M. */
-            rm_is_dp = false;
-            break;
-
         default:
             return 1;
         }
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     /* Load the initial operands. */
     if (op == 15) {
         switch (rn) {
-        case 0x14: /* vcvt fp <-> fixed */
-        case 0x15:
-        case 0x16:
-        case 0x17:
-        case 0x1c:
-        case 0x1d:
-        case 0x1e:
-        case 0x1f:
-            /* Source and destination the same.  */
-            gen_mov_F0_vreg(dp, rd);
-            break;
         default:
             /* One source operand.  */
             gen_mov_F0_vreg(rm_is_dp, rm);
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
         switch (op) {
         case 15: /* extension space */
             switch (rn) {
-            case 20: /* fshto */
-                gen_vfp_shto(dp, 16 - rm, 0);
-                break;
-            case 21: /* fslto */
-                gen_vfp_slto(dp, 32 - rm, 0);
-                break;
-            case 22: /* fuhto */
-                gen_vfp_uhto(dp, 16 - rm, 0);
-                break;
-            case 23: /* fulto */
-                gen_vfp_ulto(dp, 32 - rm, 0);
-                break;
             case 24: /* ftoui */
                 gen_vfp_toui(dp, 0);
                 break;
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
             case 27: /* ftosiz */
                 gen_vfp_tosiz(dp, 0);
                 break;
-            case 28: /* ftosh */
-                gen_vfp_tosh(dp, 16 - rm, 0);
-                break;
-            case 29: /* ftosl */
-                gen_vfp_tosl(dp, 32 - rm, 0);
-                break;
-            case 30: /* ftouh */
-                gen_vfp_touh(dp, 16 - rm, 0);
-                break;
-            case 31: /* ftoul */
-                gen_vfp_toul(dp, 32 - rm, 0);
-                break;
             default: /* undefined */
                 g_assert_not_reached();
             }
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
 # VJCVT is always dp to sp
 VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... \
                vd=%vd_sp vm=%vm_dp
+
+# VCVT between floating-point and fixed-point. The immediate value
+# is in the same format as a Vm single-precision register number.
+# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
+# for the convenience of the trans_VCVT_fix functions.
+%vcvt_fix_op 18:1 16:1 7:1
+VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
+               vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
+VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
+               vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
--
2.20.1

Convert the Neon VQADD/VQSUB insns in the 3-reg-same grouping
to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-19-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       |  6 ++++++
 target/arm/translate-neon.inc.c | 15 +++++++++++++++
 target/arm/translate.c          | 14 ++------------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@
 @3same  .... ... . . . size:2 .... .... .... . q:1 . . .... \
         &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp

+VQADD_S_3s  1111 001 0 0 . .. .... .... 0000 . . . 1 ....  @3same
+VQADD_U_3s  1111 001 1 0 . .. .... .... 0000 . . . 1 ....  @3same
+
 @3same_logic  .... ... . . . .. .... .... .... . q:1 .. .... \
               &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0

@@ -XXX,XX +XXX,XX @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
 VBIT_3s  1111 001 1 0 . 10 .... .... 0001 ... 1 ....  @3same_logic
 VBIF_3s  1111 001 1 0 . 11 .... .... 0001 ... 1 ....  @3same_logic

+VQSUB_S_3s  1111 001 0 0 . .. .... .... 0010 . . . 1 ....  @3same
+VQSUB_U_3s  1111 001 1 0 . .. .... .... 0010 . . . 1 ....  @3same
+
 VCGT_S_3s  1111 001 0 0 . .. .... .... 0011 . . . 0 ....  @3same
 VCGT_U_3s  1111 001 1 0 . .. .... .... 0011 . . . 0 ....  @3same
 VCGE_S_3s  1111 001 0 0 . .. .... .... 0011 . . . 1 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
 }
 DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
+
+#define DO_3SAME_GVEC4(INSN, OPARRAY)                                   \
+    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),           \
+                       rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]);   \
+    }                                                                   \
+    DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_GVEC4(VQADD_S, sqadd_op)
+DO_3SAME_GVEC4(VQADD_U, uqadd_op)
+DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
+DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         return 1;

-    case NEON_3R_VQADD:
-        tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                       rn_ofs, rm_ofs, vec_size, vec_size,
-                       (u ? uqadd_op : sqadd_op) + size);
-        return 0;
-
-    case NEON_3R_VQSUB:
-        tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
-                       rn_ofs, rm_ofs, vec_size, vec_size,
-                       (u ? uqsub_op : sqsub_op) + size);
-        return 0;
-
     case NEON_3R_VMUL: /* VMUL */
         if (u) {
             /* Polynomial case allows only P8. */
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
     case NEON_3R_VTST_VCEQ:
     case NEON_3R_VCGT:
     case NEON_3R_VCGE:
+    case NEON_3R_VQADD:
+    case NEON_3R_VQSUB:
         /* Already handled by decodetree */
         return 1;
     }
--
2.20.1
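The reason VQADD/VQSUB above go through tcg_gen_gvec_4 with an extra operand
pointing at vfp.qc is the sticky saturation flag: on overflow the lane
clamps and FPSCR.QC is set. A scalar model of one lane, assuming nothing
beyond standard C:

    #include <stdint.h>

    /* One S32 lane of VQADD: clamp on overflow and note it in *qc. */
    int32_t vqadd_s32_model(int32_t n, int32_t m, int *qc)
    {
        int64_t r = (int64_t)n + m;
        if (r > INT32_MAX) {
            *qc = 1;
            return INT32_MAX;
        }
        if (r < INT32_MIN) {
            *qc = 1;
            return INT32_MIN;
        }
        return (int32_t)r;
    }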
Convert the VCVTT, VCVTB instructions that deal with conversion
from half-precision floats to f32 or 64 to decodetree.

Since we're no longer constrained to the old decoder's style
using cpu_F0s and cpu_F0d we can perform a direct 16 bit
load of the right half of the input single-precision register
rather than loading the full 32 bits and then doing a
separate shift or sign-extension.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 82 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 56 +----
 target/arm/vfp.decode          |  6 +++
 3 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@
 #include "decode-vfp.inc.c"
 #include "decode-vfp-uncond.inc.c"

+/*
+ * Return the offset of a 16-bit half of the specified VFP single-precision
+ * register. If top is true, returns the top 16 bits; otherwise the bottom
+ * 16 bits.
+ */
+static inline long vfp_f16_offset(unsigned reg, bool top)
+{
+    long offs = vfp_reg_offset(false, reg);
+#ifdef HOST_WORDS_BIGENDIAN
+    if (!top) {
+        offs += 2;
+    }
+#else
+    if (top) {
+        offs += 2;
+    }
+#endif
+    return offs;
+}
+
 /*
  * Check that VFP access is enabled. If it is, do the necessary
  * M-profile lazy-FP handling and then return true.
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)

     return true;
 }
+
+static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 ahp_mode;
+    TCGv_i32 tmp;
+
+    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(false);
+    ahp_mode = get_ahp_flag();
+    tmp = tcg_temp_new_i32();
+    /* The T bit tells us if we want the low or high 16 bits of Vm */
+    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
+    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
+    neon_store_reg32(tmp, a->vd);
+    tcg_temp_free_i32(ahp_mode);
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tmp);
+    return true;
+}
+
+static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 ahp_mode;
+    TCGv_i32 tmp;
+    TCGv_i64 vd;
+
+    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(false);
+    ahp_mode = get_ahp_flag();
+    tmp = tcg_temp_new_i32();
+    /* The T bit tells us if we want the low or high 16 bits of Vm */
+    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
+    vd = tcg_temp_new_i64();
+    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
+    neon_store_reg64(vd, a->vd);
+    tcg_temp_free_i32(ahp_mode);
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i64(vd);
+    return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c

Convert the Neon VMUL, VMLA, VMLS and VSHL insns in the
3-reg-same grouping to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-20-peter.maydell@linaro.org
---
 target/arm/neon-dp.decode       |  9 +++++++
 target/arm/translate-neon.inc.c | 44 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 28 +++------------------
 3 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -XXX,XX +XXX,XX @@ VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
 VCGE_S_3s  1111 001 0 0 . .. .... .... 0011 . . . 1 ....  @3same
 VCGE_U_3s  1111 001 1 0 . .. .... .... 0011 . . . 1 ....  @3same

+VSHL_S_3s  1111 001 0 0 . .. .... .... 0100 . . . 0 ....  @3same
+VSHL_U_3s  1111 001 1 0 . .. .... .... 0100 . . . 0 ....  @3same
+
 VMAX_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 0 ....  @3same
 VMAX_U_3s  1111 001 1 0 . .. .... .... 0110 . . . 0 ....  @3same
 VMIN_S_3s  1111 001 0 0 . .. .... .... 0110 . . . 1 ....  @3same
@@ -XXX,XX +XXX,XX @@ VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same

 VTST_3s  1111 001 0 0 . .. .... .... 1000 . . . 1 ....  @3same
 VCEQ_3s  1111 001 1 0 . .. .... .... 1000 . . . 1 ....  @3same
+
+VMLA_3s  1111 001 0 0 . .. .... .... 1001 . . . 0 ....  @3same
+VMLS_3s  1111 001 1 0 . .. .... .... 1001 . . . 0 ....  @3same
+
+VMUL_3s  1111 001 0 0 . .. .... .... 1001 . . . 1 ....  @3same
+VMUL_p_3s  1111 001 1 0 . .. .... .... 1001 . . . 1 ....  @3same
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -XXX,XX +XXX,XX @@ DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
+DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)

 #define DO_3SAME_CMP(INSN, COND) \
     static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
@@ -XXX,XX +XXX,XX @@ DO_3SAME_GVEC4(VQADD_S, sqadd_op)
 DO_3SAME_GVEC4(VQADD_U, uqadd_op)
 DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
 DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
+
+static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                          uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz,
+                       0, gen_helper_gvec_pmul_b);
+}
+
+static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_3same(s, a, gen_VMUL_p_3s);
+}
+
+#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY)                           \
+    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,                          \
+                       oprsz, maxsz, &OPARRAY[vece]);                   \
+    }                                                                   \
+    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+
+DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
+DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
+
+#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY)                             \
+    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        /* Note the operation is vshl vd,vm,vn */                       \
+        tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs,                          \
+                       oprsz, maxsz, &OPARRAY[vece]);                   \
+    }                                                                   \
+    DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
+DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
diff --git a/target/arm/translate.c b/target/arm/translate.c
116
index XXXXXXX..XXXXXXX 100644
97
index XXXXXXX..XXXXXXX 100644
117
--- a/target/arm/translate.c
98
--- a/target/arm/translate.c
118
+++ b/target/arm/translate.c
99
+++ b/target/arm/translate.c
119
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
100
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
return 1;
101
}
121
case 15:
102
return 1;
122
switch (rn) {
103
123
- case 0 ... 3:
104
- case NEON_3R_VMUL: /* VMUL */
124
+ case 0 ... 5:
105
- if (u) {
125
case 8 ... 11:
106
- /* Polynomial case allows only P8. */
126
/* Already handled by decodetree */
107
- if (size != 0) {
127
return 1;
108
- return 1;
128
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
109
- }
129
if (op == 15) {
110
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
130
/* rn is opcode, encoded as per VFP_SREG_N. */
111
- 0, gen_helper_gvec_pmul_b);
131
switch (rn) {
112
- } else {
132
- case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
113
- tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
133
- case 0x05: /* vcvtt.f64.f16, vcvtt.f32.f16 */
114
- vec_size, vec_size);
134
- /*
115
- }
135
- * VCVTB, VCVTT: only present with the halfprec extension
116
- return 0;
136
- * UNPREDICTABLE if bit 8 is set prior to ARMv8
117
-
137
- * (we choose to UNDEF)
118
- case NEON_3R_VML: /* VMLA, VMLS */
138
- */
119
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
139
- if (dp) {
120
- u ? &mls_op[size] : &mla_op[size]);
140
- if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
121
- return 0;
141
- return 1;
122
-
142
- }
123
- case NEON_3R_VSHL:
143
- } else {
124
- /* Note the operation is vshl vd,vm,vn */
144
- if (!dc_isar_feature(aa32_fp16_spconv, s)) {
125
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
145
- return 1;
126
- u ? &ushl_op[size] : &sshl_op[size]);
146
- }
127
- return 0;
147
- }
128
-
148
- rm_is_dp = false;
129
case NEON_3R_VADD_VSUB:
149
- break;
130
case NEON_3R_LOGIC:
150
case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
131
case NEON_3R_VMAX:
151
case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
132
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
152
if (dp) {
133
case NEON_3R_VCGE:
153
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
134
case NEON_3R_VQADD:
154
switch (op) {
135
case NEON_3R_VQSUB:
155
case 15: /* extension space */
136
+ case NEON_3R_VMUL:
156
switch (rn) {
137
+ case NEON_3R_VML:
157
- case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
138
+ case NEON_3R_VSHL:
158
- {
139
/* Already handled by decodetree */
159
- TCGv_ptr fpst = get_fpstatus_ptr(false);
140
return 1;
160
- TCGv_i32 ahp_mode = get_ahp_flag();
141
}
161
- tmp = gen_vfp_mrs();
162
- tcg_gen_ext16u_i32(tmp, tmp);
163
- if (dp) {
164
- gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
165
- fpst, ahp_mode);
166
- } else {
167
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
168
- fpst, ahp_mode);
169
- }
170
- tcg_temp_free_i32(ahp_mode);
171
- tcg_temp_free_ptr(fpst);
172
- tcg_temp_free_i32(tmp);
173
- break;
174
- }
175
- case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
176
- {
177
- TCGv_ptr fpst = get_fpstatus_ptr(false);
178
- TCGv_i32 ahp = get_ahp_flag();
179
- tmp = gen_vfp_mrs();
180
- tcg_gen_shri_i32(tmp, tmp, 16);
181
- if (dp) {
182
- gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
183
- fpst, ahp);
184
- } else {
185
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
186
- fpst, ahp);
187
- }
188
- tcg_temp_free_i32(tmp);
189
- tcg_temp_free_i32(ahp);
190
- tcg_temp_free_ptr(fpst);
191
- break;
192
- }
193
case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
194
{
195
TCGv_ptr fpst = get_fpstatus_ptr(false);
196
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
197
index XXXXXXX..XXXXXXX 100644
198
--- a/target/arm/vfp.decode
199
+++ b/target/arm/vfp.decode
200
@@ -XXX,XX +XXX,XX @@ VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
201
vd=%vd_sp vm=%vm_sp
202
VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
203
vd=%vd_dp vm=%vm_dp
204
+
205
+# VCVTT and VCVTB from f16: Vd format depends on size bit; Vm is always vm_sp
206
+VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
207
+ vd=%vd_sp vm=%vm_sp
208
+VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
209
+ vd=%vd_dp vm=%vm_sp
210
--
142
--
211
2.20.1
143
2.20.1
212
144
213
145
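As a reading aid for the endianness handling in vfp_f16_offset() above: the
same computation can be modelled in isolation in plain C. This is a sketch,
not QEMU code; the function and parameter names are invented for
illustration.

    #include <stdbool.h>

    /*
     * A 32-bit single-precision register holds two half-precision
     * values; which 16-bit half sits at byte offset 2 depends on host
     * endianness, which is exactly what the #ifdef in the patch encodes.
     */
    static long f16_half_offset(long reg_offs, bool top, bool host_bigendian)
    {
        if (top != host_bigendian) {
            reg_offs += 2;  /* the wanted half is the high-address one */
        }
        return reg_offs;
    }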
From: Richard Henderson <richard.henderson@linaro.org>

This replaces 3 target-specific implementations for BIT, BIF, and BSL.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20190518191934.21887-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate-a64.h |  2 +
 target/arm/translate.h     |  3 --
 target/arm/translate-a64.c | 15 ++++++--
 target/arm/translate.c     | 78 +++-----------------------------------
 4 files changed, 20 insertions(+), 78 deletions(-)

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -XXX,XX +XXX,XX @@ typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
                          uint32_t, uint32_t);
 typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
                         uint32_t, uint32_t, uint32_t);
+typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
+                        uint32_t, uint32_t, uint32_t);
 
 #endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ static inline void gen_ss_advance(DisasContext *s)
 }
 
 /* Vector operations shared between ARM and AArch64.  */
-extern const GVecGen3 bsl_op;
-extern const GVecGen3 bit_op;
-extern const GVecGen3 bif_op;
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
 extern const GVecGen3 cmtst_op[4];
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 }
 
+/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
+static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
+                         int rx, GVecGen4Fn *gvec_fn, int vece)
+{
+    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
+            is_q ? 16 : 8, vec_full_reg_size(s));
+}
+
 /* Expand a 2-operand + immediate AdvSIMD vector operation using
  * an op descriptor.
  */
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
         return;
 
     case 5: /* BSL bitwise select */
-        gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
+        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
         return;
     case 6: /* BIT, bitwise insert if true */
-        gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
+        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
         return;
     case 7: /* BIF, bitwise insert if false */
-        gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
+        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
         return;
 
     default:
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
     return 1;
 }
 
-/*
- * Expanders for VBitOps_VBIF, VBIT, VBSL.
- */
-static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rm);
-    tcg_gen_and_i64(rn, rn, rd);
-    tcg_gen_xor_i64(rd, rm, rn);
-}
-
-static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_and_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_andc_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rm);
-    tcg_gen_and_vec(vece, rn, rn, rd);
-    tcg_gen_xor_vec(vece, rd, rm, rn);
-}
-
-static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_and_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
-static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_andc_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
-const GVecGen3 bsl_op = {
-    .fni8 = gen_bsl_i64,
-    .fniv = gen_bsl_vec,
-    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    .load_dest = true
-};
-
-const GVecGen3 bit_op = {
-    .fni8 = gen_bit_i64,
-    .fniv = gen_bit_vec,
-    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    .load_dest = true
-};
-
-const GVecGen3 bif_op = {
-    .fni8 = gen_bif_i64,
-    .fniv = gen_bif_vec,
-    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    .load_dest = true
-};
-
 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
 {
     tcg_gen_vec_sar8i_i64(a, a, shift);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                               vec_size, vec_size);
             break;
         case 5: /* VBSL */
-            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
-                           vec_size, vec_size, &bsl_op);
+            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
+                                vec_size, vec_size);
             break;
         case 6: /* VBIT */
-            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
-                           vec_size, vec_size, &bit_op);
+            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
+                                vec_size, vec_size);
             break;
         case 7: /* VBIF */
-            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
-                           vec_size, vec_size, &bif_op);
+            tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
+                                vec_size, vec_size);
             break;
         }
         return 0;
-- 
2.20.1

We're going to want at least some of the NeonGen* typedefs
for the refactored 32-bit Neon decoder, so move them all
to translate.h since it makes more sense to keep them in
one group.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-23-peter.maydell@linaro.org
---
 target/arm/translate.h     | 17 +++++++++++++++++
 target/arm/translate-a64.c | 17 -----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -XXX,XX +XXX,XX @@ typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
 typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
                         uint32_t, uint32_t, uint32_t);
 
+/* Function prototype for gen_ functions for calling Neon helpers */
+typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
+typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
+typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
+typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
+typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
+typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ typedef struct AArch64DecodeTable {
     AArch64DecodeFn *disas_fn;
 } AArch64DecodeTable;
 
-/* Function prototype for gen_ functions for calling Neon helpers */
-typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
-typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
-typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
-typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
-typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
-typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
-typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
-typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
-typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
-typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
-typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
-typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
-typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
-
 /* initialize TCG globals.  */
 void a64_translate_init(void)
 {
-- 
2.20.1
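The three insns collapse onto one operation because BSL, BIT and BIF are the
same bitwise select with the operands in different roles. A scalar model of
what the select computes (a sketch, not QEMU's implementation):

    #include <stdint.h>

    /* Bitwise select: pick x where sel is 1, else y. */
    static uint64_t bitsel(uint64_t sel, uint64_t x, uint64_t y)
    {
        return (x & sel) | (y & ~sel);
    }

    /*
     * Operand roles as wired up in the patch (rd is both an input and
     * the destination register):
     *   BSL: rd = bitsel(rd, rn, rm)  -- rd itself is the select mask
     *   BIT: rd = bitsel(rm, rn, rd)  -- take rn bits where rm is set
     *   BIF: rd = bitsel(rm, rd, rn)  -- take rn bits where rm is clear
     */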
Deleted patch

The Cortex-R5F initfn was not correctly setting up the MVFR
ID register values. Fill these in, since some subsequent patches
will use ID register checks rather than CPU feature bit checks.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void cortex_r5f_initfn(Object *obj)
 
     cortex_r5_initfn(obj);
     set_feature(&cpu->env, ARM_FEATURE_VFP3);
+    cpu->isar.mvfr0 = 0x10110221;
+    cpu->isar.mvfr1 = 0x00000011;
 }
 
 static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
-- 
2.20.1
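The value 0x10110221 packs several 4-bit ID fields; a sketch of how an
ID-register-based check reads. Field positions follow our reading of the
Arm ARM's MVFR0 layout (single-precision support in bits [7:4],
double-precision in [11:8]) -- verify against the spec before relying on them:

    #include <stdint.h>
    #include <stdbool.h>

    /* Extract a 4-bit ID field from an MVFRn value. */
    static unsigned mvfr_field(uint32_t mvfr, unsigned shift)
    {
        return (mvfr >> shift) & 0xf;
    }

    static bool model_has_fpdp(uint32_t mvfr0)
    {
        return mvfr_field(mvfr0, 8) >= 2;   /* 0x10110221 -> field is 2 */
    }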
Deleted patch

Expand out the sequences in the new decoder VLDR/VSTR/VLDM/VSTM trans
functions which perform the memory accesses by going via the TCG
globals cpu_F0s and cpu_F0d, to use local TCG temps instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 46 +++++++++++++++++++++-------------
 target/arm/translate.c         | 18 -------------
 2 files changed, 28 insertions(+), 36 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_sp *a)
 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 {
     uint32_t offset;
-    TCGv_i32 addr;
+    TCGv_i32 addr, tmp;
 
     if (!vfp_access_check(s)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
         addr = load_reg(s, a->rn);
     }
     tcg_gen_addi_i32(addr, addr, offset);
+    tmp = tcg_temp_new_i32();
     if (a->l) {
-        gen_vfp_ld(s, false, addr);
-        gen_mov_vreg_F0(false, a->vd);
+        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
+        neon_store_reg32(tmp, a->vd);
     } else {
-        gen_mov_F0_vreg(false, a->vd);
-        gen_vfp_st(s, false, addr);
+        neon_load_reg32(tmp, a->vd);
+        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
     }
+    tcg_temp_free_i32(tmp);
     tcg_temp_free_i32(addr);
 
     return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 {
     uint32_t offset;
     TCGv_i32 addr;
+    TCGv_i64 tmp;
 
     /* UNDEF accesses to D16-D31 if they don't exist */
     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
         addr = load_reg(s, a->rn);
     }
     tcg_gen_addi_i32(addr, addr, offset);
+    tmp = tcg_temp_new_i64();
     if (a->l) {
-        gen_vfp_ld(s, true, addr);
-        gen_mov_vreg_F0(true, a->vd);
+        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
+        neon_store_reg64(tmp, a->vd);
     } else {
-        gen_mov_F0_vreg(true, a->vd);
-        gen_vfp_st(s, true, addr);
+        neon_load_reg64(tmp, a->vd);
+        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
     }
+    tcg_temp_free_i64(tmp);
     tcg_temp_free_i32(addr);
 
     return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
 {
     uint32_t offset;
-    TCGv_i32 addr;
+    TCGv_i32 addr, tmp;
     int i, n;
 
     n = a->imm;
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
     }
 
     offset = 4;
+    tmp = tcg_temp_new_i32();
     for (i = 0; i < n; i++) {
         if (a->l) {
             /* load */
-            gen_vfp_ld(s, false, addr);
-            gen_mov_vreg_F0(false, a->vd + i);
+            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
+            neon_store_reg32(tmp, a->vd + i);
         } else {
             /* store */
-            gen_mov_F0_vreg(false, a->vd + i);
-            gen_vfp_st(s, false, addr);
+            neon_load_reg32(tmp, a->vd + i);
+            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
         }
         tcg_gen_addi_i32(addr, addr, offset);
     }
+    tcg_temp_free_i32(tmp);
     if (a->w) {
         /* writeback */
         if (a->p) {
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
 {
     uint32_t offset;
     TCGv_i32 addr;
+    TCGv_i64 tmp;
     int i, n;
 
     n = a->imm >> 1;
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
     }
 
     offset = 8;
+    tmp = tcg_temp_new_i64();
     for (i = 0; i < n; i++) {
         if (a->l) {
             /* load */
-            gen_vfp_ld(s, true, addr);
-            gen_mov_vreg_F0(true, a->vd + i);
+            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
+            neon_store_reg64(tmp, a->vd + i);
         } else {
             /* store */
-            gen_mov_F0_vreg(true, a->vd + i);
-            gen_vfp_st(s, true, addr);
+            neon_load_reg64(tmp, a->vd + i);
+            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
         }
         tcg_gen_addi_i32(addr, addr, offset);
     }
+    tcg_temp_free_i64(tmp);
     if (a->w) {
         /* writeback */
         if (a->p) {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ VFP_GEN_FIX(uhto, )
 VFP_GEN_FIX(ulto, )
 #undef VFP_GEN_FIX
 
-static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
-{
-    if (dp) {
-        gen_aa32_ld64(s, cpu_F0d, addr, get_mem_index(s));
-    } else {
-        gen_aa32_ld32u(s, cpu_F0s, addr, get_mem_index(s));
-    }
-}
-
-static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
-{
-    if (dp) {
-        gen_aa32_st64(s, cpu_F0d, addr, get_mem_index(s));
-    } else {
-        gen_aa32_st32(s, cpu_F0s, addr, get_mem_index(s));
-    }
-}
-
 static inline long vfp_reg_offset(bool dp, unsigned reg)
 {
     if (dp) {
-- 
2.20.1
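The conversion keeps the load/store-multiple loop structure intact: one temp
is allocated before the loop, reused for every element, and freed once
afterwards. A minimal plain-C model of the single-precision VLDM walk (names
invented; writeback and the p/u addressing variants are omitted):

    #include <stdint.h>

    /* Model: load n consecutive 32-bit FP registers starting at vd. */
    static void vldm_sp_model(uint32_t *fpregs, const uint32_t *mem,
                              uint32_t addr, int vd, int n)
    {
        for (int i = 0; i < n; i++) {
            uint32_t tmp = mem[addr / 4];   /* per-iteration scratch value */
            fpregs[vd + i] = tmp;
            addr += 4;                      /* offset is 4 for .32 access */
        }
    }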
Deleted patch

Convert the VFP VMLS instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 38 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         |  8 +------
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_sp *a)
 {
     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
 }
+
+static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VMLS: vd = vd + -(vn * vm)
+     * Note that order of inputs to the add matters for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_muls(tmp, vn, vm, fpst);
+    gen_helper_vfp_negs(tmp, tmp);
+    gen_helper_vfp_adds(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+    /*
+     * VMLS: vd = vd + -(vn * vm)
+     * Note that order of inputs to the add matters for NaNs.
+     */
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    gen_helper_vfp_muld(tmp, vn, vm, fpst);
+    gen_helper_vfp_negd(tmp, tmp);
+    gen_helper_vfp_addd(vd, vd, tmp, fpst);
+    tcg_temp_free_i64(tmp);
+}
+
+static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0:
+    case 0 ... 1:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 1: /* VMLS: fd + -(fn * fm) */
-            gen_vfp_mul(dp);
-            gen_vfp_F1_neg(dp);
-            gen_mov_F0_vreg(dp, rd);
-            gen_vfp_add(dp);
-            break;
         case 2: /* VNMLS: -fd + (fn * fm) */
             /* Note that it isn't valid to replace (-A + B) with (B - A)
              * or similar plausible looking simplifications
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
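A scalar model of what the generated sequence computes (plain C; it
reproduces the values, though not Arm's NaN-payload rules):

    /*
     * Model of the VMLS op sequence above: vd + -(vn * vm).
     * The multiply is separately rounded (this is not a fused
     * multiply-add), and vd is deliberately the first operand of the
     * add: Arm's NaN propagation broadly prefers the first operand,
     * so vd + -p and -p + vd can return different NaNs.
     */
    static float vmls_model(float vd, float vn, float vm)
    {
        float p = vn * vm;
        return vd + -p;
    }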
Deleted patch

Convert the VFP VNMLS instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 42 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 24 +------------------
 target/arm/vfp.decode          |  5 ++++
 3 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
 {
     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
+
+static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS: -fd + (fn * fm)
+     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+     * plausible looking simplifications because this will give wrong results
+     * for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_muls(tmp, vn, vm, fpst);
+    gen_helper_vfp_negs(vd, vd);
+    gen_helper_vfp_adds(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS: -fd + (fn * fm)
+     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+     * plausible looking simplifications because this will give wrong results
+     * for NaNs.
+     */
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    gen_helper_vfp_muld(tmp, vn, vm, fpst);
+    gen_helper_vfp_negd(vd, vd);
+    gen_helper_vfp_addd(vd, vd, tmp, fpst);
+    tcg_temp_free_i64(tmp);
+}
+
+static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ VFP_OP2(div)
 
 #undef VFP_OP2
 
-static inline void gen_vfp_F1_mul(int dp)
-{
-    /* Like gen_vfp_mul() but put result in F1 */
-    TCGv_ptr fpst = get_fpstatus_ptr(0);
-    if (dp) {
-        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
-    } else {
-        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
-    }
-    tcg_temp_free_ptr(fpst);
-}
-
 static inline void gen_vfp_F1_neg(int dp)
 {
     /* Like gen_vfp_neg() but put result in F1 */
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 1:
+    case 0 ... 2:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 2: /* VNMLS: -fd + (fn * fm) */
-            /* Note that it isn't valid to replace (-A + B) with (B - A)
-             * or similar plausible looking simplifications
-             * because this will give wrong results for NaNs.
-             */
-            gen_vfp_F1_mul(dp);
-            gen_mov_F0_vreg(dp, rd);
-            gen_vfp_neg(dp);
-            gen_vfp_add(dp);
-            break;
         case 3: /* VNMLA: -fd + -(fn * fm) */
             gen_vfp_mul(dp);
             gen_vfp_F1_neg(dp);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
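The NaN caveat is worth spelling out with a concrete model (plain C; the
values match, the NaN-payload behaviour described in the comment is Arm's,
not the host's):

    /*
     * Model of VNMLS: -vd + (vn * vm). Numerically, -vd + p and
     * p - vd round identically, but they differ when vd is a NaN:
     * the first form propagates the *negated* NaN (FNEG flips the
     * sign bit before the add), while the second propagates vd
     * unchanged -- hence the warning above against "plausible
     * looking simplifications".
     */
    static float vnmls_model(float vd, float vn, float vm)
    {
        float p = vn * vm;
        return -vd + p;
    }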
Deleted patch

Convert the VFP VNMLA instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 34 ++++++++++++++++++++++++++++++++++
 target/arm/translate.c         | 19 +------------------
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_sp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
 }
+
+static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMLA: -fd + -(fn * fm) */
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    gen_helper_vfp_muls(tmp, vn, vm, fpst);
+    gen_helper_vfp_negs(tmp, tmp);
+    gen_helper_vfp_negs(vd, vd);
+    gen_helper_vfp_adds(vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+    /* VNMLA: -fd + -(fn * fm) */
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    gen_helper_vfp_muld(tmp, vn, vm, fpst);
+    gen_helper_vfp_negd(tmp, tmp);
+    gen_helper_vfp_negd(vd, vd);
+    gen_helper_vfp_addd(vd, vd, tmp, fpst);
+    tcg_temp_free_i64(tmp);
+}
+
+static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ VFP_OP2(div)
 
 #undef VFP_OP2
 
-static inline void gen_vfp_F1_neg(int dp)
-{
-    /* Like gen_vfp_neg() but put result in F1 */
-    if (dp) {
-        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
-    } else {
-        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
-    }
-}
-
 static inline void gen_vfp_abs(int dp)
 {
     if (dp)
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 2:
+    case 0 ... 3:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 3: /* VNMLA: -fd + -(fn * fm) */
-            gen_vfp_mul(dp);
-            gen_vfp_F1_neg(dp);
-            gen_mov_F0_vreg(dp, rd);
-            gen_vfp_neg(dp);
-            gen_vfp_add(dp);
-            break;
         case 4: /* mul: fn * fm */
             gen_vfp_mul(dp);
             break;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
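VNMLA negates the product and the accumulator separately rather than
computing the sum and negating it once; a scalar model of why the tempting
factored form is wrong:

    /*
     * Model of VNMLA: -vd + -(vn * vm). Note this is not the same as
     * -(vd + vn * vm): with vd = +0 and vn * vm = -0 (round-to-nearest)
     * -vd + -p gives +0 while -(vd + p) gives -0, and a NaN input would
     * have its sign handled differently as well.
     */
    static float vnmla_model(float vd, float vn, float vm)
    {
        float p = vn * vm;
        return -vd + -p;
    }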
Deleted patch

Convert the VMUL instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 10 ++++++++++
 target/arm/translate.c         |  5 +----
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_sp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
 }
+
+static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 3:
+    case 0 ... 4:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 4: /* mul: fn * fm */
-            gen_vfp_mul(dp);
-            break;
         case 5: /* nmul: -(fn * fm) */
             gen_vfp_mul(dp);
             gen_vfp_neg(dp);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
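For readers new to the vfp.decode patterns accumulating in these patches, an
annotated excerpt may help. It is comments only, so it is inert if pasted
into a .decode file; the reading follows our understanding of the decodetree
syntax -- docs/devel/decodetree.rst is the authoritative reference:

    # Anatomy of one pattern line (a reading aid, not a new pattern):
    #
    #   VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... \
    #           vm=%vm_sp vn=%vn_sp vd=%vd_sp
    #
    #   0/1  fixed opcode bits that must match
    #   -    bits the pattern ignores entirely (here the condition field)
    #   .    bits not constrained here; they are picked up by the named
    #        field macros (%vd_sp etc.), which reassemble the scattered
    #        register-number bits into the vd/vn/vm arguments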
Deleted patch

Convert the VNMUL instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 24 ++++++++++++++++++++++++
 target/arm/translate.c         |  7 +------
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_sp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
 }
+
+static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMUL: -(fn * fm) */
+    gen_helper_vfp_muls(vd, vn, vm, fpst);
+    gen_helper_vfp_negs(vd, vd);
+}
+
+static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
+}
+
+static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+    /* VNMUL: -(fn * fm) */
+    gen_helper_vfp_muld(vd, vn, vm, fpst);
+    gen_helper_vfp_negd(vd, vd);
+}
+
+static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_##name(int dp) \
 
 VFP_OP2(add)
 VFP_OP2(sub)
-VFP_OP2(mul)
 VFP_OP2(div)
 
 #undef VFP_OP2
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 4:
+    case 0 ... 5:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 5: /* nmul: -(fn * fm) */
-            gen_vfp_mul(dp);
-            gen_vfp_neg(dp);
-            break;
         case 6: /* add: fn + fm */
             gen_vfp_add(dp);
             break;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
Deleted patch

Convert the VADD instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 10 ++++++++++
 target/arm/translate.c         |  6 +-----
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_sp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
 }
+
+static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VADD_dp(DisasContext *s, arg_VADD_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_##name(int dp) \
     tcg_temp_free_ptr(fpst); \
 }
 
-VFP_OP2(add)
 VFP_OP2(sub)
 VFP_OP2(div)
 
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 5:
+    case 0 ... 6:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 6: /* add: fn + fm */
-            gen_vfp_add(dp);
-            break;
         case 7: /* sub: fn - fm */
             gen_vfp_sub(dp);
             break;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1
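One detail worth noting across these conversions: the final boolean passed to
do_vfp_3op_sp/_dp differs. Accumulating ops (VMLA, VMLS, VNMLS, VNMLA) pass
true because the old value of vd is an input; pure two-source ops (VMUL,
VNMUL, VADD, VSUB) pass false. A tiny model of that call contract (the
parameter name reads_vd is our label for it; check the helper's actual
definition):

    #include <stdbool.h>

    struct vfp_3op_call {
        int vd, vn, vm;
        bool reads_vd;   /* true: vd is a source as well as the destination */
    };

    static const struct vfp_3op_call vmla_shape = { 0, 1, 2, true };
    static const struct vfp_3op_call vadd_shape = { 0, 1, 2, false };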
Deleted patch

Convert the VSUB instruction to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-vfp.inc.c | 10 ++++++++++
 target/arm/translate.c         |  6 +-----
 target/arm/vfp.decode          |  5 +++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -XXX,XX +XXX,XX @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_sp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
 }
+
+static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
+{
+    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_sp *a)
+{
+    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_vfp_##name(int dp) \
     tcg_temp_free_ptr(fpst); \
 }
 
-VFP_OP2(sub)
 VFP_OP2(div)
 
 #undef VFP_OP2
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     rn = VFP_SREG_N(insn);
 
     switch (op) {
-    case 0 ... 6:
+    case 0 ... 7:
         /* Already handled by decodetree */
         return 1;
     default:
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
     for (;;) {
         /* Perform the calculation.  */
         switch (op) {
-        case 7: /* sub: fn - fm */
-            gen_vfp_sub(dp);
-            break;
         case 8: /* div: fn / fm */
             gen_vfp_div(dp);
             break;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -XXX,XX +XXX,XX @@ VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... \
         vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... \
         vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... \
+        vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... \
+        vm=%vm_dp vn=%vn_dp vd=%vd_dp
-- 
2.20.1