1
Arm patch queue -- these are all bug fix patches but we might
1
Mostly my decodetree stuff, but also some patches for various
2
as well put them into rc0...
2
smaller bugs/features from others.
3
3
4
thanks
4
thanks
5
-- PMM
5
-- PMM
6
6
7
The following changes since commit 2c8cfc0b52b5a4d123c26c0b5fdf941be24805be:
7
The following changes since commit 53550e81e2cafe7c03a39526b95cd21b5194d9b1:
8
8
9
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2018-03-19 11:44:26 +0000)
9
Merge remote-tracking branch 'remotes/berrange/tags/qcrypto-next-pull-request' into staging (2020-06-15 16:36:34 +0100)
10
10
11
are available in the Git repository at:
11
are available in the Git repository at:
12
12
13
git://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20180319
13
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20200616
14
14
15
for you to fetch changes up to ff72cb6b46b95bb530787add5277c211af3d31c6:
15
for you to fetch changes up to 64b397417a26509bcdff44ab94356a35c7901c79:
16
16
17
hw/arm/raspi: Provide spin-loop code for AArch64 CPUs (2018-03-19 18:23:24 +0000)
17
hw: arm: Set vendor property for IMX SDHCI emulations (2020-06-16 10:32:29 +0100)
18
18
19
----------------------------------------------------------------
19
----------------------------------------------------------------
20
target-arm queue:
20
* hw: arm: Set vendor property for IMX SDHCI emulations
21
* fsl-imx6: Fix incorrect Ethernet interrupt defines
21
* sd: sdhci: Implement basic vendor specific register support
22
* dump: Update correct kdump phys_base field for AArch64
22
* hw/net/imx_fec: Convert debug fprintf() to trace events
23
* char: i.MX: Add support for "TX complete" interrupt
23
* target/arm/cpu: adjust virtual time for all KVM arm cpus
24
* bcm2836/raspi: Fix various bugs resulting in panics trying
24
* Implement configurable descriptor size in ftgmac100
25
to boot a Debian Linux kernel on raspi3
25
* hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
26
* target/arm: More Neon decodetree conversion work
26
27
27
----------------------------------------------------------------
28
----------------------------------------------------------------
28
Andrey Smirnov (2):
29
Erik Smit (1):
29
char: i.MX: Simplify imx_update()
30
Implement configurable descriptor size in ftgmac100
30
char: i.MX: Add support for "TX complete" interrupt
31
31
32
Guenter Roeck (1):
32
Guenter Roeck (2):
33
fsl-imx6: Swap Ethernet interrupt defines
33
sd: sdhci: Implement basic vendor specific register support
34
hw: arm: Set vendor property for IMX SDHCI emulations
34
35
35
Peter Maydell (9):
36
Jean-Christophe Dubois (2):
36
hw/arm/raspi: Don't do board-setup or secure-boot for raspi3
37
hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
37
hw/arm/boot: assert that secure_boot and secure_board_setup are false for AArch64
38
hw/net/imx_fec: Convert debug fprintf() to trace events
38
hw/arm/boot: If booting a kernel in EL2, set SCR_EL3.HCE
39
hw/arm/bcm2386: Fix parent type of bcm2386
40
hw/arm/bcm2836: Rename bcm2836 type/struct to bcm283x
41
hw/arm/bcm2836: Create proper bcm2837 device
42
hw/arm/bcm2836: Use correct affinity values for BCM2837
43
hw/arm/bcm2836: Hardcode correct CPU type
44
hw/arm/raspi: Provide spin-loop code for AArch64 CPUs
45
39
46
Wei Huang (1):
40
Peter Maydell (17):
47
dump: Update correct kdump phys_base field for AArch64
41
target/arm: Fix missing temp frees in do_vshll_2sh
42
target/arm: Convert Neon 3-reg-diff prewidening ops to decodetree
43
target/arm: Convert Neon 3-reg-diff narrowing ops to decodetree
44
target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree
45
target/arm: Convert Neon 3-reg-diff long multiplies
46
target/arm: Convert Neon 3-reg-diff saturating doubling multiplies
47
target/arm: Convert Neon 3-reg-diff polynomial VMULL
48
target/arm: Add 'static' and 'const' annotations to VSHLL function arrays
49
target/arm: Add missing TCG temp free in do_2shift_env_64()
50
target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree
51
target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree
52
target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree
53
target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree
54
target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree
55
target/arm: Convert Neon VEXT to decodetree
56
target/arm: Convert Neon VTBL, VTBX to decodetree
57
target/arm: Convert Neon VDUP (scalar) to decodetree
48
58
49
include/hw/arm/bcm2836.h | 31 +++++++++++++---
59
fangying (1):
50
include/hw/arm/fsl-imx6.h | 4 +-
60
target/arm/cpu: adjust virtual time for all KVM arm cpus
51
include/hw/char/imx_serial.h | 3 ++
52
dump.c | 14 +++++--
53
hw/arm/bcm2836.c | 87 +++++++++++++++++++++++++++++++-------------
54
hw/arm/boot.c | 12 ++++++
55
hw/arm/raspi.c | 77 +++++++++++++++++++++++++++++++--------
56
hw/char/imx_serial.c | 44 ++++++++++++++++------
57
hw/net/imx_fec.c | 28 +++++++++++++-
58
9 files changed, 237 insertions(+), 63 deletions(-)
59
61
62
hw/sd/sdhci-internal.h | 5 +
63
include/hw/sd/sdhci.h | 5 +
64
target/arm/translate.h | 1 +
65
target/arm/neon-dp.decode | 130 +++++
66
hw/arm/fsl-imx25.c | 6 +
67
hw/arm/fsl-imx6.c | 6 +
68
hw/arm/fsl-imx6ul.c | 2 +
69
hw/arm/fsl-imx7.c | 2 +
70
hw/misc/imx6ul_ccm.c | 76 ++-
71
hw/net/ftgmac100.c | 26 +-
72
hw/net/imx_fec.c | 106 ++--
73
hw/sd/sdhci.c | 18 +-
74
target/arm/cpu.c | 6 +-
75
target/arm/cpu64.c | 1 -
76
target/arm/kvm.c | 21 +-
77
target/arm/translate-neon.inc.c | 1148 ++++++++++++++++++++++++++++++++++++++-
78
target/arm/translate.c | 684 +----------------------
79
hw/net/trace-events | 18 +
80
18 files changed, 1495 insertions(+), 766 deletions(-)
81
diff view generated by jsdifflib
New patch
1
The widenfn() in do_vshll_2sh() does not free the input 32-bit
2
TCGv, so we need to do this in the calling code.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
---
8
target/arm/translate-neon.inc.c | 2 ++
9
1 file changed, 2 insertions(+)
10
11
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-neon.inc.c
14
+++ b/target/arm/translate-neon.inc.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
16
tmp = tcg_temp_new_i64();
17
18
widenfn(tmp, rm0);
19
+ tcg_temp_free_i32(rm0);
20
if (a->shift != 0) {
21
tcg_gen_shli_i64(tmp, tmp, a->shift);
22
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
23
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
24
neon_store_reg64(tmp, a->vd);
25
26
widenfn(tmp, rm1);
27
+ tcg_temp_free_i32(rm1);
28
if (a->shift != 0) {
29
tcg_gen_shli_i64(tmp, tmp, a->shift);
30
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
31
--
32
2.20.1
33
34
diff view generated by jsdifflib
New patch
1
1
Convert the "pre-widening" insns VADDL, VSUBL, VADDW and VSUBW
2
in the Neon 3-registers-different-lengths group to decodetree.
3
These insns work by widening one or both inputs to double their
4
size, performing an add or subtract at the doubled size and
5
then storing the double-size result.
6
7
As usual, rather than copying the loop of the original decoder
8
(which needs awkward code to avoid problems when source and
9
destination registers overlap) we just unroll the two passes.
10
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
target/arm/neon-dp.decode | 43 +++++++++++++
15
target/arm/translate-neon.inc.c | 104 ++++++++++++++++++++++++++++++++
16
target/arm/translate.c | 16 ++---
17
3 files changed, 151 insertions(+), 12 deletions(-)
18
19
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/neon-dp.decode
22
+++ b/target/arm/neon-dp.decode
23
@@ -XXX,XX +XXX,XX @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
24
# So we have a single decode line and check the cmode/op in the
25
# trans function.
26
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
27
+
28
+######################################################################
29
+# Within the "two registers, or three registers of different lengths"
30
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
31
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
32
+# or they are a size field for the three-reg-different-lengths and
33
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
34
+# is slightly awkward for decodetree: we handle it with this
35
+# non-exclusive group which contains within it two exclusive groups:
36
+# one for the size=0b11 patterns, and one for the size-not-0b11
37
+# patterns. This allows us to check that none of the insns within
38
+# each subgroup accidentally overlap each other. Note that all the
39
+# trans functions for the size-not-0b11 patterns must check and
40
+# return false for size==3.
41
+######################################################################
42
+{
43
+ # 0b11 subgroup will go here
44
+
45
+ # Subgroup for size != 0b11
46
+ [
47
+ ##################################################################
48
+ # 3-reg-different-length grouping:
49
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
50
+ ##################################################################
51
+
52
+ &3diff vm vn vd size
53
+
54
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
55
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
56
+
57
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
58
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
59
+
60
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
61
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
62
+
63
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
64
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
65
+
66
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
67
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
68
+ ]
69
+}
70
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/translate-neon.inc.c
73
+++ b/target/arm/translate-neon.inc.c
74
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
75
}
76
return do_1reg_imm(s, a, fn);
77
}
78
+
79
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
80
+ NeonGenWidenFn *widenfn,
81
+ NeonGenTwo64OpFn *opfn,
82
+ bool src1_wide)
83
+{
84
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
85
+ TCGv_i64 rn0_64, rn1_64, rm_64;
86
+ TCGv_i32 rm;
87
+
88
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
89
+ return false;
90
+ }
91
+
92
+ /* UNDEF accesses to D16-D31 if they don't exist. */
93
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
94
+ ((a->vd | a->vn | a->vm) & 0x10)) {
95
+ return false;
96
+ }
97
+
98
+ if (!widenfn || !opfn) {
99
+ /* size == 3 case, which is an entirely different insn group */
100
+ return false;
101
+ }
102
+
103
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
104
+ return false;
105
+ }
106
+
107
+ if (!vfp_access_check(s)) {
108
+ return true;
109
+ }
110
+
111
+ rn0_64 = tcg_temp_new_i64();
112
+ rn1_64 = tcg_temp_new_i64();
113
+ rm_64 = tcg_temp_new_i64();
114
+
115
+ if (src1_wide) {
116
+ neon_load_reg64(rn0_64, a->vn);
117
+ } else {
118
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
119
+ widenfn(rn0_64, tmp);
120
+ tcg_temp_free_i32(tmp);
121
+ }
122
+ rm = neon_load_reg(a->vm, 0);
123
+
124
+ widenfn(rm_64, rm);
125
+ tcg_temp_free_i32(rm);
126
+ opfn(rn0_64, rn0_64, rm_64);
127
+
128
+ /*
129
+ * Load second pass inputs before storing the first pass result, to
130
+ * avoid incorrect results if a narrow input overlaps with the result.
131
+ */
132
+ if (src1_wide) {
133
+ neon_load_reg64(rn1_64, a->vn + 1);
134
+ } else {
135
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
136
+ widenfn(rn1_64, tmp);
137
+ tcg_temp_free_i32(tmp);
138
+ }
139
+ rm = neon_load_reg(a->vm, 1);
140
+
141
+ neon_store_reg64(rn0_64, a->vd);
142
+
143
+ widenfn(rm_64, rm);
144
+ tcg_temp_free_i32(rm);
145
+ opfn(rn1_64, rn1_64, rm_64);
146
+ neon_store_reg64(rn1_64, a->vd + 1);
147
+
148
+ tcg_temp_free_i64(rn0_64);
149
+ tcg_temp_free_i64(rn1_64);
150
+ tcg_temp_free_i64(rm_64);
151
+
152
+ return true;
153
+}
154
+
155
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
156
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
157
+ { \
158
+ static NeonGenWidenFn * const widenfn[] = { \
159
+ gen_helper_neon_widen_##S##8, \
160
+ gen_helper_neon_widen_##S##16, \
161
+ tcg_gen_##EXT##_i32_i64, \
162
+ NULL, \
163
+ }; \
164
+ static NeonGenTwo64OpFn * const addfn[] = { \
165
+ gen_helper_neon_##OP##l_u16, \
166
+ gen_helper_neon_##OP##l_u32, \
167
+ tcg_gen_##OP##_i64, \
168
+ NULL, \
169
+ }; \
170
+ return do_prewiden_3d(s, a, widenfn[a->size], \
171
+ addfn[a->size], SRC1WIDE); \
172
+ }
173
+
174
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
175
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
176
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
177
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
178
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
179
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
180
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
181
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
182
diff --git a/target/arm/translate.c b/target/arm/translate.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/target/arm/translate.c
185
+++ b/target/arm/translate.c
186
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
187
/* Three registers of different lengths. */
188
int src1_wide;
189
int src2_wide;
190
- int prewiden;
191
/* undefreq: bit 0 : UNDEF if size == 0
192
* bit 1 : UNDEF if size == 1
193
* bit 2 : UNDEF if size == 2
194
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
195
int undefreq;
196
/* prewiden, src1_wide, src2_wide, undefreq */
197
static const int neon_3reg_wide[16][4] = {
198
- {1, 0, 0, 0}, /* VADDL */
199
- {1, 1, 0, 0}, /* VADDW */
200
- {1, 0, 0, 0}, /* VSUBL */
201
- {1, 1, 0, 0}, /* VSUBW */
202
+ {0, 0, 0, 7}, /* VADDL: handled by decodetree */
203
+ {0, 0, 0, 7}, /* VADDW: handled by decodetree */
204
+ {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
205
+ {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
206
{0, 1, 1, 0}, /* VADDHN */
207
{0, 0, 0, 0}, /* VABAL */
208
{0, 1, 1, 0}, /* VSUBHN */
209
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
210
{0, 0, 0, 7}, /* Reserved: always UNDEF */
211
};
212
213
- prewiden = neon_3reg_wide[op][0];
214
src1_wide = neon_3reg_wide[op][1];
215
src2_wide = neon_3reg_wide[op][2];
216
undefreq = neon_3reg_wide[op][3];
217
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
218
} else {
219
tmp = neon_load_reg(rn, pass);
220
}
221
- if (prewiden) {
222
- gen_neon_widen(cpu_V0, tmp, size, u);
223
- }
224
}
225
if (src2_wide) {
226
neon_load_reg64(cpu_V1, rm + pass);
227
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
228
} else {
229
tmp2 = neon_load_reg(rm, pass);
230
}
231
- if (prewiden) {
232
- gen_neon_widen(cpu_V1, tmp2, size, u);
233
- }
234
}
235
switch (op) {
236
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
237
--
238
2.20.1
239
240
diff view generated by jsdifflib
New patch
1
1
Convert the narrow-to-high-half insns VADDHN, VSUBHN, VRADDHN,
2
VRSUBHN in the Neon 3-registers-different-lengths group to
3
decodetree.
4
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/arm/neon-dp.decode | 6 +++
9
target/arm/translate-neon.inc.c | 87 +++++++++++++++++++++++++++++++
10
target/arm/translate.c | 91 ++++-----------------------------
11
3 files changed, 104 insertions(+), 80 deletions(-)
12
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/neon-dp.decode
16
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
18
19
VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
20
VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
21
+
22
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
23
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
24
+
25
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
26
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
27
]
28
}
29
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-neon.inc.c
32
+++ b/target/arm/translate-neon.inc.c
33
@@ -XXX,XX +XXX,XX @@ DO_PREWIDEN(VADDW_S, s, ext, add, true)
34
DO_PREWIDEN(VADDW_U, u, extu, add, true)
35
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
36
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
37
+
38
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
39
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
40
+{
41
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
42
+ TCGv_i64 rn_64, rm_64;
43
+ TCGv_i32 rd0, rd1;
44
+
45
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
46
+ return false;
47
+ }
48
+
49
+ /* UNDEF accesses to D16-D31 if they don't exist. */
50
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
51
+ ((a->vd | a->vn | a->vm) & 0x10)) {
52
+ return false;
53
+ }
54
+
55
+ if (!opfn || !narrowfn) {
56
+ /* size == 3 case, which is an entirely different insn group */
57
+ return false;
58
+ }
59
+
60
+ if ((a->vn | a->vm) & 1) {
61
+ return false;
62
+ }
63
+
64
+ if (!vfp_access_check(s)) {
65
+ return true;
66
+ }
67
+
68
+ rn_64 = tcg_temp_new_i64();
69
+ rm_64 = tcg_temp_new_i64();
70
+ rd0 = tcg_temp_new_i32();
71
+ rd1 = tcg_temp_new_i32();
72
+
73
+ neon_load_reg64(rn_64, a->vn);
74
+ neon_load_reg64(rm_64, a->vm);
75
+
76
+ opfn(rn_64, rn_64, rm_64);
77
+
78
+ narrowfn(rd0, rn_64);
79
+
80
+ neon_load_reg64(rn_64, a->vn + 1);
81
+ neon_load_reg64(rm_64, a->vm + 1);
82
+
83
+ opfn(rn_64, rn_64, rm_64);
84
+
85
+ narrowfn(rd1, rn_64);
86
+
87
+ neon_store_reg(a->vd, 0, rd0);
88
+ neon_store_reg(a->vd, 1, rd1);
89
+
90
+ tcg_temp_free_i64(rn_64);
91
+ tcg_temp_free_i64(rm_64);
92
+
93
+ return true;
94
+}
95
+
96
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
97
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
98
+ { \
99
+ static NeonGenTwo64OpFn * const addfn[] = { \
100
+ gen_helper_neon_##OP##l_u16, \
101
+ gen_helper_neon_##OP##l_u32, \
102
+ tcg_gen_##OP##_i64, \
103
+ NULL, \
104
+ }; \
105
+ static NeonGenNarrowFn * const narrowfn[] = { \
106
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
107
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
108
+ EXTOP, \
109
+ NULL, \
110
+ }; \
111
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
112
+ }
113
+
114
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
115
+{
116
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
117
+ tcg_gen_extrh_i64_i32(rd, rn);
118
+}
119
+
120
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
121
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
122
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
123
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
124
diff --git a/target/arm/translate.c b/target/arm/translate.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/target/arm/translate.c
127
+++ b/target/arm/translate.c
128
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
129
}
130
}
131
132
-static inline void gen_neon_subl(int size)
133
-{
134
- switch (size) {
135
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
136
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
137
- case 2: tcg_gen_sub_i64(CPU_V001); break;
138
- default: abort();
139
- }
140
-}
141
-
142
static inline void gen_neon_negl(TCGv_i64 var, int size)
143
{
144
switch (size) {
145
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
146
op = (insn >> 8) & 0xf;
147
if ((insn & (1 << 6)) == 0) {
148
/* Three registers of different lengths. */
149
- int src1_wide;
150
- int src2_wide;
151
/* undefreq: bit 0 : UNDEF if size == 0
152
* bit 1 : UNDEF if size == 1
153
* bit 2 : UNDEF if size == 2
154
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
155
{0, 0, 0, 7}, /* VADDW: handled by decodetree */
156
{0, 0, 0, 7}, /* VSUBL: handled by decodetree */
157
{0, 0, 0, 7}, /* VSUBW: handled by decodetree */
158
- {0, 1, 1, 0}, /* VADDHN */
159
+ {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
160
{0, 0, 0, 0}, /* VABAL */
161
- {0, 1, 1, 0}, /* VSUBHN */
162
+ {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
163
{0, 0, 0, 0}, /* VABDL */
164
{0, 0, 0, 0}, /* VMLAL */
165
{0, 0, 0, 9}, /* VQDMLAL */
166
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
167
{0, 0, 0, 7}, /* Reserved: always UNDEF */
168
};
169
170
- src1_wide = neon_3reg_wide[op][1];
171
- src2_wide = neon_3reg_wide[op][2];
172
undefreq = neon_3reg_wide[op][3];
173
174
if ((undefreq & (1 << size)) ||
175
((undefreq & 8) && u)) {
176
return 1;
177
}
178
- if ((src1_wide && (rn & 1)) ||
179
- (src2_wide && (rm & 1)) ||
180
- (!src2_wide && (rd & 1))) {
181
+ if (rd & 1) {
182
return 1;
183
}
184
185
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
186
/* Avoid overlapping operands. Wide source operands are
187
always aligned so will never overlap with wide
188
destinations in problematic ways. */
189
- if (rd == rm && !src2_wide) {
190
+ if (rd == rm) {
191
tmp = neon_load_reg(rm, 1);
192
neon_store_scratch(2, tmp);
193
- } else if (rd == rn && !src1_wide) {
194
+ } else if (rd == rn) {
195
tmp = neon_load_reg(rn, 1);
196
neon_store_scratch(2, tmp);
197
}
198
tmp3 = NULL;
199
for (pass = 0; pass < 2; pass++) {
200
- if (src1_wide) {
201
- neon_load_reg64(cpu_V0, rn + pass);
202
- tmp = NULL;
203
+ if (pass == 1 && rd == rn) {
204
+ tmp = neon_load_scratch(2);
205
} else {
206
- if (pass == 1 && rd == rn) {
207
- tmp = neon_load_scratch(2);
208
- } else {
209
- tmp = neon_load_reg(rn, pass);
210
- }
211
+ tmp = neon_load_reg(rn, pass);
212
}
213
- if (src2_wide) {
214
- neon_load_reg64(cpu_V1, rm + pass);
215
- tmp2 = NULL;
216
+ if (pass == 1 && rd == rm) {
217
+ tmp2 = neon_load_scratch(2);
218
} else {
219
- if (pass == 1 && rd == rm) {
220
- tmp2 = neon_load_scratch(2);
221
- } else {
222
- tmp2 = neon_load_reg(rm, pass);
223
- }
224
+ tmp2 = neon_load_reg(rm, pass);
225
}
226
switch (op) {
227
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
228
- gen_neon_addl(size);
229
- break;
230
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
231
- gen_neon_subl(size);
232
- break;
233
case 5: case 7: /* VABAL, VABDL */
234
switch ((size << 1) | u) {
235
case 0:
236
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
237
abort();
238
}
239
neon_store_reg64(cpu_V0, rd + pass);
240
- } else if (op == 4 || op == 6) {
241
- /* Narrowing operation. */
242
- tmp = tcg_temp_new_i32();
243
- if (!u) {
244
- switch (size) {
245
- case 0:
246
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
247
- break;
248
- case 1:
249
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
250
- break;
251
- case 2:
252
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
253
- break;
254
- default: abort();
255
- }
256
- } else {
257
- switch (size) {
258
- case 0:
259
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
260
- break;
261
- case 1:
262
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
263
- break;
264
- case 2:
265
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
266
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
267
- break;
268
- default: abort();
269
- }
270
- }
271
- if (pass == 0) {
272
- tmp3 = tmp;
273
- } else {
274
- neon_store_reg(rd, 0, tmp3);
275
- neon_store_reg(rd, 1, tmp);
276
- }
277
} else {
278
/* Write back the result. */
279
neon_store_reg64(cpu_V0, rd + pass);
280
--
281
2.20.1
282
283
diff view generated by jsdifflib
New patch
1
1
Convert the Neon 3-reg-diff insns VABAL and VABDL to decodetree.
2
Like almost all the remaining insns in this group, these are
3
a combination of a two-input operation which returns a double width
4
result and then a possible accumulation of that double width
5
result into the destination.
6
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
target/arm/translate.h | 1 +
11
target/arm/neon-dp.decode | 6 ++
12
target/arm/translate-neon.inc.c | 132 ++++++++++++++++++++++++++++++++
13
target/arm/translate.c | 31 +-------
14
4 files changed, 142 insertions(+), 28 deletions(-)
15
16
diff --git a/target/arm/translate.h b/target/arm/translate.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate.h
19
+++ b/target/arm/translate.h
20
@@ -XXX,XX +XXX,XX @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
21
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
22
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
23
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
24
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
25
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
26
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
27
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
28
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/neon-dp.decode
31
+++ b/target/arm/neon-dp.decode
32
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
33
VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
34
VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
35
36
+ VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
37
+ VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
38
+
39
VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
40
VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
41
+
42
+ VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
43
+ VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
44
]
45
}
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
51
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
52
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
53
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
54
+
55
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
56
+ NeonGenTwoOpWidenFn *opfn,
57
+ NeonGenTwo64OpFn *accfn)
58
+{
59
+ /*
60
+ * 3-regs different lengths, long operations.
61
+ * These perform an operation on two inputs that returns a double-width
62
+ * result, and then possibly perform an accumulation operation of
63
+ * that result into the double-width destination.
64
+ */
65
+ TCGv_i64 rd0, rd1, tmp;
66
+ TCGv_i32 rn, rm;
67
+
68
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
69
+ return false;
70
+ }
71
+
72
+ /* UNDEF accesses to D16-D31 if they don't exist. */
73
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
74
+ ((a->vd | a->vn | a->vm) & 0x10)) {
75
+ return false;
76
+ }
77
+
78
+ if (!opfn) {
79
+ /* size == 3 case, which is an entirely different insn group */
80
+ return false;
81
+ }
82
+
83
+ if (a->vd & 1) {
84
+ return false;
85
+ }
86
+
87
+ if (!vfp_access_check(s)) {
88
+ return true;
89
+ }
90
+
91
+ rd0 = tcg_temp_new_i64();
92
+ rd1 = tcg_temp_new_i64();
93
+
94
+ rn = neon_load_reg(a->vn, 0);
95
+ rm = neon_load_reg(a->vm, 0);
96
+ opfn(rd0, rn, rm);
97
+ tcg_temp_free_i32(rn);
98
+ tcg_temp_free_i32(rm);
99
+
100
+ rn = neon_load_reg(a->vn, 1);
101
+ rm = neon_load_reg(a->vm, 1);
102
+ opfn(rd1, rn, rm);
103
+ tcg_temp_free_i32(rn);
104
+ tcg_temp_free_i32(rm);
105
+
106
+ /* Don't store results until after all loads: they might overlap */
107
+ if (accfn) {
108
+ tmp = tcg_temp_new_i64();
109
+ neon_load_reg64(tmp, a->vd);
110
+ accfn(tmp, tmp, rd0);
111
+ neon_store_reg64(tmp, a->vd);
112
+ neon_load_reg64(tmp, a->vd + 1);
113
+ accfn(tmp, tmp, rd1);
114
+ neon_store_reg64(tmp, a->vd + 1);
115
+ tcg_temp_free_i64(tmp);
116
+ } else {
117
+ neon_store_reg64(rd0, a->vd);
118
+ neon_store_reg64(rd1, a->vd + 1);
119
+ }
120
+
121
+ tcg_temp_free_i64(rd0);
122
+ tcg_temp_free_i64(rd1);
123
+
124
+ return true;
125
+}
126
+
127
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
128
+{
129
+ static NeonGenTwoOpWidenFn * const opfn[] = {
130
+ gen_helper_neon_abdl_s16,
131
+ gen_helper_neon_abdl_s32,
132
+ gen_helper_neon_abdl_s64,
133
+ NULL,
134
+ };
135
+
136
+ return do_long_3d(s, a, opfn[a->size], NULL);
137
+}
138
+
139
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
140
+{
141
+ static NeonGenTwoOpWidenFn * const opfn[] = {
142
+ gen_helper_neon_abdl_u16,
143
+ gen_helper_neon_abdl_u32,
144
+ gen_helper_neon_abdl_u64,
145
+ NULL,
146
+ };
147
+
148
+ return do_long_3d(s, a, opfn[a->size], NULL);
149
+}
150
+
151
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
152
+{
153
+ static NeonGenTwoOpWidenFn * const opfn[] = {
154
+ gen_helper_neon_abdl_s16,
155
+ gen_helper_neon_abdl_s32,
156
+ gen_helper_neon_abdl_s64,
157
+ NULL,
158
+ };
159
+ static NeonGenTwo64OpFn * const addfn[] = {
160
+ gen_helper_neon_addl_u16,
161
+ gen_helper_neon_addl_u32,
162
+ tcg_gen_add_i64,
163
+ NULL,
164
+ };
165
+
166
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
167
+}
168
+
169
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
170
+{
171
+ static NeonGenTwoOpWidenFn * const opfn[] = {
172
+ gen_helper_neon_abdl_u16,
173
+ gen_helper_neon_abdl_u32,
174
+ gen_helper_neon_abdl_u64,
175
+ NULL,
176
+ };
177
+ static NeonGenTwo64OpFn * const addfn[] = {
178
+ gen_helper_neon_addl_u16,
179
+ gen_helper_neon_addl_u32,
180
+ tcg_gen_add_i64,
181
+ NULL,
182
+ };
183
+
184
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
185
+}
186
diff --git a/target/arm/translate.c b/target/arm/translate.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/arm/translate.c
189
+++ b/target/arm/translate.c
190
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
191
{0, 0, 0, 7}, /* VSUBL: handled by decodetree */
192
{0, 0, 0, 7}, /* VSUBW: handled by decodetree */
193
{0, 0, 0, 7}, /* VADDHN: handled by decodetree */
194
- {0, 0, 0, 0}, /* VABAL */
195
+ {0, 0, 0, 7}, /* VABAL */
196
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
197
- {0, 0, 0, 0}, /* VABDL */
198
+ {0, 0, 0, 7}, /* VABDL */
199
{0, 0, 0, 0}, /* VMLAL */
200
{0, 0, 0, 9}, /* VQDMLAL */
201
{0, 0, 0, 0}, /* VMLSL */
202
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
203
tmp2 = neon_load_reg(rm, pass);
204
}
205
switch (op) {
206
- case 5: case 7: /* VABAL, VABDL */
207
- switch ((size << 1) | u) {
208
- case 0:
209
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
210
- break;
211
- case 1:
212
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
213
- break;
214
- case 2:
215
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
216
- break;
217
- case 3:
218
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
219
- break;
220
- case 4:
221
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
222
- break;
223
- case 5:
224
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
225
- break;
226
- default: abort();
227
- }
228
- tcg_temp_free_i32(tmp2);
229
- tcg_temp_free_i32(tmp);
230
- break;
231
case 8: case 9: case 10: case 11: case 12: case 13:
232
/* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
233
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
234
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
235
case 10: /* VMLSL */
236
gen_neon_negl(cpu_V0, size);
237
/* Fall through */
238
- case 5: case 8: /* VABAL, VMLAL */
239
+ case 8: /* VABAL, VMLAL */
240
gen_neon_addl(size);
241
break;
242
case 9: case 11: /* VQDMLAL, VQDMLSL */
243
--
244
2.20.1
245
246
diff view generated by jsdifflib
New patch
1
Convert the Neon 3-reg-diff insns VMULL, VMLAL and VMLSL; these perform
2
a 32x32->64 multiply with possible accumulate.
1
3
4
Note that for VMLSL we do the accumulate directly with a subtraction
5
rather than doing a negate-then-add as the old code did.
6
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
target/arm/neon-dp.decode | 9 +++++
11
target/arm/translate-neon.inc.c | 71 +++++++++++++++++++++++++++++++++
12
target/arm/translate.c | 21 +++-------
13
3 files changed, 86 insertions(+), 15 deletions(-)
14
15
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/neon-dp.decode
18
+++ b/target/arm/neon-dp.decode
19
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
20
21
VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
22
VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
23
+
24
+ VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
25
+ VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
26
+
27
+ VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
28
+ VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
29
+
30
+ VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
31
+ VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
32
]
33
}
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-neon.inc.c
37
+++ b/target/arm/translate-neon.inc.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
39
40
return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
41
}
42
+
43
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
44
+{
45
+ TCGv_i32 lo = tcg_temp_new_i32();
46
+ TCGv_i32 hi = tcg_temp_new_i32();
47
+
48
+ tcg_gen_muls2_i32(lo, hi, rn, rm);
49
+ tcg_gen_concat_i32_i64(rd, lo, hi);
50
+
51
+ tcg_temp_free_i32(lo);
52
+ tcg_temp_free_i32(hi);
53
+}
54
+
55
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
56
+{
57
+ TCGv_i32 lo = tcg_temp_new_i32();
58
+ TCGv_i32 hi = tcg_temp_new_i32();
59
+
60
+ tcg_gen_mulu2_i32(lo, hi, rn, rm);
61
+ tcg_gen_concat_i32_i64(rd, lo, hi);
62
+
63
+ tcg_temp_free_i32(lo);
64
+ tcg_temp_free_i32(hi);
65
+}
66
+
67
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
68
+{
69
+ static NeonGenTwoOpWidenFn * const opfn[] = {
70
+ gen_helper_neon_mull_s8,
71
+ gen_helper_neon_mull_s16,
72
+ gen_mull_s32,
73
+ NULL,
74
+ };
75
+
76
+ return do_long_3d(s, a, opfn[a->size], NULL);
77
+}
78
+
79
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
80
+{
81
+ static NeonGenTwoOpWidenFn * const opfn[] = {
82
+ gen_helper_neon_mull_u8,
83
+ gen_helper_neon_mull_u16,
84
+ gen_mull_u32,
85
+ NULL,
86
+ };
87
+
88
+ return do_long_3d(s, a, opfn[a->size], NULL);
89
+}
90
+
91
+#define DO_VMLAL(INSN,MULL,ACC) \
92
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
93
+ { \
94
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
95
+ gen_helper_neon_##MULL##8, \
96
+ gen_helper_neon_##MULL##16, \
97
+ gen_##MULL##32, \
98
+ NULL, \
99
+ }; \
100
+ static NeonGenTwo64OpFn * const accfn[] = { \
101
+ gen_helper_neon_##ACC##l_u16, \
102
+ gen_helper_neon_##ACC##l_u32, \
103
+ tcg_gen_##ACC##_i64, \
104
+ NULL, \
105
+ }; \
106
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
107
+ }
108
+
109
+DO_VMLAL(VMLAL_S,mull_s,add)
110
+DO_VMLAL(VMLAL_U,mull_u,add)
111
+DO_VMLAL(VMLSL_S,mull_s,sub)
112
+DO_VMLAL(VMLSL_U,mull_u,sub)
113
diff --git a/target/arm/translate.c b/target/arm/translate.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/translate.c
116
+++ b/target/arm/translate.c
117
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
118
{0, 0, 0, 7}, /* VABAL */
119
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
120
{0, 0, 0, 7}, /* VABDL */
121
- {0, 0, 0, 0}, /* VMLAL */
122
+ {0, 0, 0, 7}, /* VMLAL */
123
{0, 0, 0, 9}, /* VQDMLAL */
124
- {0, 0, 0, 0}, /* VMLSL */
125
+ {0, 0, 0, 7}, /* VMLSL */
126
{0, 0, 0, 9}, /* VQDMLSL */
127
- {0, 0, 0, 0}, /* Integer VMULL */
128
+ {0, 0, 0, 7}, /* Integer VMULL */
129
{0, 0, 0, 9}, /* VQDMULL */
130
{0, 0, 0, 0xa}, /* Polynomial VMULL */
131
{0, 0, 0, 7}, /* Reserved: always UNDEF */
132
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
133
tmp2 = neon_load_reg(rm, pass);
134
}
135
switch (op) {
136
- case 8: case 9: case 10: case 11: case 12: case 13:
137
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
138
+ case 9: case 11: case 13:
139
+ /* VQDMLAL, VQDMLSL, VQDMULL */
140
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
141
break;
142
default: /* 15 is RESERVED: caught earlier */
143
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
144
/* VQDMULL */
145
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
146
neon_store_reg64(cpu_V0, rd + pass);
147
- } else if (op == 5 || (op >= 8 && op <= 11)) {
148
+ } else {
149
/* Accumulate. */
150
neon_load_reg64(cpu_V1, rd + pass);
151
switch (op) {
152
- case 10: /* VMLSL */
153
- gen_neon_negl(cpu_V0, size);
154
- /* Fall through */
155
- case 8: /* VABAL, VMLAL */
156
- gen_neon_addl(size);
157
- break;
158
case 9: case 11: /* VQDMLAL, VQDMLSL */
159
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
160
if (op == 11) {
161
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
162
abort();
163
}
164
neon_store_reg64(cpu_V0, rd + pass);
165
- } else {
166
- /* Write back the result. */
167
- neon_store_reg64(cpu_V0, rd + pass);
168
}
169
}
170
} else {
171
--
172
2.20.1
173
174
diff view generated by jsdifflib
New patch
1
1
Convert the Neon 3-reg-diff insns VQDMULL, VQDMLAL and VQDMLSL:
2
these are all saturating doubling long multiplies with a possible
3
accumulate step.
4
5
These are the last insns in the group which use the pass-over-each
6
elements loop, so we can delete that code.
7
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
target/arm/neon-dp.decode | 6 +++
12
target/arm/translate-neon.inc.c | 82 +++++++++++++++++++++++++++++++++
13
target/arm/translate.c | 59 ++----------------------
14
3 files changed, 92 insertions(+), 55 deletions(-)
15
16
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/neon-dp.decode
19
+++ b/target/arm/neon-dp.decode
20
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
21
VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
22
VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
23
24
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
25
+
26
VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
27
VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
28
29
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
30
+
31
VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
32
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
33
+
34
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
35
]
36
}
37
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/translate-neon.inc.c
40
+++ b/target/arm/translate-neon.inc.c
41
@@ -XXX,XX +XXX,XX @@ DO_VMLAL(VMLAL_S,mull_s,add)
42
DO_VMLAL(VMLAL_U,mull_u,add)
43
DO_VMLAL(VMLSL_S,mull_s,sub)
44
DO_VMLAL(VMLSL_U,mull_u,sub)
45
+
46
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
47
+{
48
+ gen_helper_neon_mull_s16(rd, rn, rm);
49
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
50
+}
51
+
52
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
53
+{
54
+ gen_mull_s32(rd, rn, rm);
55
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
56
+}
57
+
58
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
59
+{
60
+ static NeonGenTwoOpWidenFn * const opfn[] = {
61
+ NULL,
62
+ gen_VQDMULL_16,
63
+ gen_VQDMULL_32,
64
+ NULL,
65
+ };
66
+
67
+ return do_long_3d(s, a, opfn[a->size], NULL);
68
+}
69
+
70
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
71
+{
72
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
73
+}
74
+
75
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
76
+{
77
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
78
+}
79
+
80
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
81
+{
82
+ static NeonGenTwoOpWidenFn * const opfn[] = {
83
+ NULL,
84
+ gen_VQDMULL_16,
85
+ gen_VQDMULL_32,
86
+ NULL,
87
+ };
88
+ static NeonGenTwo64OpFn * const accfn[] = {
89
+ NULL,
90
+ gen_VQDMLAL_acc_16,
91
+ gen_VQDMLAL_acc_32,
92
+ NULL,
93
+ };
94
+
95
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
96
+}
97
+
98
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
99
+{
100
+ gen_helper_neon_negl_u32(rm, rm);
101
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
102
+}
103
+
104
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
105
+{
106
+ tcg_gen_neg_i64(rm, rm);
107
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
108
+}
109
+
110
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
111
+{
112
+ static NeonGenTwoOpWidenFn * const opfn[] = {
113
+ NULL,
114
+ gen_VQDMULL_16,
115
+ gen_VQDMULL_32,
116
+ NULL,
117
+ };
118
+ static NeonGenTwo64OpFn * const accfn[] = {
119
+ NULL,
120
+ gen_VQDMLSL_acc_16,
121
+ gen_VQDMLSL_acc_32,
122
+ NULL,
123
+ };
124
+
125
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
126
+}
127
diff --git a/target/arm/translate.c b/target/arm/translate.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/target/arm/translate.c
130
+++ b/target/arm/translate.c
131
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
132
{0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
133
{0, 0, 0, 7}, /* VABDL */
134
{0, 0, 0, 7}, /* VMLAL */
135
- {0, 0, 0, 9}, /* VQDMLAL */
136
+ {0, 0, 0, 7}, /* VQDMLAL */
137
{0, 0, 0, 7}, /* VMLSL */
138
- {0, 0, 0, 9}, /* VQDMLSL */
139
+ {0, 0, 0, 7}, /* VQDMLSL */
140
{0, 0, 0, 7}, /* Integer VMULL */
141
- {0, 0, 0, 9}, /* VQDMULL */
142
+ {0, 0, 0, 7}, /* VQDMULL */
143
{0, 0, 0, 0xa}, /* Polynomial VMULL */
144
{0, 0, 0, 7}, /* Reserved: always UNDEF */
145
};
146
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
147
}
148
return 0;
149
}
150
-
151
- /* Avoid overlapping operands. Wide source operands are
152
- always aligned so will never overlap with wide
153
- destinations in problematic ways. */
154
- if (rd == rm) {
155
- tmp = neon_load_reg(rm, 1);
156
- neon_store_scratch(2, tmp);
157
- } else if (rd == rn) {
158
- tmp = neon_load_reg(rn, 1);
159
- neon_store_scratch(2, tmp);
160
- }
161
- tmp3 = NULL;
162
- for (pass = 0; pass < 2; pass++) {
163
- if (pass == 1 && rd == rn) {
164
- tmp = neon_load_scratch(2);
165
- } else {
166
- tmp = neon_load_reg(rn, pass);
167
- }
168
- if (pass == 1 && rd == rm) {
169
- tmp2 = neon_load_scratch(2);
170
- } else {
171
- tmp2 = neon_load_reg(rm, pass);
172
- }
173
- switch (op) {
174
- case 9: case 11: case 13:
175
- /* VQDMLAL, VQDMLSL, VQDMULL */
176
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
177
- break;
178
- default: /* 15 is RESERVED: caught earlier */
179
- abort();
180
- }
181
- if (op == 13) {
182
- /* VQDMULL */
183
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
184
- neon_store_reg64(cpu_V0, rd + pass);
185
- } else {
186
- /* Accumulate. */
187
- neon_load_reg64(cpu_V1, rd + pass);
188
- switch (op) {
189
- case 9: case 11: /* VQDMLAL, VQDMLSL */
190
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
191
- if (op == 11) {
192
- gen_neon_negl(cpu_V0, size);
193
- }
194
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
195
- break;
196
- default:
197
- abort();
198
- }
199
- neon_store_reg64(cpu_V0, rd + pass);
200
- }
201
- }
202
+ abort(); /* all others handled by decodetree */
203
} else {
204
/* Two registers and a scalar. NB that for ops of this form
205
* the ARM ARM labels bit 24 as Q, but it is in our variable
206
--
207
2.20.1
208
209
diff view generated by jsdifflib
New patch
1
Convert the Neon 3-reg-diff insn polynomial VMULL. This is the last
2
insn in this group to be converted.
1
3
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/arm/neon-dp.decode | 2 ++
8
target/arm/translate-neon.inc.c | 43 +++++++++++++++++++++++
9
target/arm/translate.c | 60 ++-------------------------------
10
3 files changed, 48 insertions(+), 57 deletions(-)
11
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/neon-dp.decode
15
+++ b/target/arm/neon-dp.decode
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
17
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
18
19
VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
20
+
21
+ VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
22
]
23
}
24
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/translate-neon.inc.c
27
+++ b/target/arm/translate-neon.inc.c
28
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
29
30
return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
31
}
32
+
33
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
34
+{
35
+ gen_helper_gvec_3 *fn_gvec;
36
+
37
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
38
+ return false;
39
+ }
40
+
41
+ /* UNDEF accesses to D16-D31 if they don't exist. */
42
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
43
+ ((a->vd | a->vn | a->vm) & 0x10)) {
44
+ return false;
45
+ }
46
+
47
+ if (a->vd & 1) {
48
+ return false;
49
+ }
50
+
51
+ switch (a->size) {
52
+ case 0:
53
+ fn_gvec = gen_helper_neon_pmull_h;
54
+ break;
55
+ case 2:
56
+ if (!dc_isar_feature(aa32_pmull, s)) {
57
+ return false;
58
+ }
59
+ fn_gvec = gen_helper_gvec_pmull_q;
60
+ break;
61
+ default:
62
+ return false;
63
+ }
64
+
65
+ if (!vfp_access_check(s)) {
66
+ return true;
67
+ }
68
+
69
+ tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
70
+ neon_reg_offset(a->vn, 0),
71
+ neon_reg_offset(a->vm, 0),
72
+ 16, 16, 0, fn_gvec);
73
+ return true;
74
+}
75
diff --git a/target/arm/translate.c b/target/arm/translate.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/translate.c
78
+++ b/target/arm/translate.c
79
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
80
{
81
int op;
82
int q;
83
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
84
+ int rd, rn, rm, rd_ofs, rm_ofs;
85
int size;
86
int pass;
87
int u;
88
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
89
size = (insn >> 20) & 3;
90
vec_size = q ? 16 : 8;
91
rd_ofs = neon_reg_offset(rd, 0);
92
- rn_ofs = neon_reg_offset(rn, 0);
93
rm_ofs = neon_reg_offset(rm, 0);
94
95
if ((insn & (1 << 23)) == 0) {
96
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
97
if (size != 3) {
98
op = (insn >> 8) & 0xf;
99
if ((insn & (1 << 6)) == 0) {
100
- /* Three registers of different lengths. */
101
- /* undefreq: bit 0 : UNDEF if size == 0
102
- * bit 1 : UNDEF if size == 1
103
- * bit 2 : UNDEF if size == 2
104
- * bit 3 : UNDEF if U == 1
105
- * Note that [2:0] set implies 'always UNDEF'
106
- */
107
- int undefreq;
108
- /* prewiden, src1_wide, src2_wide, undefreq */
109
- static const int neon_3reg_wide[16][4] = {
110
- {0, 0, 0, 7}, /* VADDL: handled by decodetree */
111
- {0, 0, 0, 7}, /* VADDW: handled by decodetree */
112
- {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
113
- {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
114
- {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
115
- {0, 0, 0, 7}, /* VABAL */
116
- {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
117
- {0, 0, 0, 7}, /* VABDL */
118
- {0, 0, 0, 7}, /* VMLAL */
119
- {0, 0, 0, 7}, /* VQDMLAL */
120
- {0, 0, 0, 7}, /* VMLSL */
121
- {0, 0, 0, 7}, /* VQDMLSL */
122
- {0, 0, 0, 7}, /* Integer VMULL */
123
- {0, 0, 0, 7}, /* VQDMULL */
124
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
125
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
126
- };
127
-
128
- undefreq = neon_3reg_wide[op][3];
129
-
130
- if ((undefreq & (1 << size)) ||
131
- ((undefreq & 8) && u)) {
132
- return 1;
133
- }
134
- if (rd & 1) {
135
- return 1;
136
- }
137
-
138
- /* Handle polynomial VMULL in a single pass. */
139
- if (op == 14) {
140
- if (size == 0) {
141
- /* VMULL.P8 */
142
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
143
- 0, gen_helper_neon_pmull_h);
144
- } else {
145
- /* VMULL.P64 */
146
- if (!dc_isar_feature(aa32_pmull, s)) {
147
- return 1;
148
- }
149
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
150
- 0, gen_helper_gvec_pmull_q);
151
- }
152
- return 0;
153
- }
154
- abort(); /* all others handled by decodetree */
155
+ /* Three registers of different lengths: handled by decodetree */
156
+ return 1;
157
} else {
158
/* Two registers and a scalar. NB that for ops of this form
159
* the ARM ARM labels bit 24 as Q, but it is in our variable
160
--
161
2.20.1
162
163
diff view generated by jsdifflib
1
Now we have separate types for BCM2836 and BCM2837, we might as well
1
Mark the arrays of function pointers in trans_VSHLL_S_2sh() and
2
just hard-code the CPU type they use rather than having it passed
2
trans_VSHLL_U_2sh() as both 'static' and 'const'.
3
through as an object property. This then lets us put the initialization
4
of the CPU object in init rather than realize.
5
6
Note that this change means that it's no longer possible on
7
the command line to use -cpu to ask for a different kind of
8
CPU than the SoC supports. This was never a supported thing to
9
do anyway; we were just not sanity-checking the command line.
10
11
This does require us to only build the bcm2837 object on
12
TARGET_AARCH64 configs, since otherwise it won't instantiate
13
due to the missing cortex-a53 device and "make check" will fail.
14
3
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
18
Message-id: 20180313153458.26822-9-peter.maydell@linaro.org
19
---
6
---
20
hw/arm/bcm2836.c | 24 +++++++++++++++---------
7
target/arm/translate-neon.inc.c | 4 ++--
21
hw/arm/raspi.c | 2 --
8
1 file changed, 2 insertions(+), 2 deletions(-)
22
2 files changed, 15 insertions(+), 11 deletions(-)
23
9
24
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
10
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
25
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/arm/bcm2836.c
12
--- a/target/arm/translate-neon.inc.c
27
+++ b/hw/arm/bcm2836.c
13
+++ b/target/arm/translate-neon.inc.c
28
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
29
15
30
struct BCM283XInfo {
16
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
31
const char *name;
32
+ const char *cpu_type;
33
int clusterid;
34
};
35
36
static const BCM283XInfo bcm283x_socs[] = {
37
{
38
.name = TYPE_BCM2836,
39
+ .cpu_type = ARM_CPU_TYPE_NAME("cortex-a15"),
40
.clusterid = 0xf,
41
},
42
+#ifdef TARGET_AARCH64
43
{
44
.name = TYPE_BCM2837,
45
+ .cpu_type = ARM_CPU_TYPE_NAME("cortex-a53"),
46
.clusterid = 0x0,
47
},
48
+#endif
49
};
50
51
static void bcm2836_init(Object *obj)
52
{
17
{
53
BCM283XState *s = BCM283X(obj);
18
- NeonGenWidenFn *widenfn[] = {
54
+ BCM283XClass *bc = BCM283X_GET_CLASS(obj);
19
+ static NeonGenWidenFn * const widenfn[] = {
55
+ const BCM283XInfo *info = bc->info;
20
gen_helper_neon_widen_s8,
56
+ int n;
21
gen_helper_neon_widen_s16,
57
+
22
tcg_gen_ext_i32_i64,
58
+ for (n = 0; n < BCM283X_NCPUS; n++) {
23
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
59
+ object_initialize(&s->cpus[n], sizeof(s->cpus[n]),
24
60
+ info->cpu_type);
25
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
61
+ object_property_add_child(obj, "cpu[*]", OBJECT(&s->cpus[n]),
26
{
62
+ &error_abort);
27
- NeonGenWidenFn *widenfn[] = {
63
+ }
28
+ static NeonGenWidenFn * const widenfn[] = {
64
29
gen_helper_neon_widen_u8,
65
object_initialize(&s->control, sizeof(s->control), TYPE_BCM2836_CONTROL);
30
gen_helper_neon_widen_u16,
66
object_property_add_child(obj, "control", OBJECT(&s->control), NULL);
31
tcg_gen_extu_i32_i64,
67
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
68
69
/* common peripherals from bcm2835 */
70
71
- obj = OBJECT(dev);
72
- for (n = 0; n < BCM283X_NCPUS; n++) {
73
- object_initialize(&s->cpus[n], sizeof(s->cpus[n]),
74
- s->cpu_type);
75
- object_property_add_child(obj, "cpu[*]", OBJECT(&s->cpus[n]),
76
- &error_abort);
77
- }
78
-
79
obj = object_property_get_link(OBJECT(dev), "ram", &err);
80
if (obj == NULL) {
81
error_setg(errp, "%s: required ram link not found: %s",
82
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
83
}
84
85
static Property bcm2836_props[] = {
86
- DEFINE_PROP_STRING("cpu-type", BCM283XState, cpu_type),
87
DEFINE_PROP_UINT32("enabled-cpus", BCM283XState, enabled_cpus,
88
BCM283X_NCPUS),
89
DEFINE_PROP_END_OF_LIST()
90
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/arm/raspi.c
93
+++ b/hw/arm/raspi.c
94
@@ -XXX,XX +XXX,XX @@ static void raspi_init(MachineState *machine, int version)
95
/* Setup the SOC */
96
object_property_add_const_link(OBJECT(&s->soc), "ram", OBJECT(&s->ram),
97
&error_abort);
98
- object_property_set_str(OBJECT(&s->soc), machine->cpu_type, "cpu-type",
99
- &error_abort);
100
object_property_set_int(OBJECT(&s->soc), smp_cpus, "enabled-cpus",
101
&error_abort);
102
int board_rev = version == 3 ? 0xa02082 : 0xa21041;
103
--
32
--
104
2.16.2
33
2.20.1
105
34
106
35
diff view generated by jsdifflib
1
The BCM2837 sets the Aff1 field of the MPIDR affinity values for the
1
In commit 37bfce81b10450071 we accidentally introduced a leak of a TCG
2
CPUs to 0, whereas the BCM2836 uses 0xf. Set this correctly, as it
2
temporary in do_2shift_env_64(); free it.
3
is required for Linux to boot.
4
3
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Message-id: 20180313153458.26822-8-peter.maydell@linaro.org
9
---
6
---
10
hw/arm/bcm2836.c | 11 +++++++----
7
target/arm/translate-neon.inc.c | 1 +
11
1 file changed, 7 insertions(+), 4 deletions(-)
8
1 file changed, 1 insertion(+)
12
9
13
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
10
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/arm/bcm2836.c
12
--- a/target/arm/translate-neon.inc.c
16
+++ b/hw/arm/bcm2836.c
13
+++ b/target/arm/translate-neon.inc.c
17
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
18
15
neon_load_reg64(tmp, a->vm + pass);
19
struct BCM283XInfo {
16
fn(tmp, cpu_env, tmp, constimm);
20
const char *name;
17
neon_store_reg64(tmp, a->vd + pass);
21
+ int clusterid;
18
+ tcg_temp_free_i64(tmp);
22
};
19
}
23
20
tcg_temp_free_i64(constimm);
24
static const BCM283XInfo bcm283x_socs[] = {
21
return true;
25
{
26
.name = TYPE_BCM2836,
27
+ .clusterid = 0xf,
28
},
29
{
30
.name = TYPE_BCM2837,
31
+ .clusterid = 0x0,
32
},
33
};
34
35
@@ -XXX,XX +XXX,XX @@ static void bcm2836_init(Object *obj)
36
static void bcm2836_realize(DeviceState *dev, Error **errp)
37
{
38
BCM283XState *s = BCM283X(dev);
39
+ BCM283XClass *bc = BCM283X_GET_CLASS(dev);
40
+ const BCM283XInfo *info = bc->info;
41
Object *obj;
42
Error *err = NULL;
43
int n;
44
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
45
qdev_get_gpio_in_named(DEVICE(&s->control), "gpu-fiq", 0));
46
47
for (n = 0; n < BCM283X_NCPUS; n++) {
48
- /* Mirror bcm2836, which has clusterid set to 0xf
49
- * TODO: this should be converted to a property of ARM_CPU
50
- */
51
- s->cpus[n].mp_affinity = 0xF00 | n;
52
+ /* TODO: this should be converted to a property of ARM_CPU */
53
+ s->cpus[n].mp_affinity = (info->clusterid << 8) | n;
54
55
/* set periphbase/CBAR value for CPU-local registers */
56
object_property_set_int(OBJECT(&s->cpus[n]),
57
--
22
--
58
2.16.2
23
2.20.1
59
24
60
25
diff view generated by jsdifflib
New patch
1
1
Convert the VMLA, VMLS and VMUL insns in the Neon "2 registers and a
2
scalar" group to decodetree. These are 32x32->32 operations where
3
one of the inputs is the scalar, followed by a possible accumulate
4
operation of the 32-bit result.
5
6
The refactoring removes some of the oddities of the old decoder:
7
* operands to the operation and accumulation were often
8
reversed (taking advantage of the fact that most of these ops
9
are commutative); the new code follows the pseudocode order
10
* the Q bit in the insn was in a local variable 'u'; in the
11
new code it is decoded into a->q
12
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
16
target/arm/neon-dp.decode | 15 ++++
17
target/arm/translate-neon.inc.c | 133 ++++++++++++++++++++++++++++++++
18
target/arm/translate.c | 77 ++----------------
19
3 files changed, 154 insertions(+), 71 deletions(-)
20
21
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
22
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/neon-dp.decode
24
+++ b/target/arm/neon-dp.decode
25
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
26
VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
27
28
VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
29
+
30
+ ##################################################################
31
+ # 2-regs-plus-scalar grouping:
32
+ # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
33
+ ##################################################################
34
+ &2scalar vm vn vd size q
35
+
36
+ @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
37
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
38
+
39
+ VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
40
+
41
+ VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
42
+
43
+ VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
44
]
45
}
46
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-neon.inc.c
49
+++ b/target/arm/translate-neon.inc.c
50
@@ -XXX,XX +XXX,XX @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
51
16, 16, 0, fn_gvec);
52
return true;
53
}
54
+
55
+static void gen_neon_dup_low16(TCGv_i32 var)
56
+{
57
+ TCGv_i32 tmp = tcg_temp_new_i32();
58
+ tcg_gen_ext16u_i32(var, var);
59
+ tcg_gen_shli_i32(tmp, var, 16);
60
+ tcg_gen_or_i32(var, var, tmp);
61
+ tcg_temp_free_i32(tmp);
62
+}
63
+
64
+static void gen_neon_dup_high16(TCGv_i32 var)
65
+{
66
+ TCGv_i32 tmp = tcg_temp_new_i32();
67
+ tcg_gen_andi_i32(var, var, 0xffff0000);
68
+ tcg_gen_shri_i32(tmp, var, 16);
69
+ tcg_gen_or_i32(var, var, tmp);
70
+ tcg_temp_free_i32(tmp);
71
+}
72
+
73
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
74
+{
75
+ TCGv_i32 tmp;
76
+ if (size == 1) {
77
+ tmp = neon_load_reg(reg & 7, reg >> 4);
78
+ if (reg & 8) {
79
+ gen_neon_dup_high16(tmp);
80
+ } else {
81
+ gen_neon_dup_low16(tmp);
82
+ }
83
+ } else {
84
+ tmp = neon_load_reg(reg & 15, reg >> 4);
85
+ }
86
+ return tmp;
87
+}
88
+
89
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
90
+ NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
91
+{
92
+ /*
93
+ * Two registers and a scalar: perform an operation between
94
+ * the input elements and the scalar, and then possibly
95
+ * perform an accumulation operation of that result into the
96
+ * destination.
97
+ */
98
+ TCGv_i32 scalar;
99
+ int pass;
100
+
101
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
102
+ return false;
103
+ }
104
+
105
+ /* UNDEF accesses to D16-D31 if they don't exist. */
106
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
107
+ ((a->vd | a->vn | a->vm) & 0x10)) {
108
+ return false;
109
+ }
110
+
111
+ if (!opfn) {
112
+ /* Bad size (including size == 3, which is a different insn group) */
113
+ return false;
114
+ }
115
+
116
+ if (a->q && ((a->vd | a->vn) & 1)) {
117
+ return false;
118
+ }
119
+
120
+ if (!vfp_access_check(s)) {
121
+ return true;
122
+ }
123
+
124
+ scalar = neon_get_scalar(a->size, a->vm);
125
+
126
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
127
+ TCGv_i32 tmp = neon_load_reg(a->vn, pass);
128
+ opfn(tmp, tmp, scalar);
129
+ if (accfn) {
130
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
131
+ accfn(tmp, rd, tmp);
132
+ tcg_temp_free_i32(rd);
133
+ }
134
+ neon_store_reg(a->vd, pass, tmp);
135
+ }
136
+ tcg_temp_free_i32(scalar);
137
+ return true;
138
+}
139
+
140
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
141
+{
142
+ static NeonGenTwoOpFn * const opfn[] = {
143
+ NULL,
144
+ gen_helper_neon_mul_u16,
145
+ tcg_gen_mul_i32,
146
+ NULL,
147
+ };
148
+
149
+ return do_2scalar(s, a, opfn[a->size], NULL);
150
+}
151
+
152
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
153
+{
154
+ static NeonGenTwoOpFn * const opfn[] = {
155
+ NULL,
156
+ gen_helper_neon_mul_u16,
157
+ tcg_gen_mul_i32,
158
+ NULL,
159
+ };
160
+ static NeonGenTwoOpFn * const accfn[] = {
161
+ NULL,
162
+ gen_helper_neon_add_u16,
163
+ tcg_gen_add_i32,
164
+ NULL,
165
+ };
166
+
167
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
168
+}
169
+
170
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
171
+{
172
+ static NeonGenTwoOpFn * const opfn[] = {
173
+ NULL,
174
+ gen_helper_neon_mul_u16,
175
+ tcg_gen_mul_i32,
176
+ NULL,
177
+ };
178
+ static NeonGenTwoOpFn * const accfn[] = {
179
+ NULL,
180
+ gen_helper_neon_sub_u16,
181
+ tcg_gen_sub_i32,
182
+ NULL,
183
+ };
184
+
185
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
186
+}
187
diff --git a/target/arm/translate.c b/target/arm/translate.c
188
index XXXXXXX..XXXXXXX 100644
189
--- a/target/arm/translate.c
190
+++ b/target/arm/translate.c
191
@@ -XXX,XX +XXX,XX @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
192
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
193
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
194
195
-static void gen_neon_dup_low16(TCGv_i32 var)
196
-{
197
- TCGv_i32 tmp = tcg_temp_new_i32();
198
- tcg_gen_ext16u_i32(var, var);
199
- tcg_gen_shli_i32(tmp, var, 16);
200
- tcg_gen_or_i32(var, var, tmp);
201
- tcg_temp_free_i32(tmp);
202
-}
203
-
204
-static void gen_neon_dup_high16(TCGv_i32 var)
205
-{
206
- TCGv_i32 tmp = tcg_temp_new_i32();
207
- tcg_gen_andi_i32(var, var, 0xffff0000);
208
- tcg_gen_shri_i32(tmp, var, 16);
209
- tcg_gen_or_i32(var, var, tmp);
210
- tcg_temp_free_i32(tmp);
211
-}
212
-
213
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
214
{
215
#ifndef CONFIG_USER_ONLY
216
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
217
218
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
219
220
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
221
-{
222
- switch (size) {
223
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
224
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
225
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
226
- default: abort();
227
- }
228
-}
229
-
230
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
231
-{
232
- switch (size) {
233
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
234
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
235
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
236
- default: return;
237
- }
238
-}
239
-
240
static TCGv_i32 neon_load_scratch(int scratch)
241
{
242
TCGv_i32 tmp = tcg_temp_new_i32();
243
@@ -XXX,XX +XXX,XX @@ static void neon_store_scratch(int scratch, TCGv_i32 var)
244
tcg_temp_free_i32(var);
245
}
246
247
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
248
-{
249
- TCGv_i32 tmp;
250
- if (size == 1) {
251
- tmp = neon_load_reg(reg & 7, reg >> 4);
252
- if (reg & 8) {
253
- gen_neon_dup_high16(tmp);
254
- } else {
255
- gen_neon_dup_low16(tmp);
256
- }
257
- } else {
258
- tmp = neon_load_reg(reg & 15, reg >> 4);
259
- }
260
- return tmp;
261
-}
262
-
263
static int gen_neon_unzip(int rd, int rm, int size, int q)
264
{
265
TCGv_ptr pd, pm;
266
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
267
return 1;
268
}
269
switch (op) {
270
+ case 0: /* Integer VMLA scalar */
271
+ case 4: /* Integer VMLS scalar */
272
+ case 8: /* Integer VMUL scalar */
273
+ return 1; /* handled by decodetree */
274
+
275
case 1: /* Float VMLA scalar */
276
case 5: /* Floating point VMLS scalar */
277
case 9: /* Floating point VMUL scalar */
278
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
279
return 1;
280
}
281
/* fall through */
282
- case 0: /* Integer VMLA scalar */
283
- case 4: /* Integer VMLS scalar */
284
- case 8: /* Integer VMUL scalar */
285
case 12: /* VQDMULH scalar */
286
case 13: /* VQRDMULH scalar */
287
if (u && ((rd | rn) & 1)) {
288
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
289
} else {
290
gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
291
}
292
- } else if (op & 1) {
293
+ } else {
294
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
295
gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
296
tcg_temp_free_ptr(fpstatus);
297
- } else {
298
- switch (size) {
299
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
300
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
301
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
302
- default: abort();
303
- }
304
}
305
tcg_temp_free_i32(tmp2);
306
if (op < 8) {
307
/* Accumulate. */
308
tmp2 = neon_load_reg(rd, pass);
309
switch (op) {
310
- case 0:
311
- gen_neon_add(size, tmp, tmp2);
312
- break;
313
case 1:
314
{
315
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
316
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
317
tcg_temp_free_ptr(fpstatus);
318
break;
319
}
320
- case 4:
321
- gen_neon_rsb(size, tmp, tmp2);
322
- break;
323
case 5:
324
{
325
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
326
--
327
2.20.1
328
329
diff view generated by jsdifflib
1
The raspi3 has AArch64 CPUs, which means that our smpboot
1
Convert the float versions of VMLA, VMLS and VMUL in the Neon
2
code for keeping the secondary CPUs in a pen needs to have
2
2-reg-scalar group to decodetree.
3
a version for A64 as well as A32. Without this, the
4
secondary CPUs go into an infinite loop of taking undefined
5
instruction exceptions.
6
3
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Message-id: 20180313153458.26822-10-peter.maydell@linaro.org
10
---
5
---
11
hw/arm/raspi.c | 41 ++++++++++++++++++++++++++++++++++++++++-
6
As noted in the comment on the WRAP_FP_FN macro, we could have
12
1 file changed, 40 insertions(+), 1 deletion(-)
7
had a do_2scalar_fp() function, but for 3 insns it seemed
8
simpler to just do the wrapping to get hold of the fpstatus ptr.
9
(These are the only fp insns in the group.)
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
target/arm/neon-dp.decode | 3 ++
13
target/arm/translate-neon.inc.c | 65 +++++++++++++++++++++++++++++++++
14
target/arm/translate.c | 37 ++-----------------
15
3 files changed, 71 insertions(+), 34 deletions(-)
13
16
14
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
17
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/arm/raspi.c
19
--- a/target/arm/neon-dp.decode
17
+++ b/hw/arm/raspi.c
20
+++ b/target/arm/neon-dp.decode
18
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
19
#define BOARDSETUP_ADDR (MVBAR_ADDR + 0x20) /* board setup code */
22
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
20
#define FIRMWARE_ADDR_2 0x8000 /* Pi 2 loads kernel.img here by default */
23
21
#define FIRMWARE_ADDR_3 0x80000 /* Pi 3 loads kernel.img here by default */
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
22
+#define SPINTABLE_ADDR 0xd8 /* Pi 3 bootloader spintable */
25
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
23
26
24
/* Table of Linux board IDs for different Pi versions */
27
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
25
static const int raspi_boardid[] = {[1] = 0xc42, [2] = 0xc43, [3] = 0xc44};
28
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
26
@@ -XXX,XX +XXX,XX @@ static void write_smpboot(ARMCPU *cpu, const struct arm_boot_info *info)
29
27
info->smp_loader_start);
30
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
31
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
32
]
28
}
33
}
29
34
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
30
+static void write_smpboot64(ARMCPU *cpu, const struct arm_boot_info *info)
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-neon.inc.c
37
+++ b/target/arm/translate-neon.inc.c
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
39
40
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
41
}
42
+
43
+/*
44
+ * Rather than have a float-specific version of do_2scalar just for
45
+ * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
46
+ * a NeonGenTwoOpFn.
47
+ */
48
+#define WRAP_FP_FN(WRAPNAME, FUNC) \
49
+ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
50
+ { \
51
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
52
+ FUNC(rd, rn, rm, fpstatus); \
53
+ tcg_temp_free_ptr(fpstatus); \
54
+ }
55
+
56
+WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
57
+WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
58
+WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
59
+
60
+static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
31
+{
61
+{
32
+ /* Unlike the AArch32 version we don't need to call the board setup hook.
62
+ static NeonGenTwoOpFn * const opfn[] = {
33
+ * The mechanism for doing the spin-table is also entirely different.
63
+ NULL,
34
+ * We must have four 64-bit fields at absolute addresses
64
+ NULL, /* TODO: fp16 support */
35
+ * 0xd8, 0xe0, 0xe8, 0xf0 in RAM, which are the flag variables for
65
+ gen_VMUL_F_mul,
36
+ * our CPUs, and which we must ensure are zero initialized before
66
+ NULL,
37
+ * the primary CPU goes into the kernel. We put these variables inside
38
+ * a rom blob, so that the reset for ROM contents zeroes them for us.
39
+ */
40
+ static const uint32_t smpboot[] = {
41
+ 0xd2801b05, /* mov x5, 0xd8 */
42
+ 0xd53800a6, /* mrs x6, mpidr_el1 */
43
+ 0x924004c6, /* and x6, x6, #0x3 */
44
+ 0xd503205f, /* spin: wfe */
45
+ 0xf86678a4, /* ldr x4, [x5,x6,lsl #3] */
46
+ 0xb4ffffc4, /* cbz x4, spin */
47
+ 0xd2800000, /* mov x0, #0x0 */
48
+ 0xd2800001, /* mov x1, #0x0 */
49
+ 0xd2800002, /* mov x2, #0x0 */
50
+ 0xd2800003, /* mov x3, #0x0 */
51
+ 0xd61f0080, /* br x4 */
52
+ };
67
+ };
53
+
68
+
54
+ static const uint64_t spintables[] = {
69
+ return do_2scalar(s, a, opfn[a->size], NULL);
55
+ 0, 0, 0, 0
70
+}
71
+
72
+static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
73
+{
74
+ static NeonGenTwoOpFn * const opfn[] = {
75
+ NULL,
76
+ NULL, /* TODO: fp16 support */
77
+ gen_VMUL_F_mul,
78
+ NULL,
79
+ };
80
+ static NeonGenTwoOpFn * const accfn[] = {
81
+ NULL,
82
+ NULL, /* TODO: fp16 support */
83
+ gen_VMUL_F_add,
84
+ NULL,
56
+ };
85
+ };
57
+
86
+
58
+ rom_add_blob_fixed("raspi_smpboot", smpboot, sizeof(smpboot),
87
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
59
+ info->smp_loader_start);
60
+ rom_add_blob_fixed("raspi_spintables", spintables, sizeof(spintables),
61
+ SPINTABLE_ADDR);
62
+}
88
+}
63
+
89
+
64
static void write_board_setup(ARMCPU *cpu, const struct arm_boot_info *info)
90
+static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
65
{
91
+{
66
arm_write_secure_board_setup_dummy_smc(cpu, info, MVBAR_ADDR);
92
+ static NeonGenTwoOpFn * const opfn[] = {
67
@@ -XXX,XX +XXX,XX @@ static void setup_boot(MachineState *machine, int version, size_t ram_size)
93
+ NULL,
68
/* Pi2 and Pi3 requires SMP setup */
94
+ NULL, /* TODO: fp16 support */
69
if (version >= 2) {
95
+ gen_VMUL_F_mul,
70
binfo.smp_loader_start = SMPBOOT_ADDR;
96
+ NULL,
71
- binfo.write_secondary_boot = write_smpboot;
97
+ };
72
+ if (version == 2) {
98
+ static NeonGenTwoOpFn * const accfn[] = {
73
+ binfo.write_secondary_boot = write_smpboot;
99
+ NULL,
74
+ } else {
100
+ NULL, /* TODO: fp16 support */
75
+ binfo.write_secondary_boot = write_smpboot64;
101
+ gen_VMUL_F_sub,
76
+ }
102
+ NULL,
77
binfo.secondary_cpu_reset_hook = reset_secondary;
103
+ };
78
}
104
+
79
105
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
106
+}
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
case 0: /* Integer VMLA scalar */
113
case 4: /* Integer VMLS scalar */
114
case 8: /* Integer VMUL scalar */
115
- return 1; /* handled by decodetree */
116
-
117
case 1: /* Float VMLA scalar */
118
case 5: /* Floating point VMLS scalar */
119
case 9: /* Floating point VMUL scalar */
120
- if (size == 1) {
121
- return 1;
122
- }
123
- /* fall through */
124
+ return 1; /* handled by decodetree */
125
+
126
case 12: /* VQDMULH scalar */
127
case 13: /* VQRDMULH scalar */
128
if (u && ((rd | rn) & 1)) {
129
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
130
} else {
131
gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
132
}
133
- } else if (op == 13) {
134
+ } else {
135
if (size == 1) {
136
gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
137
} else {
138
gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
139
}
140
- } else {
141
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
142
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
143
- tcg_temp_free_ptr(fpstatus);
144
}
145
tcg_temp_free_i32(tmp2);
146
- if (op < 8) {
147
- /* Accumulate. */
148
- tmp2 = neon_load_reg(rd, pass);
149
- switch (op) {
150
- case 1:
151
- {
152
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
153
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
154
- tcg_temp_free_ptr(fpstatus);
155
- break;
156
- }
157
- case 5:
158
- {
159
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
160
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
161
- tcg_temp_free_ptr(fpstatus);
162
- break;
163
- }
164
- default:
165
- abort();
166
- }
167
- tcg_temp_free_i32(tmp2);
168
- }
169
neon_store_reg(rd, pass, tmp);
170
}
171
break;
80
--
172
--
81
2.16.2
173
2.20.1
82
174
83
175
diff view generated by jsdifflib
1
Our BCM2836 type is really a generic one that can be any of
1
Convert the VQDMULH and VQRDMULH insns in the 2-reg-scalar group
2
the bcm283x family. Rename it accordingly. We change only
2
to decodetree.
3
the names which are visible via the header file to the
4
rest of the QEMU code, leaving private function names
5
in bcm2836.c as they are.
6
7
This is a preliminary to making bcm283x be an abstract
8
parent class to specific types for the bcm2836 and bcm2837.
9
3
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
13
Message-id: 20180313153458.26822-6-peter.maydell@linaro.org
14
---
6
---
15
include/hw/arm/bcm2836.h | 12 ++++++------
7
target/arm/neon-dp.decode | 3 +++
16
hw/arm/bcm2836.c | 17 +++++++++--------
8
target/arm/translate-neon.inc.c | 29 +++++++++++++++++++++++
17
hw/arm/raspi.c | 16 ++++++++--------
9
target/arm/translate.c | 42 ++-------------------------------
18
3 files changed, 23 insertions(+), 22 deletions(-)
10
3 files changed, 34 insertions(+), 40 deletions(-)
19
11
20
diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
21
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/arm/bcm2836.h
14
--- a/target/arm/neon-dp.decode
23
+++ b/include/hw/arm/bcm2836.h
15
+++ b/target/arm/neon-dp.decode
24
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
25
#include "hw/arm/bcm2835_peripherals.h"
17
26
#include "hw/intc/bcm2836_control.h"
18
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
27
19
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
28
-#define TYPE_BCM2836 "bcm2836"
20
+
29
-#define BCM2836(obj) OBJECT_CHECK(BCM2836State, (obj), TYPE_BCM2836)
21
+ VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
30
+#define TYPE_BCM283X "bcm283x"
22
+ VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
31
+#define BCM283X(obj) OBJECT_CHECK(BCM283XState, (obj), TYPE_BCM283X)
23
]
32
24
}
33
-#define BCM2836_NCPUS 4
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
34
+#define BCM283X_NCPUS 4
35
36
-typedef struct BCM2836State {
37
+typedef struct BCM283XState {
38
/*< private >*/
39
DeviceState parent_obj;
40
/*< public >*/
41
@@ -XXX,XX +XXX,XX @@ typedef struct BCM2836State {
42
char *cpu_type;
43
uint32_t enabled_cpus;
44
45
- ARMCPU cpus[BCM2836_NCPUS];
46
+ ARMCPU cpus[BCM283X_NCPUS];
47
BCM2836ControlState control;
48
BCM2835PeripheralState peripherals;
49
-} BCM2836State;
50
+} BCM283XState;
51
52
#endif /* BCM2836_H */
53
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
54
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
55
--- a/hw/arm/bcm2836.c
27
--- a/target/arm/translate-neon.inc.c
56
+++ b/hw/arm/bcm2836.c
28
+++ b/target/arm/translate-neon.inc.c
57
@@ -XXX,XX +XXX,XX @@
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
58
30
59
static void bcm2836_init(Object *obj)
31
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
32
}
33
+
34
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
35
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
36
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
37
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
38
+
39
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
40
+{
41
+ static NeonGenTwoOpFn * const opfn[] = {
42
+ NULL,
43
+ gen_VQDMULH_16,
44
+ gen_VQDMULH_32,
45
+ NULL,
46
+ };
47
+
48
+ return do_2scalar(s, a, opfn[a->size], NULL);
49
+}
50
+
51
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
52
+{
53
+ static NeonGenTwoOpFn * const opfn[] = {
54
+ NULL,
55
+ gen_VQRDMULH_16,
56
+ gen_VQRDMULH_32,
57
+ NULL,
58
+ };
59
+
60
+ return do_2scalar(s, a, opfn[a->size], NULL);
61
+}
62
diff --git a/target/arm/translate.c b/target/arm/translate.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/translate.c
65
+++ b/target/arm/translate.c
66
@@ -XXX,XX +XXX,XX @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
67
68
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
69
70
-static TCGv_i32 neon_load_scratch(int scratch)
71
-{
72
- TCGv_i32 tmp = tcg_temp_new_i32();
73
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
74
- return tmp;
75
-}
76
-
77
-static void neon_store_scratch(int scratch, TCGv_i32 var)
78
-{
79
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
80
- tcg_temp_free_i32(var);
81
-}
82
-
83
static int gen_neon_unzip(int rd, int rm, int size, int q)
60
{
84
{
61
- BCM2836State *s = BCM2836(obj);
85
TCGv_ptr pd, pm;
62
+ BCM283XState *s = BCM283X(obj);
86
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
63
87
case 1: /* Float VMLA scalar */
64
object_initialize(&s->control, sizeof(s->control), TYPE_BCM2836_CONTROL);
88
case 5: /* Floating point VMLS scalar */
65
object_property_add_child(obj, "control", OBJECT(&s->control), NULL);
89
case 9: /* Floating point VMUL scalar */
66
@@ -XXX,XX +XXX,XX @@ static void bcm2836_init(Object *obj)
90
- return 1; /* handled by decodetree */
67
91
-
68
static void bcm2836_realize(DeviceState *dev, Error **errp)
92
case 12: /* VQDMULH scalar */
69
{
93
case 13: /* VQRDMULH scalar */
70
- BCM2836State *s = BCM2836(dev);
94
- if (u && ((rd | rn) & 1)) {
71
+ BCM283XState *s = BCM283X(dev);
95
- return 1;
72
Object *obj;
96
- }
73
Error *err = NULL;
97
- tmp = neon_get_scalar(size, rm);
74
int n;
98
- neon_store_scratch(0, tmp);
75
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
99
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
76
/* common peripherals from bcm2835 */
100
- tmp = neon_load_scratch(0);
77
101
- tmp2 = neon_load_reg(rn, pass);
78
obj = OBJECT(dev);
102
- if (op == 12) {
79
- for (n = 0; n < BCM2836_NCPUS; n++) {
103
- if (size == 1) {
80
+ for (n = 0; n < BCM283X_NCPUS; n++) {
104
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
81
object_initialize(&s->cpus[n], sizeof(s->cpus[n]),
105
- } else {
82
s->cpu_type);
106
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
83
object_property_add_child(obj, "cpu[*]", OBJECT(&s->cpus[n]),
107
- }
84
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
108
- } else {
85
sysbus_connect_irq(SYS_BUS_DEVICE(&s->peripherals), 1,
109
- if (size == 1) {
86
qdev_get_gpio_in_named(DEVICE(&s->control), "gpu-fiq", 0));
110
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
87
111
- } else {
88
- for (n = 0; n < BCM2836_NCPUS; n++) {
112
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
89
+ for (n = 0; n < BCM283X_NCPUS; n++) {
113
- }
90
/* Mirror bcm2836, which has clusterid set to 0xf
114
- }
91
* TODO: this should be converted to a property of ARM_CPU
115
- tcg_temp_free_i32(tmp2);
92
*/
116
- neon_store_reg(rd, pass, tmp);
93
@@ -XXX,XX +XXX,XX @@ static void bcm2836_realize(DeviceState *dev, Error **errp)
117
- }
94
}
118
- break;
95
119
+ return 1; /* handled by decodetree */
96
static Property bcm2836_props[] = {
120
+
97
- DEFINE_PROP_STRING("cpu-type", BCM2836State, cpu_type),
121
case 3: /* VQDMLAL scalar */
98
- DEFINE_PROP_UINT32("enabled-cpus", BCM2836State, enabled_cpus, BCM2836_NCPUS),
122
case 7: /* VQDMLSL scalar */
99
+ DEFINE_PROP_STRING("cpu-type", BCM283XState, cpu_type),
123
case 11: /* VQDMULL scalar */
100
+ DEFINE_PROP_UINT32("enabled-cpus", BCM283XState, enabled_cpus,
101
+ BCM283X_NCPUS),
102
DEFINE_PROP_END_OF_LIST()
103
};
104
105
@@ -XXX,XX +XXX,XX @@ static void bcm2836_class_init(ObjectClass *oc, void *data)
106
}
107
108
static const TypeInfo bcm2836_type_info = {
109
- .name = TYPE_BCM2836,
110
+ .name = TYPE_BCM283X,
111
.parent = TYPE_DEVICE,
112
- .instance_size = sizeof(BCM2836State),
113
+ .instance_size = sizeof(BCM283XState),
114
.instance_init = bcm2836_init,
115
.class_init = bcm2836_class_init,
116
};
117
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/hw/arm/raspi.c
120
+++ b/hw/arm/raspi.c
121
@@ -XXX,XX +XXX,XX @@
122
static const int raspi_boardid[] = {[1] = 0xc42, [2] = 0xc43, [3] = 0xc44};
123
124
typedef struct RasPiState {
125
- BCM2836State soc;
126
+ BCM283XState soc;
127
MemoryRegion ram;
128
} RasPiState;
129
130
@@ -XXX,XX +XXX,XX @@ static void raspi_init(MachineState *machine, int version)
131
BusState *bus;
132
DeviceState *carddev;
133
134
- object_initialize(&s->soc, sizeof(s->soc), TYPE_BCM2836);
135
+ object_initialize(&s->soc, sizeof(s->soc), TYPE_BCM283X);
136
object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
137
&error_abort);
138
139
@@ -XXX,XX +XXX,XX @@ static void raspi2_machine_init(MachineClass *mc)
140
mc->no_floppy = 1;
141
mc->no_cdrom = 1;
142
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
143
- mc->max_cpus = BCM2836_NCPUS;
144
- mc->min_cpus = BCM2836_NCPUS;
145
- mc->default_cpus = BCM2836_NCPUS;
146
+ mc->max_cpus = BCM283X_NCPUS;
147
+ mc->min_cpus = BCM283X_NCPUS;
148
+ mc->default_cpus = BCM283X_NCPUS;
149
mc->default_ram_size = 1024 * 1024 * 1024;
150
mc->ignore_memory_transaction_failures = true;
151
};
152
@@ -XXX,XX +XXX,XX @@ static void raspi3_machine_init(MachineClass *mc)
153
mc->no_floppy = 1;
154
mc->no_cdrom = 1;
155
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a53");
156
- mc->max_cpus = BCM2836_NCPUS;
157
- mc->min_cpus = BCM2836_NCPUS;
158
- mc->default_cpus = BCM2836_NCPUS;
159
+ mc->max_cpus = BCM283X_NCPUS;
160
+ mc->min_cpus = BCM283X_NCPUS;
161
+ mc->default_cpus = BCM283X_NCPUS;
162
mc->default_ram_size = 1024 * 1024 * 1024;
163
}
164
DEFINE_MACHINE("raspi3", raspi3_machine_init)
165
--
124
--
166
2.16.2
125
2.20.1
167
126
168
127
diff view generated by jsdifflib
1
The TypeInfo and state struct for bcm2836 disagree about what the
1
Convert the VQRDMLAH and VQRDMLSH insns in the 2-reg-scalar
2
parent class is -- the TypeInfo says it's TYPE_SYS_BUS_DEVICE,
2
group to decodetree.
3
but the BCM2836State struct only defines the parent_obj field
4
as DeviceState. This would have caused problems if anything
5
actually tried to treat the object as a TYPE_SYS_BUS_DEVICE.
6
Fix the TypeInfo to use TYPE_DEVICE as the parent, since we don't
7
need any of the additional functionality TYPE_SYS_BUS_DEVICE
8
provides.
9
3
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
13
Message-id: 20180313153458.26822-5-peter.maydell@linaro.org
14
---
6
---
15
hw/arm/bcm2836.c | 2 +-
7
target/arm/neon-dp.decode | 3 ++
16
1 file changed, 1 insertion(+), 1 deletion(-)
8
target/arm/translate-neon.inc.c | 74 +++++++++++++++++++++++++++++++++
9
target/arm/translate.c | 38 +----------------
10
3 files changed, 79 insertions(+), 36 deletions(-)
17
11
18
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
19
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/arm/bcm2836.c
14
--- a/target/arm/neon-dp.decode
21
+++ b/hw/arm/bcm2836.c
15
+++ b/target/arm/neon-dp.decode
22
@@ -XXX,XX +XXX,XX @@ static void bcm2836_class_init(ObjectClass *oc, void *data)
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
23
17
24
static const TypeInfo bcm2836_type_info = {
18
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
25
.name = TYPE_BCM2836,
19
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
26
- .parent = TYPE_SYS_BUS_DEVICE,
20
+
27
+ .parent = TYPE_DEVICE,
21
+ VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
28
.instance_size = sizeof(BCM2836State),
22
+ VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
29
.instance_init = bcm2836_init,
23
]
30
.class_init = bcm2836_class_init,
24
}
25
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/translate-neon.inc.c
28
+++ b/target/arm/translate-neon.inc.c
29
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
30
31
return do_2scalar(s, a, opfn[a->size], NULL);
32
}
33
+
34
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
35
+ NeonGenThreeOpEnvFn *opfn)
36
+{
37
+ /*
38
+ * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
39
+ * performs a kind of fused op-then-accumulate using a helper
40
+ * function that takes all of rd, rn and the scalar at once.
41
+ */
42
+ TCGv_i32 scalar;
43
+ int pass;
44
+
45
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
46
+ return false;
47
+ }
48
+
49
+ if (!dc_isar_feature(aa32_rdm, s)) {
50
+ return false;
51
+ }
52
+
53
+ /* UNDEF accesses to D16-D31 if they don't exist. */
54
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
55
+ ((a->vd | a->vn | a->vm) & 0x10)) {
56
+ return false;
57
+ }
58
+
59
+ if (!opfn) {
60
+ /* Bad size (including size == 3, which is a different insn group) */
61
+ return false;
62
+ }
63
+
64
+ if (a->q && ((a->vd | a->vn) & 1)) {
65
+ return false;
66
+ }
67
+
68
+ if (!vfp_access_check(s)) {
69
+ return true;
70
+ }
71
+
72
+ scalar = neon_get_scalar(a->size, a->vm);
73
+
74
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
75
+ TCGv_i32 rn = neon_load_reg(a->vn, pass);
76
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
77
+ opfn(rd, cpu_env, rn, scalar, rd);
78
+ tcg_temp_free_i32(rn);
79
+ neon_store_reg(a->vd, pass, rd);
80
+ }
81
+ tcg_temp_free_i32(scalar);
82
+
83
+ return true;
84
+}
85
+
86
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
87
+{
88
+ static NeonGenThreeOpEnvFn *opfn[] = {
89
+ NULL,
90
+ gen_helper_neon_qrdmlah_s16,
91
+ gen_helper_neon_qrdmlah_s32,
92
+ NULL,
93
+ };
94
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
95
+}
96
+
97
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
98
+{
99
+ static NeonGenThreeOpEnvFn *opfn[] = {
100
+ NULL,
101
+ gen_helper_neon_qrdmlsh_s16,
102
+ gen_helper_neon_qrdmlsh_s32,
103
+ NULL,
104
+ };
105
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
106
+}
107
diff --git a/target/arm/translate.c b/target/arm/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/arm/translate.c
110
+++ b/target/arm/translate.c
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
case 9: /* Floating point VMUL scalar */
113
case 12: /* VQDMULH scalar */
114
case 13: /* VQRDMULH scalar */
115
+ case 14: /* VQRDMLAH scalar */
116
+ case 15: /* VQRDMLSH scalar */
117
return 1; /* handled by decodetree */
118
119
case 3: /* VQDMLAL scalar */
120
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
121
neon_store_reg64(cpu_V0, rd + pass);
122
}
123
break;
124
- case 14: /* VQRDMLAH scalar */
125
- case 15: /* VQRDMLSH scalar */
126
- {
127
- NeonGenThreeOpEnvFn *fn;
128
-
129
- if (!dc_isar_feature(aa32_rdm, s)) {
130
- return 1;
131
- }
132
- if (u && ((rd | rn) & 1)) {
133
- return 1;
134
- }
135
- if (op == 14) {
136
- if (size == 1) {
137
- fn = gen_helper_neon_qrdmlah_s16;
138
- } else {
139
- fn = gen_helper_neon_qrdmlah_s32;
140
- }
141
- } else {
142
- if (size == 1) {
143
- fn = gen_helper_neon_qrdmlsh_s16;
144
- } else {
145
- fn = gen_helper_neon_qrdmlsh_s32;
146
- }
147
- }
148
-
149
- tmp2 = neon_get_scalar(size, rm);
150
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
151
- tmp = neon_load_reg(rn, pass);
152
- tmp3 = neon_load_reg(rd, pass);
153
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
154
- tcg_temp_free_i32(tmp3);
155
- neon_store_reg(rd, pass, tmp);
156
- }
157
- tcg_temp_free_i32(tmp2);
158
- }
159
- break;
160
default:
161
g_assert_not_reached();
162
}
31
--
163
--
32
2.16.2
164
2.20.1
33
165
34
166
diff view generated by jsdifflib
1
If we're directly booting a Linux kernel and the CPU supports both
1
Convert the Neon 2-reg-scalar long multiplies to decodetree.
2
EL3 and EL2, we start the kernel in EL2, as it expects. We must also
2
These are the last instructions in the group.
3
set the SCR_EL3.HCE bit in this situation, so that the HVC
4
instruction is enabled rather than UNDEFing. Otherwise at least some
5
kernels will panic when trying to initialize KVM in the guest.
6
3
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Message-id: 20180313153458.26822-4-peter.maydell@linaro.org
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
hw/arm/boot.c | 5 +++++
7
target/arm/neon-dp.decode | 18 ++++
11
1 file changed, 5 insertions(+)
8
target/arm/translate-neon.inc.c | 163 ++++++++++++++++++++++++++++
9
target/arm/translate.c | 182 ++------------------------------
10
3 files changed, 187 insertions(+), 176 deletions(-)
12
11
13
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
12
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/arm/boot.c
14
--- a/target/arm/neon-dp.decode
16
+++ b/hw/arm/boot.c
15
+++ b/target/arm/neon-dp.decode
17
@@ -XXX,XX +XXX,XX @@ static void do_cpu_reset(void *opaque)
16
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
18
assert(!info->secure_board_setup);
17
19
}
18
@2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
20
19
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
21
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
20
+ # For the 'long' ops the Q bit is part of insn decode
22
+ /* If we have EL2 then Linux expects the HVC insn to work */
21
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
23
+ env->cp15.scr_el3 |= SCR_HCE;
22
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
24
+ }
23
25
+
24
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
26
/* Set to non-secure if not a secure boot */
25
VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
27
if (!info->secure_boot &&
26
28
(cs != first_cpu || !info->secure_board_setup)) {
27
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
28
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
29
+
30
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
31
+
32
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
33
VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
34
35
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
36
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
37
+
38
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
39
+
40
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
41
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
42
43
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
44
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
45
+
46
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
47
+
48
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
49
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
50
51
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/translate-neon.inc.c
54
+++ b/target/arm/translate-neon.inc.c
55
@@ -XXX,XX +XXX,XX @@ static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
56
};
57
return do_vqrdmlah_2sc(s, a, opfn[a->size]);
58
}
59
+
60
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
61
+ NeonGenTwoOpWidenFn *opfn,
62
+ NeonGenTwo64OpFn *accfn)
63
+{
64
+ /*
65
+ * Two registers and a scalar, long operations: perform an
66
+ * operation on the input elements and the scalar which produces
67
+ * a double-width result, and then possibly perform an accumulation
68
+ * operation of that result into the destination.
69
+ */
70
+ TCGv_i32 scalar, rn;
71
+ TCGv_i64 rn0_64, rn1_64;
72
+
73
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
74
+ return false;
75
+ }
76
+
77
+ /* UNDEF accesses to D16-D31 if they don't exist. */
78
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
79
+ ((a->vd | a->vn | a->vm) & 0x10)) {
80
+ return false;
81
+ }
82
+
83
+ if (!opfn) {
84
+ /* Bad size (including size == 3, which is a different insn group) */
85
+ return false;
86
+ }
87
+
88
+ if (a->vd & 1) {
89
+ return false;
90
+ }
91
+
92
+ if (!vfp_access_check(s)) {
93
+ return true;
94
+ }
95
+
96
+ scalar = neon_get_scalar(a->size, a->vm);
97
+
98
+ /* Load all inputs before writing any outputs, in case of overlap */
99
+ rn = neon_load_reg(a->vn, 0);
100
+ rn0_64 = tcg_temp_new_i64();
101
+ opfn(rn0_64, rn, scalar);
102
+ tcg_temp_free_i32(rn);
103
+
104
+ rn = neon_load_reg(a->vn, 1);
105
+ rn1_64 = tcg_temp_new_i64();
106
+ opfn(rn1_64, rn, scalar);
107
+ tcg_temp_free_i32(rn);
108
+ tcg_temp_free_i32(scalar);
109
+
110
+ if (accfn) {
111
+ TCGv_i64 t64 = tcg_temp_new_i64();
112
+ neon_load_reg64(t64, a->vd);
113
+ accfn(t64, t64, rn0_64);
114
+ neon_store_reg64(t64, a->vd);
115
+ neon_load_reg64(t64, a->vd + 1);
116
+ accfn(t64, t64, rn1_64);
117
+ neon_store_reg64(t64, a->vd + 1);
118
+ tcg_temp_free_i64(t64);
119
+ } else {
120
+ neon_store_reg64(rn0_64, a->vd);
121
+ neon_store_reg64(rn1_64, a->vd + 1);
122
+ }
123
+ tcg_temp_free_i64(rn0_64);
124
+ tcg_temp_free_i64(rn1_64);
125
+ return true;
126
+}
127
+
128
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
129
+{
130
+ static NeonGenTwoOpWidenFn * const opfn[] = {
131
+ NULL,
132
+ gen_helper_neon_mull_s16,
133
+ gen_mull_s32,
134
+ NULL,
135
+ };
136
+
137
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
138
+}
139
+
140
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
141
+{
142
+ static NeonGenTwoOpWidenFn * const opfn[] = {
143
+ NULL,
144
+ gen_helper_neon_mull_u16,
145
+ gen_mull_u32,
146
+ NULL,
147
+ };
148
+
149
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
150
+}
151
+
152
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
153
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
154
+ { \
155
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
156
+ NULL, \
157
+ gen_helper_neon_##MULL##16, \
158
+ gen_##MULL##32, \
159
+ NULL, \
160
+ }; \
161
+ static NeonGenTwo64OpFn * const accfn[] = { \
162
+ NULL, \
163
+ gen_helper_neon_##ACC##l_u32, \
164
+ tcg_gen_##ACC##_i64, \
165
+ NULL, \
166
+ }; \
167
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
168
+ }
169
+
170
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
171
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
172
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
173
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
174
+
175
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
176
+{
177
+ static NeonGenTwoOpWidenFn * const opfn[] = {
178
+ NULL,
179
+ gen_VQDMULL_16,
180
+ gen_VQDMULL_32,
181
+ NULL,
182
+ };
183
+
184
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
185
+}
186
+
187
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
188
+{
189
+ static NeonGenTwoOpWidenFn * const opfn[] = {
190
+ NULL,
191
+ gen_VQDMULL_16,
192
+ gen_VQDMULL_32,
193
+ NULL,
194
+ };
195
+ static NeonGenTwo64OpFn * const accfn[] = {
196
+ NULL,
197
+ gen_VQDMLAL_acc_16,
198
+ gen_VQDMLAL_acc_32,
199
+ NULL,
200
+ };
201
+
202
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
203
+}
204
+
205
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
206
+{
207
+ static NeonGenTwoOpWidenFn * const opfn[] = {
208
+ NULL,
209
+ gen_VQDMULL_16,
210
+ gen_VQDMULL_32,
211
+ NULL,
212
+ };
213
+ static NeonGenTwo64OpFn * const accfn[] = {
214
+ NULL,
215
+ gen_VQDMLSL_acc_16,
216
+ gen_VQDMLSL_acc_32,
217
+ NULL,
218
+ };
219
+
220
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
221
+}
222
diff --git a/target/arm/translate.c b/target/arm/translate.c
223
index XXXXXXX..XXXXXXX 100644
224
--- a/target/arm/translate.c
225
+++ b/target/arm/translate.c
226
@@ -XXX,XX +XXX,XX @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
227
tcg_gen_ext16s_i32(dest, var);
228
}
229
230
-/* 32x32->64 multiply. Marks inputs as dead. */
231
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
232
-{
233
- TCGv_i32 lo = tcg_temp_new_i32();
234
- TCGv_i32 hi = tcg_temp_new_i32();
235
- TCGv_i64 ret;
236
-
237
- tcg_gen_mulu2_i32(lo, hi, a, b);
238
- tcg_temp_free_i32(a);
239
- tcg_temp_free_i32(b);
240
-
241
- ret = tcg_temp_new_i64();
242
- tcg_gen_concat_i32_i64(ret, lo, hi);
243
- tcg_temp_free_i32(lo);
244
- tcg_temp_free_i32(hi);
245
-
246
- return ret;
247
-}
248
-
249
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
250
-{
251
- TCGv_i32 lo = tcg_temp_new_i32();
252
- TCGv_i32 hi = tcg_temp_new_i32();
253
- TCGv_i64 ret;
254
-
255
- tcg_gen_muls2_i32(lo, hi, a, b);
256
- tcg_temp_free_i32(a);
257
- tcg_temp_free_i32(b);
258
-
259
- ret = tcg_temp_new_i64();
260
- tcg_gen_concat_i32_i64(ret, lo, hi);
261
- tcg_temp_free_i32(lo);
262
- tcg_temp_free_i32(hi);
263
-
264
- return ret;
265
-}
266
-
267
/* Swap low and high halfwords. */
268
static void gen_swap_half(TCGv_i32 var)
269
{
270
@@ -XXX,XX +XXX,XX @@ static inline void gen_neon_addl(int size)
271
}
272
}
273
274
-static inline void gen_neon_negl(TCGv_i64 var, int size)
275
-{
276
- switch (size) {
277
- case 0: gen_helper_neon_negl_u16(var, var); break;
278
- case 1: gen_helper_neon_negl_u32(var, var); break;
279
- case 2:
280
- tcg_gen_neg_i64(var, var);
281
- break;
282
- default: abort();
283
- }
284
-}
285
-
286
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
287
-{
288
- switch (size) {
289
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
290
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
291
- default: abort();
292
- }
293
-}
294
-
295
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
296
- int size, int u)
297
-{
298
- TCGv_i64 tmp;
299
-
300
- switch ((size << 1) | u) {
301
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
302
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
303
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
304
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
305
- case 4:
306
- tmp = gen_muls_i64_i32(a, b);
307
- tcg_gen_mov_i64(dest, tmp);
308
- tcg_temp_free_i64(tmp);
309
- break;
310
- case 5:
311
- tmp = gen_mulu_i64_i32(a, b);
312
- tcg_gen_mov_i64(dest, tmp);
313
- tcg_temp_free_i64(tmp);
314
- break;
315
- default: abort();
316
- }
317
-
318
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
319
- Don't forget to clean them now. */
320
- if (size < 2) {
321
- tcg_temp_free_i32(a);
322
- tcg_temp_free_i32(b);
323
- }
324
-}
325
-
326
static void gen_neon_narrow_op(int op, int u, int size,
327
TCGv_i32 dest, TCGv_i64 src)
328
{
329
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
330
int u;
331
int vec_size;
332
uint32_t imm;
333
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
334
+ TCGv_i32 tmp, tmp2, tmp3, tmp5;
335
TCGv_ptr ptr1;
336
TCGv_i64 tmp64;
337
338
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
339
return 1;
340
} else { /* (insn & 0x00800010 == 0x00800000) */
341
if (size != 3) {
342
- op = (insn >> 8) & 0xf;
343
- if ((insn & (1 << 6)) == 0) {
344
- /* Three registers of different lengths: handled by decodetree */
345
- return 1;
346
- } else {
347
- /* Two registers and a scalar. NB that for ops of this form
348
- * the ARM ARM labels bit 24 as Q, but it is in our variable
349
- * 'u', not 'q'.
350
- */
351
- if (size == 0) {
352
- return 1;
353
- }
354
- switch (op) {
355
- case 0: /* Integer VMLA scalar */
356
- case 4: /* Integer VMLS scalar */
357
- case 8: /* Integer VMUL scalar */
358
- case 1: /* Float VMLA scalar */
359
- case 5: /* Floating point VMLS scalar */
360
- case 9: /* Floating point VMUL scalar */
361
- case 12: /* VQDMULH scalar */
362
- case 13: /* VQRDMULH scalar */
363
- case 14: /* VQRDMLAH scalar */
364
- case 15: /* VQRDMLSH scalar */
365
- return 1; /* handled by decodetree */
366
-
367
- case 3: /* VQDMLAL scalar */
368
- case 7: /* VQDMLSL scalar */
369
- case 11: /* VQDMULL scalar */
370
- if (u == 1) {
371
- return 1;
372
- }
373
- /* fall through */
374
- case 2: /* VMLAL sclar */
375
- case 6: /* VMLSL scalar */
376
- case 10: /* VMULL scalar */
377
- if (rd & 1) {
378
- return 1;
379
- }
380
- tmp2 = neon_get_scalar(size, rm);
381
- /* We need a copy of tmp2 because gen_neon_mull
382
- * deletes it during pass 0. */
383
- tmp4 = tcg_temp_new_i32();
384
- tcg_gen_mov_i32(tmp4, tmp2);
385
- tmp3 = neon_load_reg(rn, 1);
386
-
387
- for (pass = 0; pass < 2; pass++) {
388
- if (pass == 0) {
389
- tmp = neon_load_reg(rn, 0);
390
- } else {
391
- tmp = tmp3;
392
- tmp2 = tmp4;
393
- }
394
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
395
- if (op != 11) {
396
- neon_load_reg64(cpu_V1, rd + pass);
397
- }
398
- switch (op) {
399
- case 6:
400
- gen_neon_negl(cpu_V0, size);
401
- /* Fall through */
402
- case 2:
403
- gen_neon_addl(size);
404
- break;
405
- case 3: case 7:
406
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
407
- if (op == 7) {
408
- gen_neon_negl(cpu_V0, size);
409
- }
410
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
411
- break;
412
- case 10:
413
- /* no-op */
414
- break;
415
- case 11:
416
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
417
- break;
418
- default:
419
- abort();
420
- }
421
- neon_store_reg64(cpu_V0, rd + pass);
422
- }
423
- break;
424
- default:
425
- g_assert_not_reached();
426
- }
427
- }
428
+ /*
429
+ * Three registers of different lengths, or two registers and
430
+ * a scalar: handled by decodetree
431
+ */
432
+ return 1;
433
} else { /* size == 3 */
434
if (!u) {
435
/* Extract. */
29
--
436
--
30
2.16.2
437
2.20.1
31
438
32
439
diff view generated by jsdifflib
New patch
1
1
Convert the Neon VEXT insn to decodetree. Rather than keeping the
2
old implementation which used fixed temporaries cpu_V0 and cpu_V1
3
and did the extraction with by-hand shift and logic ops, we use
4
the TCG extract2 insn.
5
6
We don't need to special case 0 or 8 immediates any more as the
7
optimizer is smart enough to throw away the dead code.
8
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
target/arm/neon-dp.decode | 8 +++-
13
target/arm/translate-neon.inc.c | 76 +++++++++++++++++++++++++++++++++
14
target/arm/translate.c | 58 +------------------------
15
3 files changed, 85 insertions(+), 57 deletions(-)
16
17
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/neon-dp.decode
20
+++ b/target/arm/neon-dp.decode
21
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
22
# return false for size==3.
23
######################################################################
24
{
25
- # 0b11 subgroup will go here
26
+ [
27
+ ##################################################################
28
+ # Miscellaneous size=0b11 insns
29
+ ##################################################################
30
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
31
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
32
+ ]
33
34
# Subgroup for size != 0b11
35
[
36
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/translate-neon.inc.c
39
+++ b/target/arm/translate-neon.inc.c
40
@@ -XXX,XX +XXX,XX @@ static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
41
42
return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
43
}
44
+
45
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
46
+{
47
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
48
+ return false;
49
+ }
50
+
51
+ /* UNDEF accesses to D16-D31 if they don't exist. */
52
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
53
+ ((a->vd | a->vn | a->vm) & 0x10)) {
54
+ return false;
55
+ }
56
+
57
+ if ((a->vn | a->vm | a->vd) & a->q) {
58
+ return false;
59
+ }
60
+
61
+ if (a->imm > 7 && !a->q) {
62
+ return false;
63
+ }
64
+
65
+ if (!vfp_access_check(s)) {
66
+ return true;
67
+ }
68
+
69
+ if (!a->q) {
70
+ /* Extract 64 bits from <Vm:Vn> */
71
+ TCGv_i64 left, right, dest;
72
+
73
+ left = tcg_temp_new_i64();
74
+ right = tcg_temp_new_i64();
75
+ dest = tcg_temp_new_i64();
76
+
77
+ neon_load_reg64(right, a->vn);
78
+ neon_load_reg64(left, a->vm);
79
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
80
+ neon_store_reg64(dest, a->vd);
81
+
82
+ tcg_temp_free_i64(left);
83
+ tcg_temp_free_i64(right);
84
+ tcg_temp_free_i64(dest);
85
+ } else {
86
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
87
+ TCGv_i64 left, middle, right, destleft, destright;
88
+
89
+ left = tcg_temp_new_i64();
90
+ middle = tcg_temp_new_i64();
91
+ right = tcg_temp_new_i64();
92
+ destleft = tcg_temp_new_i64();
93
+ destright = tcg_temp_new_i64();
94
+
95
+ if (a->imm < 8) {
96
+ neon_load_reg64(right, a->vn);
97
+ neon_load_reg64(middle, a->vn + 1);
98
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
99
+ neon_load_reg64(left, a->vm);
100
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
101
+ } else {
102
+ neon_load_reg64(right, a->vn + 1);
103
+ neon_load_reg64(middle, a->vm);
104
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
105
+ neon_load_reg64(left, a->vm + 1);
106
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
107
+ }
108
+
109
+ neon_store_reg64(destright, a->vd);
110
+ neon_store_reg64(destleft, a->vd + 1);
111
+
112
+ tcg_temp_free_i64(destright);
113
+ tcg_temp_free_i64(destleft);
114
+ tcg_temp_free_i64(right);
115
+ tcg_temp_free_i64(middle);
116
+ tcg_temp_free_i64(left);
117
+ }
118
+ return true;
119
+}
120
diff --git a/target/arm/translate.c b/target/arm/translate.c
121
index XXXXXXX..XXXXXXX 100644
122
--- a/target/arm/translate.c
123
+++ b/target/arm/translate.c
124
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
125
int pass;
126
int u;
127
int vec_size;
128
- uint32_t imm;
129
TCGv_i32 tmp, tmp2, tmp3, tmp5;
130
TCGv_ptr ptr1;
131
- TCGv_i64 tmp64;
132
133
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
134
return 1;
135
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
136
return 1;
137
} else { /* size == 3 */
138
if (!u) {
139
- /* Extract. */
140
- imm = (insn >> 8) & 0xf;
141
-
142
- if (imm > 7 && !q)
143
- return 1;
144
-
145
- if (q && ((rd | rn | rm) & 1)) {
146
- return 1;
147
- }
148
-
149
- if (imm == 0) {
150
- neon_load_reg64(cpu_V0, rn);
151
- if (q) {
152
- neon_load_reg64(cpu_V1, rn + 1);
153
- }
154
- } else if (imm == 8) {
155
- neon_load_reg64(cpu_V0, rn + 1);
156
- if (q) {
157
- neon_load_reg64(cpu_V1, rm);
158
- }
159
- } else if (q) {
160
- tmp64 = tcg_temp_new_i64();
161
- if (imm < 8) {
162
- neon_load_reg64(cpu_V0, rn);
163
- neon_load_reg64(tmp64, rn + 1);
164
- } else {
165
- neon_load_reg64(cpu_V0, rn + 1);
166
- neon_load_reg64(tmp64, rm);
167
- }
168
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
169
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
170
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
171
- if (imm < 8) {
172
- neon_load_reg64(cpu_V1, rm);
173
- } else {
174
- neon_load_reg64(cpu_V1, rm + 1);
175
- imm -= 8;
176
- }
177
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
178
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
179
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
180
- tcg_temp_free_i64(tmp64);
181
- } else {
182
- /* BUGFIX */
183
- neon_load_reg64(cpu_V0, rn);
184
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
185
- neon_load_reg64(cpu_V1, rm);
186
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
187
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
188
- }
189
- neon_store_reg64(cpu_V0, rd);
190
- if (q) {
191
- neon_store_reg64(cpu_V1, rd + 1);
192
- }
193
+ /* Extract: handled by decodetree */
194
+ return 1;
195
} else if ((insn & (1 << 11)) == 0) {
196
/* Two register misc. */
197
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
198
--
199
2.20.1
200
201
diff view generated by jsdifflib
1
Add some assertions that if we're about to boot an AArch64 kernel,
1
Convert the Neon VTBL, VTBX instructions to decodetree. The actual
2
the board code has not mistakenly set either secure_boot or
2
implementation of the insn is copied across to the new trans function
3
secure_board_setup. It doesn't make sense to set secure_boot,
3
unchanged except for renaming 'tmp5' to 'tmp4'.
4
because all AArch64 kernels must be booted in non-secure mode.
5
6
It might in theory make sense to set secure_board_setup, but
7
we don't currently support that, because only the AArch32
8
bootloader[] code calls this hook; bootloader_aarch64[] does not.
9
Since we don't have a current need for this functionality, just
10
assert that we don't try to use it. If it's needed we'll add
11
it later.
12
4
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20180313153458.26822-3-peter.maydell@linaro.org
16
---
7
---
17
hw/arm/boot.c | 7 +++++++
8
target/arm/neon-dp.decode | 3 ++
18
1 file changed, 7 insertions(+)
9
target/arm/translate-neon.inc.c | 56 +++++++++++++++++++++++++++++++++
10
target/arm/translate.c | 41 +++---------------------
11
3 files changed, 63 insertions(+), 37 deletions(-)
19
12
20
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
21
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/arm/boot.c
15
--- a/target/arm/neon-dp.decode
23
+++ b/hw/arm/boot.c
16
+++ b/target/arm/neon-dp.decode
24
@@ -XXX,XX +XXX,XX @@ static void do_cpu_reset(void *opaque)
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
25
} else {
18
##################################################################
26
env->pstate = PSTATE_MODE_EL1h;
19
VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
27
}
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
28
+ /* AArch64 kernels never boot in secure mode */
21
+
29
+ assert(!info->secure_boot);
22
+ VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
30
+ /* This hook is only supported for AArch32 currently:
23
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
31
+ * bootloader_aarch64[] will not call the hook, and
24
]
32
+ * the code above has already dropped us into EL2 or EL1.
25
33
+ */
26
# Subgroup for size != 0b11
34
+ assert(!info->secure_board_setup);
27
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-neon.inc.c
30
+++ b/target/arm/translate-neon.inc.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
32
}
33
return true;
34
}
35
+
36
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
37
+{
38
+ int n;
39
+ TCGv_i32 tmp, tmp2, tmp3, tmp4;
40
+ TCGv_ptr ptr1;
41
+
42
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
43
+ return false;
44
+ }
45
+
46
+ /* UNDEF accesses to D16-D31 if they don't exist. */
47
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
48
+ ((a->vd | a->vn | a->vm) & 0x10)) {
49
+ return false;
50
+ }
51
+
52
+ if (!vfp_access_check(s)) {
53
+ return true;
54
+ }
55
+
56
+ n = a->len + 1;
57
+ if ((a->vn + n) > 32) {
58
+ /*
59
+ * This is UNPREDICTABLE; we choose to UNDEF to avoid the
60
+ * helper function running off the end of the register file.
61
+ */
62
+ return false;
63
+ }
64
+ n <<= 3;
65
+ if (a->op) {
66
+ tmp = neon_load_reg(a->vd, 0);
67
+ } else {
68
+ tmp = tcg_temp_new_i32();
69
+ tcg_gen_movi_i32(tmp, 0);
70
+ }
71
+ tmp2 = neon_load_reg(a->vm, 0);
72
+ ptr1 = vfp_reg_ptr(true, a->vn);
73
+ tmp4 = tcg_const_i32(n);
74
+ gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
75
+ tcg_temp_free_i32(tmp);
76
+ if (a->op) {
77
+ tmp = neon_load_reg(a->vd, 1);
78
+ } else {
79
+ tmp = tcg_temp_new_i32();
80
+ tcg_gen_movi_i32(tmp, 0);
81
+ }
82
+ tmp3 = neon_load_reg(a->vm, 1);
83
+ gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
84
+ tcg_temp_free_i32(tmp4);
85
+ tcg_temp_free_ptr(ptr1);
86
+ neon_store_reg(a->vd, 0, tmp2);
87
+ neon_store_reg(a->vd, 1, tmp3);
88
+ tcg_temp_free_i32(tmp);
89
+ return true;
90
+}
91
diff --git a/target/arm/translate.c b/target/arm/translate.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/target/arm/translate.c
94
+++ b/target/arm/translate.c
95
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
96
{
97
int op;
98
int q;
99
- int rd, rn, rm, rd_ofs, rm_ofs;
100
+ int rd, rm, rd_ofs, rm_ofs;
101
int size;
102
int pass;
103
int u;
104
int vec_size;
105
- TCGv_i32 tmp, tmp2, tmp3, tmp5;
106
- TCGv_ptr ptr1;
107
+ TCGv_i32 tmp, tmp2, tmp3;
108
109
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
110
return 1;
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
q = (insn & (1 << 6)) != 0;
113
u = (insn >> 24) & 1;
114
VFP_DREG_D(rd, insn);
115
- VFP_DREG_N(rn, insn);
116
VFP_DREG_M(rm, insn);
117
size = (insn >> 20) & 3;
118
vec_size = q ? 16 : 8;
119
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
120
break;
35
}
121
}
36
122
} else if ((insn & (1 << 10)) == 0) {
37
/* Set to non-secure if not a secure boot */
123
- /* VTBL, VTBX. */
124
- int n = ((insn >> 8) & 3) + 1;
125
- if ((rn + n) > 32) {
126
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
127
- * helper function running off the end of the register file.
128
- */
129
- return 1;
130
- }
131
- n <<= 3;
132
- if (insn & (1 << 6)) {
133
- tmp = neon_load_reg(rd, 0);
134
- } else {
135
- tmp = tcg_temp_new_i32();
136
- tcg_gen_movi_i32(tmp, 0);
137
- }
138
- tmp2 = neon_load_reg(rm, 0);
139
- ptr1 = vfp_reg_ptr(true, rn);
140
- tmp5 = tcg_const_i32(n);
141
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
142
- tcg_temp_free_i32(tmp);
143
- if (insn & (1 << 6)) {
144
- tmp = neon_load_reg(rd, 1);
145
- } else {
146
- tmp = tcg_temp_new_i32();
147
- tcg_gen_movi_i32(tmp, 0);
148
- }
149
- tmp3 = neon_load_reg(rm, 1);
150
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
151
- tcg_temp_free_i32(tmp5);
152
- tcg_temp_free_ptr(ptr1);
153
- neon_store_reg(rd, 0, tmp2);
154
- neon_store_reg(rd, 1, tmp3);
155
- tcg_temp_free_i32(tmp);
156
+ /* VTBL, VTBX: handled by decodetree */
157
+ return 1;
158
} else if ((insn & 0x380) == 0) {
159
/* VDUP */
160
int element;
38
--
161
--
39
2.16.2
162
2.20.1
40
163
41
164
diff view generated by jsdifflib
1
For the rpi1 and 2 we want to boot the Linux kernel via some
1
Convert the Neon VDUP (scalar) insn to decodetree. (Note that we
2
custom setup code that makes sure that the SMC instruction
2
can't call this just "VDUP" as we used that already in vfp.decode for
3
acts as a no-op, because it's used for cache maintenance.
3
the "VDUP (general purpose register)" insn.)
4
The rpi3 boots AArch64 kernels, which don't need SMC for
5
cache maintenance and always expect to be booted non-secure.
6
Don't fill in the aarch32-specific parts of the binfo struct.
7
4
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Message-id: 20180313153458.26822-2-peter.maydell@linaro.org
12
---
7
---
13
hw/arm/raspi.c | 17 +++++++++++++----
8
target/arm/neon-dp.decode | 7 +++++++
14
1 file changed, 13 insertions(+), 4 deletions(-)
9
target/arm/translate-neon.inc.c | 26 ++++++++++++++++++++++++++
10
target/arm/translate.c | 25 +------------------------
11
3 files changed, 34 insertions(+), 24 deletions(-)
15
12
16
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
13
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/arm/raspi.c
15
--- a/target/arm/neon-dp.decode
19
+++ b/hw/arm/raspi.c
16
+++ b/target/arm/neon-dp.decode
20
@@ -XXX,XX +XXX,XX @@ static void setup_boot(MachineState *machine, int version, size_t ram_size)
17
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
21
binfo.board_id = raspi_boardid[version];
18
22
binfo.ram_size = ram_size;
19
VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
23
binfo.nb_cpus = smp_cpus;
20
vm=%vm_dp vn=%vn_dp vd=%vd_dp
24
- binfo.board_setup_addr = BOARDSETUP_ADDR;
25
- binfo.write_board_setup = write_board_setup;
26
- binfo.secure_board_setup = true;
27
- binfo.secure_boot = true;
28
+
21
+
29
+ if (version <= 2) {
22
+ VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
30
+ /* The rpi1 and 2 require some custom setup code to run in Secure
23
+ vm=%vm_dp vd=%vd_dp size=0
31
+ * mode before booting a kernel (to set up the SMC vectors so
24
+ VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
32
+ * that we get a no-op SMC; this is used by Linux to call the
25
+ vm=%vm_dp vd=%vd_dp size=1
33
+ * firmware for some cache maintenance operations.
26
+ VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
34
+ * The rpi3 doesn't need this.
27
+ vm=%vm_dp vd=%vd_dp size=2
35
+ */
28
]
36
+ binfo.board_setup_addr = BOARDSETUP_ADDR;
29
37
+ binfo.write_board_setup = write_board_setup;
30
# Subgroup for size != 0b11
38
+ binfo.secure_board_setup = true;
31
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
39
+ binfo.secure_boot = true;
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/translate-neon.inc.c
34
+++ b/target/arm/translate-neon.inc.c
35
@@ -XXX,XX +XXX,XX @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
36
tcg_temp_free_i32(tmp);
37
return true;
38
}
39
+
40
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
41
+{
42
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
43
+ return false;
40
+ }
44
+ }
41
45
+
42
/* Pi2 and Pi3 requires SMP setup */
46
+ /* UNDEF accesses to D16-D31 if they don't exist. */
43
if (version >= 2) {
47
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
48
+ ((a->vd | a->vm) & 0x10)) {
49
+ return false;
50
+ }
51
+
52
+ if (a->vd & a->q) {
53
+ return false;
54
+ }
55
+
56
+ if (!vfp_access_check(s)) {
57
+ return true;
58
+ }
59
+
60
+ tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
61
+ neon_element_offset(a->vm, a->index, a->size),
62
+ a->q ? 16 : 8, a->q ? 16 : 8);
63
+ return true;
64
+}
65
diff --git a/target/arm/translate.c b/target/arm/translate.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/target/arm/translate.c
68
+++ b/target/arm/translate.c
69
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
70
}
71
break;
72
}
73
- } else if ((insn & (1 << 10)) == 0) {
74
- /* VTBL, VTBX: handled by decodetree */
75
- return 1;
76
- } else if ((insn & 0x380) == 0) {
77
- /* VDUP */
78
- int element;
79
- MemOp size;
80
-
81
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
82
- return 1;
83
- }
84
- if (insn & (1 << 16)) {
85
- size = MO_8;
86
- element = (insn >> 17) & 7;
87
- } else if (insn & (1 << 17)) {
88
- size = MO_16;
89
- element = (insn >> 18) & 3;
90
- } else {
91
- size = MO_32;
92
- element = (insn >> 19) & 1;
93
- }
94
- tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
95
- neon_element_offset(rm, element, size),
96
- q ? 16 : 8, q ? 16 : 8);
97
} else {
98
+ /* VTBL, VTBX, VDUP: handled by decodetree */
99
return 1;
100
}
101
}
44
--
102
--
45
2.16.2
103
2.20.1
46
104
47
105
diff view generated by jsdifflib
1
The bcm2837 is pretty similar to the bcm2836, but it does have
1
From: Jean-Christophe Dubois <jcd@tribudubois.net>
2
some differences. Notably, the MPIDR affinity aff1 values it
3
sets for the CPUs are 0x0, rather than the 0xf that the bcm2836
4
uses, and if this is wrong Linux will not boot.
5
2
6
Rather than trying to have one device with properties that
3
Some bits of the CCM registers are non-writable.
7
configure it differently for the two cases, create two
8
separate QOM devices for the two SoCs. We use the same approach
9
as hw/arm/aspeed_soc.c and share code and have a data table
10
that might differ per-SoC. For the moment the two types don't
11
actually have different behaviour.
12
4
5
This was left undone in the initial commit (all bits of registers were
6
writable).
7
8
This patch adds the required code to protect the non-writable bits.
9
10
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
11
Message-id: 20200608133508.550046-1-jcd@tribudubois.net
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
15
Message-id: 20180313153458.26822-7-peter.maydell@linaro.org
16
---
14
---
17
include/hw/arm/bcm2836.h | 19 +++++++++++++++++++
15
hw/misc/imx6ul_ccm.c | 76 ++++++++++++++++++++++++++++++++++++--------
18
hw/arm/bcm2836.c | 37 ++++++++++++++++++++++++++++++++-----
16
1 file changed, 63 insertions(+), 13 deletions(-)
19
hw/arm/raspi.c | 3 ++-
20
3 files changed, 53 insertions(+), 6 deletions(-)
21
17
22
diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h
18
diff --git a/hw/misc/imx6ul_ccm.c b/hw/misc/imx6ul_ccm.c
23
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
24
--- a/include/hw/arm/bcm2836.h
20
--- a/hw/misc/imx6ul_ccm.c
25
+++ b/include/hw/arm/bcm2836.h
21
+++ b/hw/misc/imx6ul_ccm.c
26
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@
27
23
28
#define BCM283X_NCPUS 4
24
#include "trace.h"
29
25
30
+/* These type names are for specific SoCs; other than instantiating
26
+static const uint32_t ccm_mask[CCM_MAX] = {
31
+ * them, code using these devices should always handle them via the
27
+ [CCM_CCR] = 0xf01fef80,
32
+ * BCM283x base class, so they have no BCM2836(obj) etc macros.
28
+ [CCM_CCDR] = 0xfffeffff,
33
+ */
29
+ [CCM_CSR] = 0xffffffff,
34
+#define TYPE_BCM2836 "bcm2836"
30
+ [CCM_CCSR] = 0xfffffef2,
35
+#define TYPE_BCM2837 "bcm2837"
31
+ [CCM_CACRR] = 0xfffffff8,
36
+
32
+ [CCM_CBCDR] = 0xc1f8e000,
37
typedef struct BCM283XState {
33
+ [CCM_CBCMR] = 0xfc03cfff,
38
/*< private >*/
34
+ [CCM_CSCMR1] = 0x80700000,
39
DeviceState parent_obj;
35
+ [CCM_CSCMR2] = 0xe01ff003,
40
@@ -XXX,XX +XXX,XX @@ typedef struct BCM283XState {
36
+ [CCM_CSCDR1] = 0xfe00c780,
41
BCM2835PeripheralState peripherals;
37
+ [CCM_CS1CDR] = 0xfe00fe00,
42
} BCM283XState;
38
+ [CCM_CS2CDR] = 0xf8007000,
43
39
+ [CCM_CDCDR] = 0xf00fffff,
44
+typedef struct BCM283XInfo BCM283XInfo;
40
+ [CCM_CHSCCDR] = 0xfffc01ff,
45
+
41
+ [CCM_CSCDR2] = 0xfe0001ff,
46
+typedef struct BCM283XClass {
42
+ [CCM_CSCDR3] = 0xffffc1ff,
47
+ DeviceClass parent_class;
43
+ [CCM_CDHIPR] = 0xffffffff,
48
+ const BCM283XInfo *info;
44
+ [CCM_CTOR] = 0x00000000,
49
+} BCM283XClass;
45
+ [CCM_CLPCR] = 0xf39ff01c,
50
+
46
+ [CCM_CISR] = 0xfb85ffbe,
51
+#define BCM283X_CLASS(klass) \
47
+ [CCM_CIMR] = 0xfb85ffbf,
52
+ OBJECT_CLASS_CHECK(BCM283XClass, (klass), TYPE_BCM283X)
48
+ [CCM_CCOSR] = 0xfe00fe00,
53
+#define BCM283X_GET_CLASS(obj) \
49
+ [CCM_CGPR] = 0xfffc3fea,
54
+ OBJECT_GET_CLASS(BCM283XClass, (obj), TYPE_BCM283X)
50
+ [CCM_CCGR0] = 0x00000000,
55
+
51
+ [CCM_CCGR1] = 0x00000000,
56
#endif /* BCM2836_H */
52
+ [CCM_CCGR2] = 0x00000000,
57
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
53
+ [CCM_CCGR3] = 0x00000000,
58
index XXXXXXX..XXXXXXX 100644
54
+ [CCM_CCGR4] = 0x00000000,
59
--- a/hw/arm/bcm2836.c
55
+ [CCM_CCGR5] = 0x00000000,
60
+++ b/hw/arm/bcm2836.c
56
+ [CCM_CCGR6] = 0x00000000,
61
@@ -XXX,XX +XXX,XX @@
57
+ [CCM_CMEOR] = 0xafffff1f,
62
/* "QA7" (Pi2) interrupt controller and mailboxes etc. */
63
#define BCM2836_CONTROL_BASE 0x40000000
64
65
+struct BCM283XInfo {
66
+ const char *name;
67
+};
58
+};
68
+
59
+
69
+static const BCM283XInfo bcm283x_socs[] = {
60
+static const uint32_t analog_mask[CCM_ANALOG_MAX] = {
70
+ {
61
+ [CCM_ANALOG_PLL_ARM] = 0xfff60f80,
71
+ .name = TYPE_BCM2836,
62
+ [CCM_ANALOG_PLL_USB1] = 0xfffe0fbc,
72
+ },
63
+ [CCM_ANALOG_PLL_USB2] = 0xfffe0fbc,
73
+ {
64
+ [CCM_ANALOG_PLL_SYS] = 0xfffa0ffe,
74
+ .name = TYPE_BCM2837,
65
+ [CCM_ANALOG_PLL_SYS_SS] = 0x00000000,
75
+ },
66
+ [CCM_ANALOG_PLL_SYS_NUM] = 0xc0000000,
67
+ [CCM_ANALOG_PLL_SYS_DENOM] = 0xc0000000,
68
+ [CCM_ANALOG_PLL_AUDIO] = 0xffe20f80,
69
+ [CCM_ANALOG_PLL_AUDIO_NUM] = 0xc0000000,
70
+ [CCM_ANALOG_PLL_AUDIO_DENOM] = 0xc0000000,
71
+ [CCM_ANALOG_PLL_VIDEO] = 0xffe20f80,
72
+ [CCM_ANALOG_PLL_VIDEO_NUM] = 0xc0000000,
73
+ [CCM_ANALOG_PLL_VIDEO_DENOM] = 0xc0000000,
74
+ [CCM_ANALOG_PLL_ENET] = 0xffc20ff0,
75
+ [CCM_ANALOG_PFD_480] = 0x40404040,
76
+ [CCM_ANALOG_PFD_528] = 0x40404040,
77
+ [PMU_MISC0] = 0x01fe8306,
78
+ [PMU_MISC1] = 0x07fcede0,
79
+ [PMU_MISC2] = 0x005f5f5f,
76
+};
80
+};
77
+
81
+
78
static void bcm2836_init(Object *obj)
82
static const char *imx6ul_ccm_reg_name(uint32_t reg)
79
{
83
{
80
BCM283XState *s = BCM283X(obj);
84
static char unknown[20];
81
@@ -XXX,XX +XXX,XX @@ static Property bcm2836_props[] = {
85
@@ -XXX,XX +XXX,XX @@ static void imx6ul_ccm_write(void *opaque, hwaddr offset, uint64_t value,
82
DEFINE_PROP_END_OF_LIST()
86
83
};
87
trace_ccm_write_reg(imx6ul_ccm_reg_name(index), (uint32_t)value);
84
88
85
-static void bcm2836_class_init(ObjectClass *oc, void *data)
89
- /*
86
+static void bcm283x_class_init(ObjectClass *oc, void *data)
90
- * We will do a better implementation later. In particular some bits
87
{
91
- * cannot be written to.
88
DeviceClass *dc = DEVICE_CLASS(oc);
92
- */
89
+ BCM283XClass *bc = BCM283X_CLASS(oc);
93
- s->ccm[index] = (uint32_t)value;
90
94
+ s->ccm[index] = (s->ccm[index] & ccm_mask[index]) |
91
- dc->props = bcm2836_props;
95
+ ((uint32_t)value & ~ccm_mask[index]);
92
+ bc->info = data;
93
dc->realize = bcm2836_realize;
94
+ dc->props = bcm2836_props;
95
}
96
}
96
97
97
-static const TypeInfo bcm2836_type_info = {
98
static uint64_t imx6ul_analog_read(void *opaque, hwaddr offset, unsigned size)
98
+static const TypeInfo bcm283x_type_info = {
99
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
99
.name = TYPE_BCM283X,
100
* the REG_NAME register. So we change the value of the
100
.parent = TYPE_DEVICE,
101
* REG_NAME register, setting bits passed in the value.
101
.instance_size = sizeof(BCM283XState),
102
*/
102
.instance_init = bcm2836_init,
103
- s->analog[index - 1] |= value;
103
- .class_init = bcm2836_class_init,
104
+ s->analog[index - 1] |= (value & ~analog_mask[index - 1]);
104
+ .class_size = sizeof(BCM283XClass),
105
break;
105
+ .abstract = true,
106
case CCM_ANALOG_PLL_ARM_CLR:
106
};
107
case CCM_ANALOG_PLL_USB1_CLR:
107
108
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
108
static void bcm2836_register_types(void)
109
* the REG_NAME register. So we change the value of the
109
{
110
* REG_NAME register, unsetting bits passed in the value.
110
- type_register_static(&bcm2836_type_info);
111
*/
111
+ int i;
112
- s->analog[index - 2] &= ~value;
112
+
113
+ s->analog[index - 2] &= ~(value & ~analog_mask[index - 2]);
113
+ type_register_static(&bcm283x_type_info);
114
break;
114
+ for (i = 0; i < ARRAY_SIZE(bcm283x_socs); i++) {
115
case CCM_ANALOG_PLL_ARM_TOG:
115
+ TypeInfo ti = {
116
case CCM_ANALOG_PLL_USB1_TOG:
116
+ .name = bcm283x_socs[i].name,
117
@@ -XXX,XX +XXX,XX @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
117
+ .parent = TYPE_BCM283X,
118
* the REG_NAME register. So we change the value of the
118
+ .class_init = bcm283x_class_init,
119
* REG_NAME register, toggling bits passed in the value.
119
+ .class_data = (void *) &bcm283x_socs[i],
120
*/
120
+ };
121
- s->analog[index - 3] ^= value;
121
+ type_register(&ti);
122
+ s->analog[index - 3] ^= (value & ~analog_mask[index - 3]);
122
+ }
123
break;
124
default:
125
- /*
126
- * We will do a better implementation later. In particular some bits
127
- * cannot be written to.
128
- */
129
- s->analog[index] = value;
130
+ s->analog[index] = (s->analog[index] & analog_mask[index]) |
131
+ (value & ~analog_mask[index]);
132
break;
133
}
123
}
134
}
124
125
type_init(bcm2836_register_types)
126
diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/hw/arm/raspi.c
129
+++ b/hw/arm/raspi.c
130
@@ -XXX,XX +XXX,XX @@ static void raspi_init(MachineState *machine, int version)
131
BusState *bus;
132
DeviceState *carddev;
133
134
- object_initialize(&s->soc, sizeof(s->soc), TYPE_BCM283X);
135
+ object_initialize(&s->soc, sizeof(s->soc),
136
+ version == 3 ? TYPE_BCM2837 : TYPE_BCM2836);
137
object_property_add_child(OBJECT(machine), "soc", OBJECT(&s->soc),
138
&error_abort);
139
140
--
135
--
141
2.16.2
136
2.20.1
142
137
143
138
diff view generated by jsdifflib
New patch
1
From: Erik Smit <erik.lucas.smit@gmail.com>
1
2
3
The hardware supports configurable descriptor sizes, configured in the DBLAC
4
register.
5
6
Most drivers use the default 4 word descriptor, which is currently hardcoded,
7
but Aspeed SDK configures 8 words to store extra data.
8
9
Signed-off-by: Erik Smit <erik.lucas.smit@gmail.com>
10
Reviewed-by: Cédric Le Goater <clg@kaod.org>
11
[PMM: removed unnecessary parens]
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
hw/net/ftgmac100.c | 26 ++++++++++++++++++++++++--
15
1 file changed, 24 insertions(+), 2 deletions(-)
16
17
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/ftgmac100.c
20
+++ b/hw/net/ftgmac100.c
21
@@ -XXX,XX +XXX,XX @@
22
#define FTGMAC100_APTC_TXPOLL_CNT(x) (((x) >> 8) & 0xf)
23
#define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12)
24
25
+/*
26
+ * DMA burst length and arbitration control register
27
+ */
28
+#define FTGMAC100_DBLAC_RXBURST_SIZE(x) (((x) >> 8) & 0x3)
29
+#define FTGMAC100_DBLAC_TXBURST_SIZE(x) (((x) >> 10) & 0x3)
30
+#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8)
31
+#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8)
32
+#define FTGMAC100_DBLAC_IFG_CNT(x) (((x) >> 20) & 0x7)
33
+#define FTGMAC100_DBLAC_IFG_INC (1 << 23)
34
+
35
/*
36
* PHY control register
37
*/
38
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
39
if (bd.des0 & s->txdes0_edotr) {
40
addr = tx_ring;
41
} else {
42
- addr += sizeof(FTGMAC100Desc);
43
+ addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac);
44
}
45
}
46
47
@@ -XXX,XX +XXX,XX @@ static void ftgmac100_write(void *opaque, hwaddr addr,
48
s->phydata = value & 0xffff;
49
break;
50
case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
51
+ if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
52
+ qemu_log_mask(LOG_GUEST_ERROR,
53
+ "%s: transmit descriptor too small : %d bytes\n",
54
+ __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
55
+ break;
56
+ }
57
+ if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
58
+ qemu_log_mask(LOG_GUEST_ERROR,
59
+ "%s: receive descriptor too small : %d bytes\n",
60
+ __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
61
+ break;
62
+ }
63
s->dblac = value;
64
break;
65
case FTGMAC100_REVR: /* Feature Register */
66
@@ -XXX,XX +XXX,XX @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
67
if (bd.des0 & s->rxdes0_edorr) {
68
addr = s->rx_ring;
69
} else {
70
- addr += sizeof(FTGMAC100Desc);
71
+ addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac);
72
}
73
}
74
s->rx_descriptor = addr;
75
--
76
2.20.1
77
78
diff view generated by jsdifflib
1
From: Andrey Smirnov <andrew.smirnov@gmail.com>
1
From: fangying <fangying1@huawei.com>
2
2
3
Code of imx_update() is slightly confusing since the "flags" variable
3
Virtual time adjustment was implemented for virt-5.0 machine type,
4
doesn't really correspond to anything in real hardware and serves as a
4
but the cpu property was enabled only for host-passthrough and max
5
kitchensink accumulating events normally reported via USR1 and USR2
5
cpu model. Let's add it for any KVM arm cpu which has the generic
6
registers.
6
timer feature enabled.
7
7
8
Change the code to explicitly evaluate state of interrupts reported
8
Signed-off-by: Ying Fang <fangying1@huawei.com>
9
via USR1 and USR2 against corresponding masking bits and use them to
9
Reviewed-by: Andrew Jones <drjones@redhat.com>
10
determine if IRQ line should be asserted or not.
10
Message-id: 20200608121243.2076-1-fangying1@huawei.com
11
11
[PMM: minor commit message tweak, removed inaccurate
12
NOTE: Check for UTS1_TXEMPTY being set has been dropped for two
12
suggested-by tag]
13
reasons:
14
15
1. Emulation code implements a single character FIFO, so this flag
16
will always be set since characters are transmitted as a part of
17
the code emulating "push" into the FIFO
18
19
2. imx_update() is really just a function doing ORing and masking
20
of reported events, so checking for UTS1_TXEMPTY, if it's ever
21
really needed, should probably happen outside of
22
it.
23
24
Cc: qemu-devel@nongnu.org
25
Cc: qemu-arm@nongnu.org
26
Cc: Bill Paul <wpaul@windriver.com>
27
Cc: Peter Maydell <peter.maydell@linaro.org>
28
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
29
Message-id: 20180315191141.6789-1-andrew.smirnov@gmail.com
30
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
---
14
---
33
hw/char/imx_serial.c | 24 ++++++++++++++++--------
15
target/arm/cpu.c | 6 ++++--
34
1 file changed, 16 insertions(+), 8 deletions(-)
16
target/arm/cpu64.c | 1 -
17
target/arm/kvm.c | 21 +++++++++++----------
18
3 files changed, 15 insertions(+), 13 deletions(-)
35
19
36
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
20
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
37
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
38
--- a/hw/char/imx_serial.c
22
--- a/target/arm/cpu.c
39
+++ b/hw/char/imx_serial.c
23
+++ b/target/arm/cpu.c
40
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_imx_serial = {
24
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj)
41
25
if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
42
static void imx_update(IMXSerialState *s)
26
qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
27
}
28
+
29
+ if (kvm_enabled()) {
30
+ kvm_arm_add_vcpu_properties(obj);
31
+ }
32
}
33
34
static void arm_cpu_finalizefn(Object *obj)
35
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
36
37
if (kvm_enabled()) {
38
kvm_arm_set_cpu_features_from_host(cpu);
39
- kvm_arm_add_vcpu_properties(obj);
40
} else {
41
cortex_a15_initfn(obj);
42
43
@@ -XXX,XX +XXX,XX @@ static void arm_host_initfn(Object *obj)
44
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
45
aarch64_add_sve_properties(obj);
46
}
47
- kvm_arm_add_vcpu_properties(obj);
48
arm_cpu_post_init(obj);
49
}
50
51
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/target/arm/cpu64.c
54
+++ b/target/arm/cpu64.c
55
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
56
57
if (kvm_enabled()) {
58
kvm_arm_set_cpu_features_from_host(cpu);
59
- kvm_arm_add_vcpu_properties(obj);
60
} else {
61
uint64_t t;
62
uint32_t u;
63
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/arm/kvm.c
66
+++ b/target/arm/kvm.c
67
@@ -XXX,XX +XXX,XX @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
68
/* KVM VCPU properties should be prefixed with "kvm-". */
69
void kvm_arm_add_vcpu_properties(Object *obj)
43
{
70
{
44
- uint32_t flags;
71
- if (!kvm_enabled()) {
45
+ uint32_t usr1;
72
- return;
46
+ uint32_t usr2;
47
+ uint32_t mask;
48
49
- flags = (s->usr1 & s->ucr1) & (USR1_TRDY|USR1_RRDY);
50
- if (s->ucr1 & UCR1_TXMPTYEN) {
51
- flags |= (s->uts1 & UTS1_TXEMPTY);
52
- } else {
53
- flags &= ~USR1_TRDY;
54
- }
73
- }
55
+ /*
74
+ ARMCPU *cpu = ARM_CPU(obj);
56
+ * Lucky for us TRDY and RRDY has the same offset in both USR1 and
75
+ CPUARMState *env = &cpu->env;
57
+ * UCR1, so we can get away with something as simple as the
76
58
+ * following:
77
- ARM_CPU(obj)->kvm_adjvtime = true;
59
+ */
78
- object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
60
+ usr1 = s->usr1 & s->ucr1 & (USR1_TRDY | USR1_RRDY);
79
- kvm_no_adjvtime_set);
61
+ /*
80
- object_property_set_description(obj, "kvm-no-adjvtime",
62
+ * Bits that we want in USR2 are not as conveniently laid out,
81
- "Set on to disable the adjustment of "
63
+ * unfortunately.
82
- "the virtual counter. VM stopped time "
64
+ */
83
- "will be counted.");
65
+ mask = (s->ucr1 & UCR1_TXMPTYEN) ? USR2_TXFE : 0;
84
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
66
+ usr2 = s->usr2 & mask;
85
+ cpu->kvm_adjvtime = true;
67
86
+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
68
- qemu_set_irq(s->irq, !!flags);
87
+ kvm_no_adjvtime_set);
69
+ qemu_set_irq(s->irq, usr1 || usr2);
88
+ object_property_set_description(obj, "kvm-no-adjvtime",
89
+ "Set on to disable the adjustment of "
90
+ "the virtual counter. VM stopped time "
91
+ "will be counted.");
92
+ }
70
}
93
}
71
94
72
static void imx_serial_reset(IMXSerialState *s)
95
bool kvm_arm_pmu_supported(CPUState *cpu)
73
--
96
--
74
2.16.2
97
2.20.1
75
98
76
99
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
From: Jean-Christophe Dubois <jcd@tribudubois.net>
2
2
3
The sabrelite machine model used by qemu-system-arm is based on the
3
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
4
Freescale/NXP i.MX6Q processor. This SoC has an on-board ethernet
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
controller which is supported in QEMU using the imx_fec.c module
5
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
(actually called imx.enet for this model.)
6
[PMD: Fixed 32-bit format string using PRIx32/PRIx64]
7
7
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
The include/hw/arm/fsm-imx6.h file defines the interrupt vectors for the
9
imx.enet device like this:
10
11
#define FSL_IMX6_ENET_MAC_1588_IRQ 118
12
#define FSL_IMX6_ENET_MAC_IRQ 119
13
14
According to https://www.nxp.com/docs/en/reference-manual/IMX6DQRM.pdf,
15
page 225, in Table 3-1. ARM Cortex A9 domain interrupt summary,
16
interrupts are as follows.
17
18
150 ENET MAC 0 IRQ
19
151 ENET MAC 0 1588 Timer interrupt
20
21
where
22
23
150 - 32 == 118
24
151 - 32 == 119
25
26
In other words, the vector definitions in the fsl-imx6.h file are reversed.
27
28
Fixing the interrupts alone causes problems with older Linux kernels:
29
The Ethernet interface will fail to probe with Linux v4.9 and earlier.
30
Linux v4.1 and earlier will crash due to a bug in Ethernet driver probe
31
error handling. This is a Linux kernel problem, not a qemu problem:
32
the Linux kernel only worked by accident since it requested both interrupts.
33
34
For backward compatibility, generate the Ethernet interrupt on both interrupt
35
lines. This was shown to work from all Linux kernel releases starting with
36
v3.16.
37
38
Link: https://bugs.launchpad.net/qemu/+bug/1753309
39
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
40
Message-id: 1520723090-22130-1-git-send-email-linux@roeck-us.net
41
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
42
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
43
---
9
---
44
include/hw/arm/fsl-imx6.h | 4 ++--
10
hw/net/imx_fec.c | 106 +++++++++++++++++++-------------------------
45
hw/net/imx_fec.c | 28 +++++++++++++++++++++++++++-
11
hw/net/trace-events | 18 ++++++++
46
2 files changed, 29 insertions(+), 3 deletions(-)
12
2 files changed, 63 insertions(+), 61 deletions(-)
47
13
48
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/hw/arm/fsl-imx6.h
51
+++ b/include/hw/arm/fsl-imx6.h
52
@@ -XXX,XX +XXX,XX @@ typedef struct FslIMX6State {
53
#define FSL_IMX6_HDMI_MASTER_IRQ 115
54
#define FSL_IMX6_HDMI_CEC_IRQ 116
55
#define FSL_IMX6_MLB150_LOW_IRQ 117
56
-#define FSL_IMX6_ENET_MAC_1588_IRQ 118
57
-#define FSL_IMX6_ENET_MAC_IRQ 119
58
+#define FSL_IMX6_ENET_MAC_IRQ 118
59
+#define FSL_IMX6_ENET_MAC_1588_IRQ 119
60
#define FSL_IMX6_PCIE1_IRQ 120
61
#define FSL_IMX6_PCIE2_IRQ 121
62
#define FSL_IMX6_PCIE3_IRQ 122
63
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
14
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
64
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
65
--- a/hw/net/imx_fec.c
16
--- a/hw/net/imx_fec.c
66
+++ b/hw/net/imx_fec.c
17
+++ b/hw/net/imx_fec.c
67
@@ -XXX,XX +XXX,XX @@ static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr)
18
@@ -XXX,XX +XXX,XX @@
68
19
#include "qemu/module.h"
69
static void imx_eth_update(IMXFECState *s)
20
#include "net/checksum.h"
70
{
21
#include "net/eth.h"
71
- if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] & ENET_INT_TS_TIMER) {
22
+#include "trace.h"
72
+ /*
23
73
+ * Previous versions of qemu had the ENET_INT_MAC and ENET_INT_TS_TIMER
24
/* For crc32 */
74
+ * interrupts swapped. This worked with older versions of Linux (4.14
25
#include <zlib.h>
75
+ * and older) since Linux associated both interrupt lines with Ethernet
26
76
+ * MAC interrupts. Specifically,
27
-#ifndef DEBUG_IMX_FEC
77
+ * - Linux 4.15 and later have separate interrupt handlers for the MAC and
28
-#define DEBUG_IMX_FEC 0
78
+ * timer interrupts. Those versions of Linux fail with versions of QEMU
29
-#endif
79
+ * with swapped interrupt assignments.
30
-
80
+ * - In linux 4.14, both interrupt lines were registered with the Ethernet
31
-#define FEC_PRINTF(fmt, args...) \
81
+ * MAC interrupt handler. As a result, all versions of qemu happen to
32
- do { \
82
+ * work, though that is accidental.
33
- if (DEBUG_IMX_FEC) { \
83
+ * - In Linux 4.9 and older, the timer interrupt was registered directly
34
- fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
84
+ * with the Ethernet MAC interrupt handler. The MAC interrupt was
35
- __func__, ##args); \
85
+ * redirected to a GPIO interrupt to work around erratum ERR006687.
36
- } \
86
+ * This was implemented using the SOC's IOMUX block. In qemu, this GPIO
37
- } while (0)
87
+ * interrupt never fired since IOMUX is currently not supported in qemu.
38
-
88
+ * Linux instead received MAC interrupts on the timer interrupt.
39
-#ifndef DEBUG_IMX_PHY
89
+ * As a result, qemu versions with the swapped interrupt assignment work,
40
-#define DEBUG_IMX_PHY 0
90
+ * albeit accidentally, but qemu versions with the correct interrupt
41
-#endif
91
+ * assignment fail.
42
-
92
+ *
43
-#define PHY_PRINTF(fmt, args...) \
93
+ * To ensure that all versions of Linux work, generate ENET_INT_MAC
44
- do { \
94
+ * interrrupts on both interrupt lines. This should be changed if and when
45
- if (DEBUG_IMX_PHY) { \
95
+ * qemu supports IOMUX.
46
- fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
96
+ */
47
- __func__, ##args); \
97
+ if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] &
48
- } \
98
+ (ENET_INT_MAC | ENET_INT_TS_TIMER)) {
49
- } while (0)
99
qemu_set_irq(s->irq[1], 1);
50
-
51
#define IMX_MAX_DESC 1024
52
53
static const char *imx_default_reg_name(IMXFECState *s, uint32_t index)
54
@@ -XXX,XX +XXX,XX @@ static void imx_eth_update(IMXFECState *s);
55
* For now we don't handle any GPIO/interrupt line, so the OS will
56
* have to poll for the PHY status.
57
*/
58
-static void phy_update_irq(IMXFECState *s)
59
+static void imx_phy_update_irq(IMXFECState *s)
60
{
61
imx_eth_update(s);
62
}
63
64
-static void phy_update_link(IMXFECState *s)
65
+static void imx_phy_update_link(IMXFECState *s)
66
{
67
/* Autonegotiation status mirrors link status. */
68
if (qemu_get_queue(s->nic)->link_down) {
69
- PHY_PRINTF("link is down\n");
70
+ trace_imx_phy_update_link("down");
71
s->phy_status &= ~0x0024;
72
s->phy_int |= PHY_INT_DOWN;
100
} else {
73
} else {
101
qemu_set_irq(s->irq[1], 0);
74
- PHY_PRINTF("link is up\n");
75
+ trace_imx_phy_update_link("up");
76
s->phy_status |= 0x0024;
77
s->phy_int |= PHY_INT_ENERGYON;
78
s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
79
}
80
- phy_update_irq(s);
81
+ imx_phy_update_irq(s);
82
}
83
84
static void imx_eth_set_link(NetClientState *nc)
85
{
86
- phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
87
+ imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
88
}
89
90
-static void phy_reset(IMXFECState *s)
91
+static void imx_phy_reset(IMXFECState *s)
92
{
93
+ trace_imx_phy_reset();
94
+
95
s->phy_status = 0x7809;
96
s->phy_control = 0x3000;
97
s->phy_advertise = 0x01e1;
98
s->phy_int_mask = 0;
99
s->phy_int = 0;
100
- phy_update_link(s);
101
+ imx_phy_update_link(s);
102
}
103
104
-static uint32_t do_phy_read(IMXFECState *s, int reg)
105
+static uint32_t imx_phy_read(IMXFECState *s, int reg)
106
{
107
uint32_t val;
108
109
@@ -XXX,XX +XXX,XX @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
110
case 29: /* Interrupt source. */
111
val = s->phy_int;
112
s->phy_int = 0;
113
- phy_update_irq(s);
114
+ imx_phy_update_irq(s);
115
break;
116
case 30: /* Interrupt mask */
117
val = s->phy_int_mask;
118
@@ -XXX,XX +XXX,XX @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
119
break;
120
}
121
122
- PHY_PRINTF("read 0x%04x @ %d\n", val, reg);
123
+ trace_imx_phy_read(val, reg);
124
125
return val;
126
}
127
128
-static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
129
+static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
130
{
131
- PHY_PRINTF("write 0x%04x @ %d\n", val, reg);
132
+ trace_imx_phy_write(val, reg);
133
134
if (reg > 31) {
135
/* we only advertise one phy */
136
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
137
switch (reg) {
138
case 0: /* Basic Control */
139
if (val & 0x8000) {
140
- phy_reset(s);
141
+ imx_phy_reset(s);
142
} else {
143
s->phy_control = val & 0x7980;
144
/* Complete autonegotiation immediately. */
145
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
146
break;
147
case 30: /* Interrupt mask */
148
s->phy_int_mask = val & 0xff;
149
- phy_update_irq(s);
150
+ imx_phy_update_irq(s);
151
break;
152
case 17:
153
case 18:
154
@@ -XXX,XX +XXX,XX @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
155
static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr)
156
{
157
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
158
+
159
+ trace_imx_fec_read_bd(addr, bd->flags, bd->length, bd->data);
160
}
161
162
static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
163
@@ -XXX,XX +XXX,XX @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
164
static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr)
165
{
166
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
167
+
168
+ trace_imx_enet_read_bd(addr, bd->flags, bd->length, bd->data,
169
+ bd->option, bd->status);
170
}
171
172
static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr)
173
@@ -XXX,XX +XXX,XX @@ static void imx_fec_do_tx(IMXFECState *s)
174
int len;
175
176
imx_fec_read_bd(&bd, addr);
177
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n",
178
- addr, bd.flags, bd.length, bd.data);
179
if ((bd.flags & ENET_BD_R) == 0) {
180
+
181
/* Run out of descriptors to transmit. */
182
- FEC_PRINTF("tx_bd ran out of descriptors to transmit\n");
183
+ trace_imx_eth_tx_bd_busy();
184
+
185
break;
186
}
187
len = bd.length;
188
@@ -XXX,XX +XXX,XX @@ static void imx_enet_do_tx(IMXFECState *s, uint32_t index)
189
int len;
190
191
imx_enet_read_bd(&bd, addr);
192
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x "
193
- "status %04x\n", addr, bd.flags, bd.length, bd.data,
194
- bd.option, bd.status);
195
if ((bd.flags & ENET_BD_R) == 0) {
196
/* Run out of descriptors to transmit. */
197
+
198
+ trace_imx_eth_tx_bd_busy();
199
+
200
break;
201
}
202
len = bd.length;
203
@@ -XXX,XX +XXX,XX @@ static void imx_eth_enable_rx(IMXFECState *s, bool flush)
204
s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0;
205
206
if (!s->regs[ENET_RDAR]) {
207
- FEC_PRINTF("RX buffer full\n");
208
+ trace_imx_eth_rx_bd_full();
209
} else if (flush) {
210
qemu_flush_queued_packets(qemu_get_queue(s->nic));
211
}
212
@@ -XXX,XX +XXX,XX @@ static void imx_eth_reset(DeviceState *d)
213
memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor));
214
215
/* We also reset the PHY */
216
- phy_reset(s);
217
+ imx_phy_reset(s);
218
}
219
220
static uint32_t imx_default_read(IMXFECState *s, uint32_t index)
221
@@ -XXX,XX +XXX,XX @@ static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size)
222
break;
223
}
224
225
- FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
226
- value);
227
+ trace_imx_eth_read(index, imx_eth_reg_name(s, index), value);
228
229
return value;
230
}
231
@@ -XXX,XX +XXX,XX @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
232
const bool single_tx_ring = !imx_eth_is_multi_tx_ring(s);
233
uint32_t index = offset >> 2;
234
235
- FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
236
- (uint32_t)value);
237
+ trace_imx_eth_write(index, imx_eth_reg_name(s, index), value);
238
239
switch (index) {
240
case ENET_EIR:
241
@@ -XXX,XX +XXX,XX @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
242
if (extract32(value, 29, 1)) {
243
/* This is a read operation */
244
s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16,
245
- do_phy_read(s,
246
+ imx_phy_read(s,
247
extract32(value,
248
18, 10)));
249
} else {
250
/* This a write operation */
251
- do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
252
+ imx_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
253
}
254
/* raise the interrupt as the PHY operation is done */
255
s->regs[ENET_EIR] |= ENET_INT_MII;
256
@@ -XXX,XX +XXX,XX @@ static bool imx_eth_can_receive(NetClientState *nc)
257
{
258
IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc));
259
260
- FEC_PRINTF("\n");
261
-
262
return !!s->regs[ENET_RDAR];
263
}
264
265
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
266
unsigned int buf_len;
267
size_t size = len;
268
269
- FEC_PRINTF("len %d\n", (int)size);
270
+ trace_imx_fec_receive(size);
271
272
if (!s->regs[ENET_RDAR]) {
273
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
274
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
275
bd.length = buf_len;
276
size -= buf_len;
277
278
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
279
+ trace_imx_fec_receive_len(addr, bd.length);
280
281
/* The last 4 bytes are the CRC. */
282
if (size < 4) {
283
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
284
if (size == 0) {
285
/* Last buffer in frame. */
286
bd.flags |= flags | ENET_BD_L;
287
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
288
+
289
+ trace_imx_fec_receive_last(bd.flags);
290
+
291
s->regs[ENET_EIR] |= ENET_INT_RXF;
292
} else {
293
s->regs[ENET_EIR] |= ENET_INT_RXB;
294
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
295
size_t size = len;
296
bool shift16 = s->regs[ENET_RACC] & ENET_RACC_SHIFT16;
297
298
- FEC_PRINTF("len %d\n", (int)size);
299
+ trace_imx_enet_receive(size);
300
301
if (!s->regs[ENET_RDAR]) {
302
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
303
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
304
bd.length = buf_len;
305
size -= buf_len;
306
307
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
308
+ trace_imx_enet_receive_len(addr, bd.length);
309
310
/* The last 4 bytes are the CRC. */
311
if (size < 4) {
312
@@ -XXX,XX +XXX,XX @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
313
if (size == 0) {
314
/* Last buffer in frame. */
315
bd.flags |= flags | ENET_BD_L;
316
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
317
+
318
+ trace_imx_enet_receive_last(bd.flags);
319
+
320
/* Indicate that we've updated the last buffer descriptor. */
321
bd.last_buffer = ENET_BD_BDU;
322
if (bd.option & ENET_BD_RX_INT) {
323
diff --git a/hw/net/trace-events b/hw/net/trace-events
324
index XXXXXXX..XXXXXXX 100644
325
--- a/hw/net/trace-events
326
+++ b/hw/net/trace-events
327
@@ -XXX,XX +XXX,XX @@ i82596_receive_packet(size_t sz) "len=%zu"
328
i82596_new_mac(const char *id_with_mac) "New MAC for: %s"
329
i82596_set_multicast(uint16_t count) "Added %d multicast entries"
330
i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
331
+
332
+# imx_fec.c
333
+imx_phy_read(uint32_t val, int reg) "0x%04"PRIx32" <= reg[%d]"
334
+imx_phy_write(uint32_t val, int reg) "0x%04"PRIx32" => reg[%d]"
335
+imx_phy_update_link(const char *s) "%s"
336
+imx_phy_reset(void) ""
337
+imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
338
+imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x"
339
+imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit"
340
+imx_eth_rx_bd_full(void) "RX buffer is full"
341
+imx_eth_read(int reg, const char *reg_name, uint32_t value) "reg[%d:%s] => 0x%08"PRIx32
342
+imx_eth_write(int reg, const char *reg_name, uint64_t value) "reg[%d:%s] <= 0x%08"PRIx64
343
+imx_fec_receive(size_t size) "len %zu"
344
+imx_fec_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
345
+imx_fec_receive_last(int last) "rx frame flags 0x%04x"
346
+imx_enet_receive(size_t size) "len %zu"
347
+imx_enet_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
348
+imx_enet_receive_last(int last) "rx frame flags 0x%04x"
102
--
349
--
103
2.16.2
350
2.20.1
104
351
105
352
diff view generated by jsdifflib
1
From: Andrey Smirnov <andrew.smirnov@gmail.com>
1
From: Guenter Roeck <linux@roeck-us.net>
2
2
3
Add support for "TX complete"/TXDC interrupt generate by real HW since
3
The Linux kernel's IMX code now uses vendor specific commands.
4
it is needed to support guests other than Linux.
4
This results in endless warnings when booting the Linux kernel.
5
5
6
Based on the patch by Bill Paul as found here:
6
sdhci-esdhc-imx 2194000.usdhc: esdhc_wait_for_card_clock_gate_off:
7
https://bugs.launchpad.net/qemu/+bug/1753314
7
    card clock still not gate off in 100us!.
8
8
9
Cc: qemu-devel@nongnu.org
9
Implement support for the vendor specific command implemented in IMX hardware
10
Cc: qemu-arm@nongnu.org
10
to be able to avoid this warning.
11
Cc: Bill Paul <wpaul@windriver.com>
11
12
Cc: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
13
Signed-off-by: Bill Paul <wpaul@windriver.com>
13
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
14
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
14
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
15
Message-id: 20180315191141.6789-2-andrew.smirnov@gmail.com
15
Message-id: 20200603145258.195920-2-linux@roeck-us.net
16
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
17
---
19
include/hw/char/imx_serial.h | 3 +++
18
hw/sd/sdhci-internal.h | 5 +++++
20
hw/char/imx_serial.c | 20 +++++++++++++++++---
19
include/hw/sd/sdhci.h | 5 +++++
21
2 files changed, 20 insertions(+), 3 deletions(-)
20
hw/sd/sdhci.c | 18 +++++++++++++++++-
21
3 files changed, 27 insertions(+), 1 deletion(-)
22
22
23
diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h
23
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
24
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
25
--- a/include/hw/char/imx_serial.h
25
--- a/hw/sd/sdhci-internal.h
26
+++ b/include/hw/char/imx_serial.h
26
+++ b/hw/sd/sdhci-internal.h
27
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@
28
#define UCR2_RXEN (1<<1) /* Receiver enable */
28
#define SDHC_CMD_INHIBIT 0x00000001
29
#define UCR2_SRST (1<<0) /* Reset complete */
29
#define SDHC_DATA_INHIBIT 0x00000002
30
30
#define SDHC_DAT_LINE_ACTIVE 0x00000004
31
+#define UCR4_TCEN BIT(3) /* TX complete interrupt enable */
31
+#define SDHC_IMX_CLOCK_GATE_OFF 0x00000080
32
#define SDHC_DOING_WRITE 0x00000100
33
#define SDHC_DOING_READ 0x00000200
34
#define SDHC_SPACE_AVAILABLE 0x00000400
35
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription sdhci_vmstate;
36
37
38
#define ESDHC_MIX_CTRL 0x48
32
+
39
+
33
#define UTS1_TXEMPTY (1<<6)
40
#define ESDHC_VENDOR_SPEC 0xc0
34
#define UTS1_RXEMPTY (1<<5)
41
+#define ESDHC_IMX_FRC_SDCLK_ON (1 << 8)
35
#define UTS1_TXFULL (1<<4)
42
+
36
@@ -XXX,XX +XXX,XX @@ typedef struct IMXSerialState {
43
#define ESDHC_DLL_CTRL 0x60
37
uint32_t ubmr;
44
38
uint32_t ubrc;
45
#define ESDHC_TUNING_CTRL 0xcc
39
uint32_t ucr3;
46
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription sdhci_vmstate;
40
+ uint32_t ucr4;
47
#define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \
41
48
DEFINE_PROP_UINT8("sd-spec-version", _state, sd_spec_version, 2), \
42
qemu_irq irq;
49
DEFINE_PROP_UINT8("uhs", _state, uhs_mode, UHS_NOT_SUPPORTED), \
43
CharBackend chr;
50
+ DEFINE_PROP_UINT8("vendor", _state, vendor, SDHCI_VENDOR_NONE), \
44
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
51
\
52
/* Capabilities registers provide information on supported
53
* features of this specific host controller implementation */ \
54
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
45
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
46
--- a/hw/char/imx_serial.c
56
--- a/include/hw/sd/sdhci.h
47
+++ b/hw/char/imx_serial.c
57
+++ b/include/hw/sd/sdhci.h
48
@@ -XXX,XX +XXX,XX @@
58
@@ -XXX,XX +XXX,XX @@ typedef struct SDHCIState {
49
59
uint16_t acmd12errsts; /* Auto CMD12 error status register */
50
static const VMStateDescription vmstate_imx_serial = {
60
uint16_t hostctl2; /* Host Control 2 */
51
.name = TYPE_IMX_SERIAL,
61
uint64_t admasysaddr; /* ADMA System Address Register */
52
- .version_id = 1,
62
+ uint16_t vendor_spec; /* Vendor specific register */
53
- .minimum_version_id = 1,
63
54
+ .version_id = 2,
64
/* Read-only registers */
55
+ .minimum_version_id = 2,
65
uint64_t capareg; /* Capabilities Register */
56
.fields = (VMStateField[]) {
66
@@ -XXX,XX +XXX,XX @@ typedef struct SDHCIState {
57
VMSTATE_INT32(readbuff, IMXSerialState),
67
uint32_t quirks;
58
VMSTATE_UINT32(usr1, IMXSerialState),
68
uint8_t sd_spec_version;
59
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_imx_serial = {
69
uint8_t uhs_mode;
60
VMSTATE_UINT32(ubmr, IMXSerialState),
70
+ uint8_t vendor; /* For vendor specific functionality */
61
VMSTATE_UINT32(ubrc, IMXSerialState),
71
} SDHCIState;
62
VMSTATE_UINT32(ucr3, IMXSerialState),
72
63
+ VMSTATE_UINT32(ucr4, IMXSerialState),
73
+#define SDHCI_VENDOR_NONE 0
64
VMSTATE_END_OF_LIST()
74
+#define SDHCI_VENDOR_IMX 1
65
},
66
};
67
@@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s)
68
* unfortunately.
69
*/
70
mask = (s->ucr1 & UCR1_TXMPTYEN) ? USR2_TXFE : 0;
71
+ /*
72
+ * TCEN and TXDC are both bit 3
73
+ */
74
+ mask |= s->ucr4 & UCR4_TCEN;
75
+
75
+
76
usr2 = s->usr2 & mask;
76
/*
77
77
* Controller does not provide transfer-complete interrupt when not
78
qemu_set_irq(s->irq, usr1 || usr2);
78
* busy.
79
@@ -XXX,XX +XXX,XX @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
79
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
80
return s->ucr3;
80
index XXXXXXX..XXXXXXX 100644
81
81
--- a/hw/sd/sdhci.c
82
case 0x23: /* UCR4 */
82
+++ b/hw/sd/sdhci.c
83
+ return s->ucr4;
83
@@ -XXX,XX +XXX,XX @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size)
84
+
85
case 0x29: /* BRM Incremental */
86
return 0x0; /* TODO */
87
88
@@ -XXX,XX +XXX,XX @@ static void imx_serial_write(void *opaque, hwaddr offset,
89
* qemu_chr_fe_write and background I/O callbacks */
90
qemu_chr_fe_write_all(&s->chr, &ch, 1);
91
s->usr1 &= ~USR1_TRDY;
92
+ s->usr2 &= ~USR2_TXDC;
93
imx_update(s);
94
s->usr1 |= USR1_TRDY;
95
+ s->usr2 |= USR2_TXDC;
96
imx_update(s);
97
}
84
}
98
break;
85
break;
99
@@ -XXX,XX +XXX,XX @@ static void imx_serial_write(void *opaque, hwaddr offset,
86
100
s->ucr3 = value & 0xffff;
87
+ case ESDHC_VENDOR_SPEC:
101
break;
88
+ ret = s->vendor_spec;
102
89
+ break;
103
- case 0x2d: /* UTS1 */
90
case ESDHC_DLL_CTRL:
104
case 0x23: /* UCR4 */
91
case ESDHC_TUNE_CTRL_STATUS:
105
+ s->ucr4 = value & 0xffff;
92
case ESDHC_UNDOCUMENTED_REG27:
106
+ imx_update(s);
93
case ESDHC_TUNING_CTRL:
94
- case ESDHC_VENDOR_SPEC:
95
case ESDHC_MIX_CTRL:
96
case ESDHC_WTMK_LVL:
97
ret = 0;
98
@@ -XXX,XX +XXX,XX @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
99
case ESDHC_UNDOCUMENTED_REG27:
100
case ESDHC_TUNING_CTRL:
101
case ESDHC_WTMK_LVL:
107
+ break;
102
+ break;
108
+
103
+
109
+ case 0x2d: /* UTS1 */
104
case ESDHC_VENDOR_SPEC:
110
qemu_log_mask(LOG_UNIMP, "[%s]%s: Unimplemented reg 0x%"
105
+ s->vendor_spec = value;
111
HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
106
+ switch (s->vendor) {
112
/* TODO */
107
+ case SDHCI_VENDOR_IMX:
108
+ if (value & ESDHC_IMX_FRC_SDCLK_ON) {
109
+ s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
110
+ } else {
111
+ s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF;
112
+ }
113
+ break;
114
+ default:
115
+ break;
116
+ }
117
break;
118
119
case SDHC_HOSTCTL:
113
--
120
--
114
2.16.2
121
2.20.1
115
122
116
123
diff view generated by jsdifflib
1
From: Wei Huang <wei@redhat.com>
1
From: Guenter Roeck <linux@roeck-us.net>
2
2
3
For a guest kernel that supports KASLR, the load address can change every
3
Set vendor property to IMX to enable IMX specific functionality
4
time the guest VM runs. To find the physical base address correctly,
4
in sdhci code.
5
current QEMU dump searches VMCOREINFO for the string "NUMBER(phys_base)=".
6
However this string pattern is only available on x86_64. AArch64 uses a
7
different field, called "NUMBER(PHYS_OFFSET)=". This patch makes sure
8
QEMU dump uses the correct string on AArch64.
9
5
10
Signed-off-by: Wei Huang <wei@redhat.com>
6
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
7
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
12
Message-id: 1520615003-20869-1-git-send-email-wei@redhat.com
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Message-id: 20200603145258.195920-3-linux@roeck-us.net
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
11
---
15
dump.c | 14 +++++++++++---
12
hw/arm/fsl-imx25.c | 6 ++++++
16
1 file changed, 11 insertions(+), 3 deletions(-)
13
hw/arm/fsl-imx6.c | 6 ++++++
14
hw/arm/fsl-imx6ul.c | 2 ++
15
hw/arm/fsl-imx7.c | 2 ++
16
4 files changed, 16 insertions(+)
17
17
18
diff --git a/dump.c b/dump.c
18
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
19
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
20
--- a/dump.c
20
--- a/hw/arm/fsl-imx25.c
21
+++ b/dump.c
21
+++ b/hw/arm/fsl-imx25.c
22
@@ -XXX,XX +XXX,XX @@ static void vmcoreinfo_update_phys_base(DumpState *s)
22
@@ -XXX,XX +XXX,XX @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
23
23
&err);
24
lines = g_strsplit((char *)vmci, "\n", -1);
24
object_property_set_uint(OBJECT(&s->esdhc[i]), IMX25_ESDHC_CAPABILITIES,
25
for (i = 0; lines[i]; i++) {
25
"capareg", &err);
26
- if (g_str_has_prefix(lines[i], "NUMBER(phys_base)=")) {
26
+ object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
27
- if (qemu_strtou64(lines[i] + 18, NULL, 16,
27
+ "vendor", &err);
28
+ const char *prefix = NULL;
28
+ if (err) {
29
+
29
+ error_propagate(errp, err);
30
+ if (s->dump_info.d_machine == EM_X86_64) {
30
+ return;
31
+ prefix = "NUMBER(phys_base)=";
32
+ } else if (s->dump_info.d_machine == EM_AARCH64) {
33
+ prefix = "NUMBER(PHYS_OFFSET)=";
34
+ }
31
+ }
35
+
32
object_property_set_bool(OBJECT(&s->esdhc[i]), true, "realized", &err);
36
+ if (prefix && g_str_has_prefix(lines[i], prefix)) {
33
if (err) {
37
+ if (qemu_strtou64(lines[i] + strlen(prefix), NULL, 16,
34
error_propagate(errp, err);
38
&phys_base) < 0) {
35
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
39
- warn_report("Failed to read NUMBER(phys_base)=");
36
index XXXXXXX..XXXXXXX 100644
40
+ warn_report("Failed to read %s", prefix);
37
--- a/hw/arm/fsl-imx6.c
41
} else {
38
+++ b/hw/arm/fsl-imx6.c
42
s->dump_info.phys_base = phys_base;
39
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
43
}
40
&err);
41
object_property_set_uint(OBJECT(&s->esdhc[i]), IMX6_ESDHC_CAPABILITIES,
42
"capareg", &err);
43
+ object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
44
+ "vendor", &err);
45
+ if (err) {
46
+ error_propagate(errp, err);
47
+ return;
48
+ }
49
object_property_set_bool(OBJECT(&s->esdhc[i]), true, "realized", &err);
50
if (err) {
51
error_propagate(errp, err);
52
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/hw/arm/fsl-imx6ul.c
55
+++ b/hw/arm/fsl-imx6ul.c
56
@@ -XXX,XX +XXX,XX @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
57
FSL_IMX6UL_USDHC2_IRQ,
58
};
59
60
+ object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
61
+ "vendor", &error_abort);
62
object_property_set_bool(OBJECT(&s->usdhc[i]), true, "realized",
63
&error_abort);
64
65
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/hw/arm/fsl-imx7.c
68
+++ b/hw/arm/fsl-imx7.c
69
@@ -XXX,XX +XXX,XX @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp)
70
FSL_IMX7_USDHC3_IRQ,
71
};
72
73
+ object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
74
+ "vendor", &error_abort);
75
object_property_set_bool(OBJECT(&s->usdhc[i]), true, "realized",
76
&error_abort);
77
44
--
78
--
45
2.16.2
79
2.20.1
46
80
47
81
diff view generated by jsdifflib