Small pile of bug fixes for rc1. I've included my patches to get
our docs building with Sphinx 3, just for convenience...

-- PMM

The following changes since commit b149dea55cce97cb226683d06af61984a1c11e96:

  Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20201102' into staging (2020-11-02 10:57:48 +0000)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20201102

for you to fetch changes up to ffb4fbf90a2f63c9cb33e4bb9f854c79bf04ca4a:

  tests/qtest/npcm7xx_rng-test: Disable randomness tests (2020-11-02 16:52:18 +0000)

----------------------------------------------------------------
target-arm queue:
 * target/arm: Fix Neon emulation bugs on big-endian hosts
 * target/arm: fix handling of HCR.FB
 * target/arm: fix LORID_EL1 access check
 * disas/capstone: Fix monitor disassembly of >32 bytes
 * hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
 * hw/arm/boot: fix SVE for EL3 direct kernel boot
 * hw/display/omap_lcdc: Fix potential NULL pointer dereference
 * hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
 * target/arm: Get correct MMU index for other-security-state
 * configure: Test that gio libs from pkg-config work
 * hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
 * docs: Fix building with Sphinx 3
 * tests/qtest/npcm7xx_rng-test: Disable randomness tests

----------------------------------------------------------------
AlexChen (2):
      hw/display/omap_lcdc: Fix potential NULL pointer dereference
      hw/display/exynos4210_fimd: Fix potential NULL pointer dereference

Peter Maydell (9):
      target/arm: Fix float16 pairwise Neon ops on big-endian hosts
      target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts
      disas/capstone: Fix monitor disassembly of >32 bytes
      target/arm: Get correct MMU index for other-security-state
      configure: Test that gio libs from pkg-config work
      hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
      scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments
      qemu-option-trace.rst.inc: Don't use option:: markup
      tests/qtest/npcm7xx_rng-test: Disable randomness tests

Philippe Mathieu-Daudé (1):
      hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)

Richard Henderson (11):
      target/arm: Introduce neon_full_reg_offset
      target/arm: Move neon_element_offset to translate.c
      target/arm: Use neon_element_offset in neon_load/store_reg
      target/arm: Use neon_element_offset in vfp_reg_offset
      target/arm: Add read/write_neon_element32
      target/arm: Expand read/write_neon_element32 to all MemOp
      target/arm: Rename neon_load_reg32 to vfp_load_reg32
      target/arm: Add read/write_neon_element64
      target/arm: Rename neon_load_reg64 to vfp_load_reg64
      target/arm: Simplify do_long_3d and do_2scalar_long
      target/arm: Improve do_prewiden_3d

Rémi Denis-Courmont (3):
      target/arm: fix handling of HCR.FB
      target/arm: fix LORID_EL1 access check
      hw/arm/boot: fix SVE for EL3 direct kernel boot

 docs/qemu-option-trace.rst.inc     |   6 +-
 configure                          |  10 +-
 include/hw/intc/arm_gicv3_common.h |   1 -
 disas/capstone.c                   |   2 +-
 hw/arm/boot.c                      |   3 +
 hw/arm/smmuv3.c                    |   3 +-
 hw/display/exynos4210_fimd.c       |   4 +-
 hw/display/omap_lcdc.c             |  10 +-
 hw/intc/arm_gicv3_cpuif.c          |   5 +-
 target/arm/helper.c                |  24 +-
 target/arm/m_helper.c              |   3 +-
 target/arm/translate.c             | 153 +++++++++---
 target/arm/vec_helper.c            |  12 +-
 tests/qtest/npcm7xx_rng-test.c     |  14 +-
 scripts/kernel-doc                 |  18 +-
 target/arm/translate-neon.c.inc    | 472 ++++++++++++++++++++-----------------
 target/arm/translate-vfp.c.inc     | 341 +++++++++++----------------
 17 files changed, 588 insertions(+), 493 deletions(-)

From: Richard Henderson <richard.henderson@linaro.org>
Subject: target/arm: Introduce neon_full_reg_offset

This function makes it clear that we're talking about the whole
register, and not the 32-bit piece at index 0. This fixes a bug
when running on a big-endian host.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-2-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c          |  8 ++++++
 target/arm/translate-neon.c.inc | 44 ++++++++++++++++-----------------
 target/arm/translate-vfp.c.inc  |  2 +-
 3 files changed, 31 insertions(+), 23 deletions(-)
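
To see why using the 32-bit piece's offset as the base of a
whole-register operation breaks on big-endian hosts, here is a
standalone sketch (plain C modelling the zregs layout; not QEMU code,
names are illustrative):

/* Standalone sketch (not QEMU code): models vfp.zregs[].d[] to show
 * why "offset of the whole Dreg" and "offset of its 32-bit piece at
 * index 0" differ on a big-endian host.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct zreg { uint64_t d[2]; };
struct cpu_state { struct zreg zregs[32]; };

/* What neon_full_reg_offset() computes: host-endian independent. */
static long full_reg_offset(unsigned reg)
{
    return offsetof(struct cpu_state, zregs[reg >> 1].d[reg & 1]);
}

/* The low 32-bit piece: on a big-endian host the least significant
 * word of a uint64_t lives at byte offset 4, so this is NOT the
 * start of the Dreg there.
 */
static long sreg0_offset(unsigned reg, int big_endian_host)
{
    return full_reg_offset(reg) + (big_endian_host ? 4 : 0);
}

int main(void)
{
    /* On a BE host the two differ by 4, so using the piece's offset
     * as the base of an 8- or 16-byte gvec region reads wrong bytes.
     */
    printf("D1: full %ld, piece0 LE %ld, piece0 BE %ld\n",
           full_reg_offset(1), sreg0_offset(1, 0), sreg0_offset(1, 1));
    return 0;
}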

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_hlt(DisasContext *s, int imm)
     unallocated_encoding(s);
 }
 
+/*
+ * Return the offset of a "full" NEON Dreg.
+ */
+static long neon_full_reg_offset(unsigned reg)
+{
+    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
+}
+
 static inline long vfp_reg_offset(bool dp, unsigned reg)
 {
     if (dp) {
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -XXX,XX +XXX,XX @@ neon_element_offset(int reg, int element, MemOp size)
         ofs ^= 8 - element_size;
     }
 #endif
-    return neon_reg_offset(reg, 0) + ofs;
+    return neon_full_reg_offset(reg) + ofs;
 }
 
 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
@@ -XXX,XX +XXX,XX @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
              * We cannot write 16 bytes at once because the
              * destination is unaligned.
              */
-            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                  8, 8, tmp);
-            tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
-                             neon_reg_offset(vd, 0), 8, 8);
+            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
+                             neon_full_reg_offset(vd), 8, 8);
         } else {
-            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                  vec_size, vec_size, tmp);
         }
         tcg_gen_addi_i32(addr, addr, 1 << size);
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
 {
     int vec_size = a->q ? 16 : 8;
-    int rd_ofs = neon_reg_offset(a->vd, 0);
-    int rn_ofs = neon_reg_offset(a->vn, 0);
-    int rm_ofs = neon_reg_offset(a->vm, 0);
+    int rd_ofs = neon_full_reg_offset(a->vd);
+    int rn_ofs = neon_full_reg_offset(a->vn);
+    int rm_ofs = neon_full_reg_offset(a->vm);
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
@@ -XXX,XX +XXX,XX @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
 {
     /* Handle a 2-reg-shift insn which can be vectorized. */
     int vec_size = a->q ? 16 : 8;
-    int rd_ofs = neon_reg_offset(a->vd, 0);
-    int rm_ofs = neon_reg_offset(a->vm, 0);
+    int rd_ofs = neon_full_reg_offset(a->vd);
+    int rm_ofs = neon_full_reg_offset(a->vm);
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
@@ -XXX,XX +XXX,XX @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
 {
     /* FP operations in 2-reg-and-shift group */
     int vec_size = a->q ? 16 : 8;
-    int rd_ofs = neon_reg_offset(a->vd, 0);
-    int rm_ofs = neon_reg_offset(a->vm, 0);
+    int rd_ofs = neon_full_reg_offset(a->vd);
+    int rm_ofs = neon_full_reg_offset(a->vm);
     TCGv_ptr fpst;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -XXX,XX +XXX,XX @@ static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
         return true;
     }
 
-    reg_ofs = neon_reg_offset(a->vd, 0);
+    reg_ofs = neon_full_reg_offset(a->vd);
     vec_size = a->q ? 16 : 8;
     imm = asimd_imm_const(a->imm, a->cmode, a->op);
 
@@ -XXX,XX +XXX,XX @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
         return true;
     }
 
-    tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
-                       neon_reg_offset(a->vn, 0),
-                       neon_reg_offset(a->vm, 0),
+    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
+                       neon_full_reg_offset(a->vn),
+                       neon_full_reg_offset(a->vm),
                        16, 16, 0, fn_gvec);
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
 {
     /* Two registers and a scalar, using gvec */
     int vec_size = a->q ? 16 : 8;
-    int rd_ofs = neon_reg_offset(a->vd, 0);
-    int rn_ofs = neon_reg_offset(a->vn, 0);
+    int rd_ofs = neon_full_reg_offset(a->vd);
+    int rn_ofs = neon_full_reg_offset(a->vn);
     int rm_ofs;
     int idx;
     TCGv_ptr fpstatus;
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
     /* a->vm is M:Vm, which encodes both register and index */
     idx = extract32(a->vm, a->size + 2, 2);
     a->vm = extract32(a->vm, 0, a->size + 2);
-    rm_ofs = neon_reg_offset(a->vm, 0);
+    rm_ofs = neon_full_reg_offset(a->vm);
 
     fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
         return true;
     }
 
-    tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
+    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
                          neon_element_offset(a->vm, a->index, a->size),
                          a->q ? 16 : 8, a->q ? 16 : 8);
     return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
 {
     int vec_size = a->q ? 16 : 8;
-    int rd_ofs = neon_reg_offset(a->vd, 0);
-    int rm_ofs = neon_reg_offset(a->vm, 0);
+    int rd_ofs = neon_full_reg_offset(a->vd);
+    int rm_ofs = neon_full_reg_offset(a->vm);
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
     }
 
     tmp = load_reg(s, a->rt);
-    tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
+    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                          vec_size, vec_size, tmp);
     tcg_temp_free_i32(tmp);
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>
Subject: target/arm: Move neon_element_offset to translate.c

This will shortly have users outside of translate-neon.c.inc.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-3-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c          | 20 ++++++++++++++++++++
 target/arm/translate-neon.c.inc | 19 -------------------
 2 files changed, 20 insertions(+), 19 deletions(-)
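
The XOR in neon_element_offset is the subtle part; this standalone
sketch (plain C, not QEMU code) prints the offsets it yields for
16-bit elements on little- and big-endian hosts:

/* Standalone sketch (not QEMU code): the XOR that remaps an element
 * offset when the host stores each 8-byte unit big-endian.
 */
#include <stdio.h>

static long element_offset(int element, int size_log2, int big_endian)
{
    int element_size = 1 << size_log2;
    int ofs = element * element_size;    /* fully little-endian layout */
    if (big_endian && element_size < 8) {
        /* The 8-byte units stay in order; only the position inside
         * each unit is mirrored, hence XOR with (8 - element_size).
         */
        ofs ^= 8 - element_size;
    }
    return ofs;
}

int main(void)
{
    /* 16-bit elements (size_log2 = 1): element 0 sits at offset 0 on
     * a LE host but 6 on a BE host, element 3 at 6 vs 0, and element
     * 4 starts the second uint64_t at 8 vs 14.
     */
    for (int ele = 0; ele < 8; ele++) {
        printf("element %d: LE %ld, BE %ld\n", ele,
               element_offset(ele, 1, 0), element_offset(ele, 1, 1));
    }
    return 0;
}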

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static long neon_full_reg_offset(unsigned reg)
     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
 }
 
+/*
+ * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
+ * where 0 is the least significant end of the register.
+ */
+static long neon_element_offset(int reg, int element, MemOp size)
+{
+    int element_size = 1 << size;
+    int ofs = element * element_size;
+#ifdef HOST_WORDS_BIGENDIAN
+    /*
+     * Calculate the offset assuming fully little-endian,
+     * then XOR to account for the order of the 8-byte units.
+     */
+    if (element_size < 8) {
+        ofs ^= 8 - element_size;
+    }
+#endif
+    return neon_full_reg_offset(reg) + ofs;
+}
+
 static inline long vfp_reg_offset(bool dp, unsigned reg)
 {
     if (dp) {
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -XXX,XX +XXX,XX @@ static inline int neon_3same_fp_size(DisasContext *s, int x)
 #include "decode-neon-ls.c.inc"
 #include "decode-neon-shared.c.inc"
 
-/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
- * where 0 is the least significant end of the register.
- */
-static inline long
-neon_element_offset(int reg, int element, MemOp size)
-{
-    int element_size = 1 << size;
-    int ofs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
-    /* Calculate the offset assuming fully little-endian,
-     * then XOR to account for the order of the 8-byte units.
-     */
-    if (element_size < 8) {
-        ofs ^= 8 - element_size;
-    }
-#endif
-    return neon_full_reg_offset(reg) + ofs;
-}
-
 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
 {
     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>
Subject: target/arm: Use neon_element_offset in neon_load/store_reg

These are the only users of neon_reg_offset, so remove that.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-4-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)
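
The substitution should be behavior-preserving on both host
endiannesses; the following standalone sketch (plain C, not QEMU code,
and assuming CPU_DoubleU puts the low word at offset 4 on big-endian
hosts) cross-checks the old Sreg arithmetic against
neon_element_offset's:

/* Standalone sketch (not QEMU code): check that the removed
 * neon_reg_offset(reg, n) and neon_element_offset(reg, n, MO_32)
 * agree, byte for byte, on both host endiannesses.
 */
#include <assert.h>
#include <stdio.h>

/* Old path: Sreg number reg * 2 + n; the CPU_DoubleU union put the
 * low word at offset 4 on a big-endian host.
 */
static long old_offset(int reg, int n, int be)
{
    int sreg = reg * 2 + n;
    long base = (long)(sreg >> 1) * 8;    /* containing uint64_t */
    int upper = sreg & 1;                 /* high half of the Dreg */
    return base + (be ? (upper ? 0 : 4) : (upper ? 4 : 0));
}

/* New path: element n of size MO_32 (4 bytes) within Dreg reg. */
static long new_offset(int reg, int n, int be)
{
    long ofs = n * 4;
    if (be) {
        ofs ^= 8 - 4;
    }
    return (long)reg * 8 + ofs;
}

int main(void)
{
    for (int reg = 0; reg < 32; reg++) {
        for (int n = 0; n < 2; n++) {
            for (int be = 0; be < 2; be++) {
                assert(old_offset(reg, n, be) == new_offset(reg, n, be));
            }
        }
    }
    printf("old and new offsets agree\n");
    return 0;
}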

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline long vfp_reg_offset(bool dp, unsigned reg)
     }
 }
 
-/* Return the offset of a 32-bit piece of a NEON register.
-   zero is the least significant end of the register.  */
-static inline long
-neon_reg_offset (int reg, int n)
-{
-    int sreg;
-    sreg = reg * 2 + n;
-    return vfp_reg_offset(0, sreg);
-}
-
 static TCGv_i32 neon_load_reg(int reg, int pass)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
+    tcg_gen_ld_i32(tmp, cpu_env, neon_element_offset(reg, pass, MO_32));
     return tmp;
 }
 
 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
 {
-    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
+    tcg_gen_st_i32(var, cpu_env, neon_element_offset(reg, pass, MO_32));
     tcg_temp_free_i32(var);
 }
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>
Subject: target/arm: Use neon_element_offset in vfp_reg_offset

This seems a bit more readable than using offsetof CPU_DoubleU.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-5-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)
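
The single-precision case leans on the regular Sreg-to-(Dreg, element)
mapping; a quick standalone sketch (plain C, not QEMU code):

/* Standalone sketch (not QEMU code): the Sreg -> (Dreg, element)
 * mapping behind neon_element_offset(reg >> 1, reg & 1, MO_32).
 */
#include <stdio.h>

int main(void)
{
    for (unsigned s = 0; s < 8; s++) {
        /* Sreg 2n is element 0 of Dreg n; Sreg 2n + 1 is element 1. */
        printf("S%u -> D%u element %u\n", s, s >> 1, s & 1);
    }
    return 0;
}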

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static long neon_element_offset(int reg, int element, MemOp size)
     return neon_full_reg_offset(reg) + ofs;
 }
 
-static inline long vfp_reg_offset(bool dp, unsigned reg)
+/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
+static long vfp_reg_offset(bool dp, unsigned reg)
 {
     if (dp) {
-        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
+        return neon_element_offset(reg, 0, MO_64);
     } else {
-        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
-        if (reg & 1) {
-            ofs += offsetof(CPU_DoubleU, l.upper);
-        } else {
-            ofs += offsetof(CPU_DoubleU, l.lower);
-        }
-        return ofs;
+        return neon_element_offset(reg >> 1, reg & 1, MO_32);
     }
 }
--
2.20.1

From: Richard Henderson <richard.henderson@linaro.org>
Subject: target/arm: Add read/write_neon_element32

Model these off the aa64 read/write_vec_element functions.
Use it within translate-neon.c.inc. The new functions do
not allocate or free temps, so this rearranges the calling
code a bit.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-6-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c          |  26 ++
 target/arm/translate-neon.c.inc | 256 ++++++++++++++++++++------------
 2 files changed, 183 insertions(+), 99 deletions(-)
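
The ownership change is the point to notice: neon_load_reg() returned
a freshly allocated temp, while read_neon_element32() fills a temp the
caller owns, so loops can hoist the allocation out. A condensed
before/after, paraphrased from the do_2shift_env_32 hunk below (a
fragment, not compilable on its own):

/* Before: a temp is allocated on every pass and consumed by the store. */
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
    TCGv_i32 tmp = neon_load_reg(a->vm, pass);      /* allocates */
    fn(tmp, cpu_env, tmp, constimm);
    neon_store_reg(a->vd, pass, tmp);               /* frees */
}

/* After: the caller allocates once, reuses it, and frees it once. */
tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
    read_neon_element32(tmp, a->vm, pass, MO_32);
    fn(tmp, cpu_env, tmp, constimm);
    write_neon_element32(tmp, a->vd, pass, MO_32);
}
tcg_temp_free_i32(tmp);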

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void neon_store_reg32(TCGv_i32 var, int reg)
     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
 }
 
+static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp size)
+{
+    long off = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_32:
+        tcg_gen_ld_i32(dest, cpu_env, off);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp size)
+{
+    long off = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_32:
+        tcg_gen_st_i32(src, cpu_env, off);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
 {
     TCGv_ptr ret = tcg_temp_new_ptr();
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -XXX,XX +XXX,XX @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
      * early. Since Q is 0 there are always just two passes, so instead
      * of a complicated loop over each pass we just unroll.
      */
-    tmp = neon_load_reg(a->vn, 0);
-    tmp2 = neon_load_reg(a->vn, 1);
+    tmp = tcg_temp_new_i32();
+    tmp2 = tcg_temp_new_i32();
+    tmp3 = tcg_temp_new_i32();
+
+    read_neon_element32(tmp, a->vn, 0, MO_32);
+    read_neon_element32(tmp2, a->vn, 1, MO_32);
     fn(tmp, tmp, tmp2);
-    tcg_temp_free_i32(tmp2);
 
-    tmp3 = neon_load_reg(a->vm, 0);
-    tmp2 = neon_load_reg(a->vm, 1);
+    read_neon_element32(tmp3, a->vm, 0, MO_32);
+    read_neon_element32(tmp2, a->vm, 1, MO_32);
     fn(tmp3, tmp3, tmp2);
-    tcg_temp_free_i32(tmp2);
 
-    neon_store_reg(a->vd, 0, tmp);
-    neon_store_reg(a->vd, 1, tmp3);
+    write_neon_element32(tmp, a->vd, 0, MO_32);
+    write_neon_element32(tmp3, a->vd, 1, MO_32);
+
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_i32(tmp3);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
      * 2-reg-and-shift operations, size < 3 case, where the
      * helper needs to be passed cpu_env.
      */
-    TCGv_i32 constimm;
+    TCGv_i32 constimm, tmp;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
      * by immediate using the variable shift operations.
      */
     constimm = tcg_const_i32(dup_const(a->size, a->shift));
+    tmp = tcg_temp_new_i32();
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+        read_neon_element32(tmp, a->vm, pass, MO_32);
         fn(tmp, cpu_env, tmp, constimm);
-        neon_store_reg(a->vd, pass, tmp);
+        write_neon_element32(tmp, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tmp);
     tcg_temp_free_i32(constimm);
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
     constimm = tcg_const_i64(-a->shift);
     rm1 = tcg_temp_new_i64();
     rm2 = tcg_temp_new_i64();
+    rd = tcg_temp_new_i32();
 
     /* Load both inputs first to avoid potential overwrite if rm == rd */
     neon_load_reg64(rm1, a->vm);
     neon_load_reg64(rm2, a->vm + 1);
 
     shiftfn(rm1, rm1, constimm);
-    rd = tcg_temp_new_i32();
     narrowfn(rd, cpu_env, rm1);
-    neon_store_reg(a->vd, 0, rd);
+    write_neon_element32(rd, a->vd, 0, MO_32);
 
     shiftfn(rm2, rm2, constimm);
-    rd = tcg_temp_new_i32();
     narrowfn(rd, cpu_env, rm2);
-    neon_store_reg(a->vd, 1, rd);
+    write_neon_element32(rd, a->vd, 1, MO_32);
 
+    tcg_temp_free_i32(rd);
     tcg_temp_free_i64(rm1);
     tcg_temp_free_i64(rm2);
     tcg_temp_free_i64(constimm);
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
     constimm = tcg_const_i32(imm);
 
     /* Load all inputs first to avoid potential overwrite */
-    rm1 = neon_load_reg(a->vm, 0);
-    rm2 = neon_load_reg(a->vm, 1);
-    rm3 = neon_load_reg(a->vm + 1, 0);
-    rm4 = neon_load_reg(a->vm + 1, 1);
+    rm1 = tcg_temp_new_i32();
+    rm2 = tcg_temp_new_i32();
+    rm3 = tcg_temp_new_i32();
+    rm4 = tcg_temp_new_i32();
+    read_neon_element32(rm1, a->vm, 0, MO_32);
+    read_neon_element32(rm2, a->vm, 1, MO_32);
+    read_neon_element32(rm3, a->vm, 2, MO_32);
+    read_neon_element32(rm4, a->vm, 3, MO_32);
     rtmp = tcg_temp_new_i64();
 
     shiftfn(rm1, rm1, constimm);
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
     tcg_temp_free_i32(rm2);
 
     narrowfn(rm1, cpu_env, rtmp);
-    neon_store_reg(a->vd, 0, rm1);
+    write_neon_element32(rm1, a->vd, 0, MO_32);
+    tcg_temp_free_i32(rm1);
 
     shiftfn(rm3, rm3, constimm);
     shiftfn(rm4, rm4, constimm);
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
 
     narrowfn(rm3, cpu_env, rtmp);
     tcg_temp_free_i64(rtmp);
-    neon_store_reg(a->vd, 1, rm3);
+    write_neon_element32(rm3, a->vd, 1, MO_32);
+    tcg_temp_free_i32(rm3);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
         widen_mask = dup_const(a->size + 1, widen_mask);
     }
 
-    rm0 = neon_load_reg(a->vm, 0);
-    rm1 = neon_load_reg(a->vm, 1);
+    rm0 = tcg_temp_new_i32();
+    rm1 = tcg_temp_new_i32();
+    read_neon_element32(rm0, a->vm, 0, MO_32);
+    read_neon_element32(rm1, a->vm, 1, MO_32);
     tmp = tcg_temp_new_i64();
 
     widenfn(tmp, rm0);
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
     if (src1_wide) {
         neon_load_reg64(rn0_64, a->vn);
     } else {
-        TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        read_neon_element32(tmp, a->vn, 0, MO_32);
         widenfn(rn0_64, tmp);
         tcg_temp_free_i32(tmp);
     }
-    rm = neon_load_reg(a->vm, 0);
+    rm = tcg_temp_new_i32();
+    read_neon_element32(rm, a->vm, 0, MO_32);
 
     widenfn(rm_64, rm);
     tcg_temp_free_i32(rm);
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
     if (src1_wide) {
         neon_load_reg64(rn1_64, a->vn + 1);
     } else {
-        TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        read_neon_element32(tmp, a->vn, 1, MO_32);
         widenfn(rn1_64, tmp);
         tcg_temp_free_i32(tmp);
     }
-    rm = neon_load_reg(a->vm, 1);
+    rm = tcg_temp_new_i32();
+    read_neon_element32(rm, a->vm, 1, MO_32);
 
     neon_store_reg64(rn0_64, a->vd);
 
@@ -XXX,XX +XXX,XX @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
 
     narrowfn(rd1, rn_64);
 
-    neon_store_reg(a->vd, 0, rd0);
-    neon_store_reg(a->vd, 1, rd1);
+    write_neon_element32(rd0, a->vd, 0, MO_32);
+    write_neon_element32(rd1, a->vd, 1, MO_32);
 
+    tcg_temp_free_i32(rd0);
+    tcg_temp_free_i32(rd1);
     tcg_temp_free_i64(rn_64);
     tcg_temp_free_i64(rm_64);
 
@@ -XXX,XX +XXX,XX @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
     rd0 = tcg_temp_new_i64();
     rd1 = tcg_temp_new_i64();
 
-    rn = neon_load_reg(a->vn, 0);
-    rm = neon_load_reg(a->vm, 0);
+    rn = tcg_temp_new_i32();
+    rm = tcg_temp_new_i32();
+    read_neon_element32(rn, a->vn, 0, MO_32);
+    read_neon_element32(rm, a->vm, 0, MO_32);
     opfn(rd0, rn, rm);
-    tcg_temp_free_i32(rn);
-    tcg_temp_free_i32(rm);
 
-    rn = neon_load_reg(a->vn, 1);
-    rm = neon_load_reg(a->vm, 1);
+    read_neon_element32(rn, a->vn, 1, MO_32);
+    read_neon_element32(rm, a->vm, 1, MO_32);
     opfn(rd1, rn, rm);
     tcg_temp_free_i32(rn);
     tcg_temp_free_i32(rm);
@@ -XXX,XX +XXX,XX @@ static void gen_neon_dup_high16(TCGv_i32 var)
 
 static inline TCGv_i32 neon_get_scalar(int size, int reg)
 {
-    TCGv_i32 tmp;
-    if (size == 1) {
-        tmp = neon_load_reg(reg & 7, reg >> 4);
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    if (size == MO_16) {
+        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
         if (reg & 8) {
             gen_neon_dup_high16(tmp);
         } else {
             gen_neon_dup_low16(tmp);
         }
     } else {
-        tmp = neon_load_reg(reg & 15, reg >> 4);
+        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
     }
     return tmp;
 }
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
      * perform an accumulation operation of that result into the
      * destination.
      */
-    TCGv_i32 scalar;
+    TCGv_i32 scalar, tmp;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
     }
 
     scalar = neon_get_scalar(a->size, a->vm);
+    tmp = tcg_temp_new_i32();
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 tmp = neon_load_reg(a->vn, pass);
+        read_neon_element32(tmp, a->vn, pass, MO_32);
         opfn(tmp, tmp, scalar);
         if (accfn) {
-            TCGv_i32 rd = neon_load_reg(a->vd, pass);
+            TCGv_i32 rd = tcg_temp_new_i32();
+            read_neon_element32(rd, a->vd, pass, MO_32);
             accfn(tmp, rd, tmp);
             tcg_temp_free_i32(rd);
         }
-        neon_store_reg(a->vd, pass, tmp);
+        write_neon_element32(tmp, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tmp);
     tcg_temp_free_i32(scalar);
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
      * performs a kind of fused op-then-accumulate using a helper
      * function that takes all of rd, rn and the scalar at once.
      */
-    TCGv_i32 scalar;
+    TCGv_i32 scalar, rn, rd;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -XXX,XX +XXX,XX @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
     }
 
     scalar = neon_get_scalar(a->size, a->vm);
+    rn = tcg_temp_new_i32();
+    rd = tcg_temp_new_i32();
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 rn = neon_load_reg(a->vn, pass);
-        TCGv_i32 rd = neon_load_reg(a->vd, pass);
+        read_neon_element32(rn, a->vn, pass, MO_32);
+        read_neon_element32(rd, a->vd, pass, MO_32);
         opfn(rd, cpu_env, rn, scalar, rd);
-        tcg_temp_free_i32(rn);
-        neon_store_reg(a->vd, pass, rd);
+        write_neon_element32(rd, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(rn);
+    tcg_temp_free_i32(rd);
     tcg_temp_free_i32(scalar);
 
     return true;
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
     scalar = neon_get_scalar(a->size, a->vm);
 
     /* Load all inputs before writing any outputs, in case of overlap */
-    rn = neon_load_reg(a->vn, 0);
+    rn = tcg_temp_new_i32();
+    read_neon_element32(rn, a->vn, 0, MO_32);
     rn0_64 = tcg_temp_new_i64();
     opfn(rn0_64, rn, scalar);
-    tcg_temp_free_i32(rn);
 
-    rn = neon_load_reg(a->vn, 1);
+    read_neon_element32(rn, a->vn, 1, MO_32);
     rn1_64 = tcg_temp_new_i64();
     opfn(rn1_64, rn, scalar);
     tcg_temp_free_i32(rn);
@@ -XXX,XX +XXX,XX @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
         return false;
     }
     n <<= 3;
+    tmp = tcg_temp_new_i32();
     if (a->op) {
-        tmp = neon_load_reg(a->vd, 0);
+        read_neon_element32(tmp, a->vd, 0, MO_32);
     } else {
-        tmp = tcg_temp_new_i32();
         tcg_gen_movi_i32(tmp, 0);
     }
-    tmp2 = neon_load_reg(a->vm, 0);
+    tmp2 = tcg_temp_new_i32();
+    read_neon_element32(tmp2, a->vm, 0, MO_32);
     ptr1 = vfp_reg_ptr(true, a->vn);
     tmp4 = tcg_const_i32(n);
     gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
-    tcg_temp_free_i32(tmp);
+
     if (a->op) {
-        tmp = neon_load_reg(a->vd, 1);
+        read_neon_element32(tmp, a->vd, 1, MO_32);
     } else {
-        tmp = tcg_temp_new_i32();
         tcg_gen_movi_i32(tmp, 0);
     }
-    tmp3 = neon_load_reg(a->vm, 1);
+    tmp3 = tcg_temp_new_i32();
+    read_neon_element32(tmp3, a->vm, 1, MO_32);
     gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
+    tcg_temp_free_i32(tmp);
     tcg_temp_free_i32(tmp4);
     tcg_temp_free_ptr(ptr1);
-    neon_store_reg(a->vd, 0, tmp2);
-    neon_store_reg(a->vd, 1, tmp3);
-    tcg_temp_free_i32(tmp);
+
+    write_neon_element32(tmp2, a->vd, 0, MO_32);
+    write_neon_element32(tmp3, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_i32(tmp3);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
 {
     int pass, half;
+    TCGv_i32 tmp[2];
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
@@ -XXX,XX +XXX,XX @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
         return true;
     }
 
-    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
-        TCGv_i32 tmp[2];
+    tmp[0] = tcg_temp_new_i32();
+    tmp[1] = tcg_temp_new_i32();
 
+    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
         for (half = 0; half < 2; half++) {
-            tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
+            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
             switch (a->size) {
             case 0:
                 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
@@ -XXX,XX +XXX,XX @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
                 g_assert_not_reached();
             }
         }
-        neon_store_reg(a->vd, pass * 2, tmp[1]);
-        neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
+        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
+        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
     }
+
+    tcg_temp_free_i32(tmp[0]);
+    tcg_temp_free_i32(tmp[1]);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
         rm0_64 = tcg_temp_new_i64();
         rm1_64 = tcg_temp_new_i64();
         rd_64 = tcg_temp_new_i64();
-        tmp = neon_load_reg(a->vm, pass * 2);
+
+        tmp = tcg_temp_new_i32();
+        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
         widenfn(rm0_64, tmp);
-        tcg_temp_free_i32(tmp);
-        tmp = neon_load_reg(a->vm, pass * 2 + 1);
+        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
         widenfn(rm1_64, tmp);
         tcg_temp_free_i32(tmp);
+
         opfn(rd_64, rm0_64, rm1_64);
         tcg_temp_free_i64(rm0_64);
         tcg_temp_free_i64(rm1_64);
@@ -XXX,XX +XXX,XX @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
     narrowfn(rd0, cpu_env, rm);
     neon_load_reg64(rm, a->vm + 1);
     narrowfn(rd1, cpu_env, rm);
-    neon_store_reg(a->vd, 0, rd0);
470
- neon_store_reg(a->vd, 1, rd1);
471
+ write_neon_element32(rd0, a->vd, 0, MO_32);
472
+ write_neon_element32(rd1, a->vd, 1, MO_32);
473
+ tcg_temp_free_i32(rd0);
474
+ tcg_temp_free_i32(rd1);
475
tcg_temp_free_i64(rm);
476
return true;
477
}
478
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
479
}
480
481
rd = tcg_temp_new_i64();
482
+ rm0 = tcg_temp_new_i32();
483
+ rm1 = tcg_temp_new_i32();
484
485
- rm0 = neon_load_reg(a->vm, 0);
486
- rm1 = neon_load_reg(a->vm, 1);
487
+ read_neon_element32(rm0, a->vm, 0, MO_32);
488
+ read_neon_element32(rm1, a->vm, 1, MO_32);
489
490
widenfn(rd, rm0);
491
tcg_gen_shli_i64(rd, rd, 8 << a->size);
492
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
493
494
fpst = fpstatus_ptr(FPST_STD);
495
ahp = get_ahp_flag();
496
- tmp = neon_load_reg(a->vm, 0);
497
+ tmp = tcg_temp_new_i32();
498
+ read_neon_element32(tmp, a->vm, 0, MO_32);
499
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
500
- tmp2 = neon_load_reg(a->vm, 1);
501
+ tmp2 = tcg_temp_new_i32();
502
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
503
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
504
tcg_gen_shli_i32(tmp2, tmp2, 16);
505
tcg_gen_or_i32(tmp2, tmp2, tmp);
506
- tcg_temp_free_i32(tmp);
507
- tmp = neon_load_reg(a->vm, 2);
508
+ read_neon_element32(tmp, a->vm, 2, MO_32);
509
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
510
- tmp3 = neon_load_reg(a->vm, 3);
511
- neon_store_reg(a->vd, 0, tmp2);
512
+ tmp3 = tcg_temp_new_i32();
513
+ read_neon_element32(tmp3, a->vm, 3, MO_32);
514
+ write_neon_element32(tmp2, a->vd, 0, MO_32);
515
+ tcg_temp_free_i32(tmp2);
516
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
517
tcg_gen_shli_i32(tmp3, tmp3, 16);
518
tcg_gen_or_i32(tmp3, tmp3, tmp);
519
- neon_store_reg(a->vd, 1, tmp3);
520
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
521
+ tcg_temp_free_i32(tmp3);
522
tcg_temp_free_i32(tmp);
523
tcg_temp_free_i32(ahp);
524
tcg_temp_free_ptr(fpst);
525
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
526
fpst = fpstatus_ptr(FPST_STD);
527
ahp = get_ahp_flag();
528
tmp3 = tcg_temp_new_i32();
529
- tmp = neon_load_reg(a->vm, 0);
530
- tmp2 = neon_load_reg(a->vm, 1);
531
+ tmp2 = tcg_temp_new_i32();
532
+ tmp = tcg_temp_new_i32();
533
+ read_neon_element32(tmp, a->vm, 0, MO_32);
534
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
535
tcg_gen_ext16u_i32(tmp3, tmp);
536
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
537
- neon_store_reg(a->vd, 0, tmp3);
538
+ write_neon_element32(tmp3, a->vd, 0, MO_32);
539
tcg_gen_shri_i32(tmp, tmp, 16);
540
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
541
- neon_store_reg(a->vd, 1, tmp);
542
- tmp3 = tcg_temp_new_i32();
543
+ write_neon_element32(tmp, a->vd, 1, MO_32);
544
+ tcg_temp_free_i32(tmp);
545
tcg_gen_ext16u_i32(tmp3, tmp2);
546
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
547
- neon_store_reg(a->vd, 2, tmp3);
548
+ write_neon_element32(tmp3, a->vd, 2, MO_32);
549
+ tcg_temp_free_i32(tmp3);
550
tcg_gen_shri_i32(tmp2, tmp2, 16);
551
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
552
- neon_store_reg(a->vd, 3, tmp2);
553
+ write_neon_element32(tmp2, a->vd, 3, MO_32);
554
+ tcg_temp_free_i32(tmp2);
555
tcg_temp_free_i32(ahp);
556
tcg_temp_free_ptr(fpst);
557
558
@@ -XXX,XX +XXX,XX @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
559
560
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
561
{
562
+ TCGv_i32 tmp;
563
int pass;
564
565
/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
566
@@ -XXX,XX +XXX,XX @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
567
return true;
568
}
569
570
+ tmp = tcg_temp_new_i32();
571
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
572
- TCGv_i32 tmp = neon_load_reg(a->vm, pass);
573
+ read_neon_element32(tmp, a->vm, pass, MO_32);
574
fn(tmp, tmp);
575
- neon_store_reg(a->vd, pass, tmp);
576
+ write_neon_element32(tmp, a->vd, pass, MO_32);
577
}
578
+ tcg_temp_free_i32(tmp);
579
580
return true;
581
}
582
@@ -XXX,XX +XXX,XX @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a)
583
return true;
584
}
585
586
- if (a->size == 2) {
587
+ tmp = tcg_temp_new_i32();
588
+ tmp2 = tcg_temp_new_i32();
589
+ if (a->size == MO_32) {
590
for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
591
- tmp = neon_load_reg(a->vm, pass);
592
- tmp2 = neon_load_reg(a->vd, pass + 1);
593
- neon_store_reg(a->vm, pass, tmp2);
594
- neon_store_reg(a->vd, pass + 1, tmp);
595
+ read_neon_element32(tmp, a->vm, pass, MO_32);
596
+ read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
597
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
598
+ write_neon_element32(tmp, a->vd, pass + 1, MO_32);
599
}
600
} else {
601
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
602
- tmp = neon_load_reg(a->vm, pass);
603
- tmp2 = neon_load_reg(a->vd, pass);
604
- if (a->size == 0) {
605
+ read_neon_element32(tmp, a->vm, pass, MO_32);
606
+ read_neon_element32(tmp2, a->vd, pass, MO_32);
607
+ if (a->size == MO_8) {
608
gen_neon_trn_u8(tmp, tmp2);
609
} else {
610
gen_neon_trn_u16(tmp, tmp2);
611
}
612
- neon_store_reg(a->vm, pass, tmp2);
613
- neon_store_reg(a->vd, pass, tmp);
614
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
615
+ write_neon_element32(tmp, a->vd, pass, MO_32);
616
}
617
}
618
+ tcg_temp_free_i32(tmp);
619
+ tcg_temp_free_i32(tmp2);
620
return true;
621
}
--
2.17.0

--
2.20.1
From: Richard Henderson <richard.henderson@linaro.org>

The generic expanders replace nearly identical code in the translator.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate-a64.c | 46 ++++++++++++--------------------------
1 file changed, 14 insertions(+), 32 deletions(-)

From: Richard Henderson <richard.henderson@linaro.org>

We can then use this to improve VMOV (scalar to gp) and
VMOV (gp to scalar) so that we simply perform the memory
operation that we wanted, rather than inserting or
extracting from a 32-bit quantity.

These were the last uses of neon_load/store_reg, so remove them.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-7-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate.c | 50 +++++++++++++-----------
target/arm/translate-vfp.c.inc | 71 +++++-----------------------
2 files changed, 37 insertions(+), 84 deletions(-)
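The point of the change is easiest to see outside of TCG. The plain-C
sketch below is illustrative only (read_element32_model and its layout
are invented for this example, they are not QEMU code): the old VMOV
path always loaded the containing 32-bit lane and then shifted and
extended, while the new path issues one correctly-sized, optionally
sign-extending load at the element's offset, mirroring the
tcg_gen_ld8s_i32()/tcg_gen_ld16u_i32() cases added in the hunks below.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative model, not QEMU code: one sized load at the
     * element's offset replaces load-32-bits-then-shift-and-extend. */
    static int32_t read_element32_model(const uint8_t *regfile, long off,
                                        int bytes, int sign)
    {
        if (bytes == 1) {
            return sign ? *(const int8_t *)(regfile + off)
                        : *(const uint8_t *)(regfile + off);
        } else if (bytes == 2) {
            uint16_t u;
            memcpy(&u, regfile + off, 2);
            return sign ? (int16_t)u : u;
        } else {
            int32_t v;
            memcpy(&v, regfile + off, 4);  /* 32-bit: no extension */
            return v;
        }
    }

The code deleted from trans_VMOV_to_gp() further down is exactly the
shift-and-extend sequence that this makes unnecessary.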
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
23
@@ -XXX,XX +XXX,XX @@ static long neon_full_reg_offset(unsigned reg)
18
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
24
* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
19
break;
25
* where 0 is the least significant end of the register.
20
case 0x0a: /* SMAXV / UMAXV */
26
*/
21
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
27
-static long neon_element_offset(int reg, int element, MemOp size)
22
- tcg_res,
28
+static long neon_element_offset(int reg, int element, MemOp memop)
23
- tcg_res, tcg_elt, tcg_res, tcg_elt);
29
{
24
+ if (is_u) {
30
- int element_size = 1 << size;
25
+ tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
31
+ int element_size = 1 << (memop & MO_SIZE);
26
+ } else {
32
int ofs = element * element_size;
27
+ tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
33
#ifdef HOST_WORDS_BIGENDIAN
28
+ }
34
/*
29
break;
35
@@ -XXX,XX +XXX,XX @@ static long vfp_reg_offset(bool dp, unsigned reg)
30
case 0x1a: /* SMINV / UMINV */
36
}
31
- tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
37
}
32
- tcg_res,
38
33
- tcg_res, tcg_elt, tcg_res, tcg_elt);
39
-static TCGv_i32 neon_load_reg(int reg, int pass)
34
- break;
35
+ if (is_u) {
36
+ tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
37
+ } else {
38
+ tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
39
+ }
40
break;
41
default:
42
g_assert_not_reached();
43
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
44
}
45
}
46
47
-/* Helper functions for 32 bit comparisons */
48
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
49
-{
40
-{
50
- tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
41
- TCGv_i32 tmp = tcg_temp_new_i32();
42
- tcg_gen_ld_i32(tmp, cpu_env, neon_element_offset(reg, pass, MO_32));
43
- return tmp;
51
-}
44
-}
52
-
45
-
53
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
46
-static void neon_store_reg(int reg, int pass, TCGv_i32 var)
54
-{
47
-{
55
- tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
48
- tcg_gen_st_i32(var, cpu_env, neon_element_offset(reg, pass, MO_32));
49
- tcg_temp_free_i32(var);
56
-}
50
-}
57
-
51
-
58
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
52
static inline void neon_load_reg64(TCGv_i64 var, int reg)
59
-{
53
{
60
- tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
54
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
61
-}
55
@@ -XXX,XX +XXX,XX @@ static inline void neon_store_reg32(TCGv_i32 var, int reg)
62
-
56
tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
63
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
57
}
64
-{
58
65
- tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
59
-static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp size)
66
-}
60
+static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
67
-
61
{
68
/* Pairwise op subgroup of C3.6.16.
62
- long off = neon_element_offset(reg, ele, size);
69
*
63
+ long off = neon_element_offset(reg, ele, memop);
70
* This is called directly or via the handle_3same_float for float pairwise
64
71
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
65
- switch (size) {
72
static NeonGenTwoOpFn * const fns[3][2] = {
66
- case MO_32:
73
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
67
+ switch (memop) {
74
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
68
+ case MO_SB:
75
- { gen_max_s32, gen_max_u32 },
69
+ tcg_gen_ld8s_i32(dest, cpu_env, off);
76
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
70
+ break;
77
};
71
+ case MO_UB:
78
genfn = fns[size][u];
72
+ tcg_gen_ld8u_i32(dest, cpu_env, off);
79
break;
73
+ break;
80
@@ -XXX,XX +XXX,XX @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
74
+ case MO_SW:
81
static NeonGenTwoOpFn * const fns[3][2] = {
75
+ tcg_gen_ld16s_i32(dest, cpu_env, off);
82
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
76
+ break;
83
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
77
+ case MO_UW:
84
- { gen_min_s32, gen_min_u32 },
78
+ tcg_gen_ld16u_i32(dest, cpu_env, off);
85
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
79
+ break;
86
};
80
+ case MO_UL:
87
genfn = fns[size][u];
81
+ case MO_SL:
88
break;
82
tcg_gen_ld_i32(dest, cpu_env, off);
89
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
83
break;
90
static NeonGenTwoOpFn * const fns[3][2] = {
84
default:
91
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
85
@@ -XXX,XX +XXX,XX @@ static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp size)
92
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
86
}
93
- { gen_max_s32, gen_max_u32 },
87
}
94
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
88
95
};
89
-static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp size)
96
genfn = fns[size][u];
90
+static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
97
break;
91
{
98
@@ -XXX,XX +XXX,XX @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
92
- long off = neon_element_offset(reg, ele, size);
99
static NeonGenTwoOpFn * const fns[3][2] = {
93
+ long off = neon_element_offset(reg, ele, memop);
100
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
94
101
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
95
- switch (size) {
102
- { gen_min_s32, gen_min_u32 },
96
+ switch (memop) {
103
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
97
+ case MO_8:
104
};
98
+ tcg_gen_st8_i32(src, cpu_env, off);
105
genfn = fns[size][u];
99
+ break;
106
break;
100
+ case MO_16:
101
+ tcg_gen_st16_i32(src, cpu_env, off);
102
+ break;
103
case MO_32:
104
tcg_gen_st_i32(src, cpu_env, off);
105
break;
106
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/translate-vfp.c.inc
109
+++ b/target/arm/translate-vfp.c.inc
110
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
111
{
112
/* VMOV scalar to general purpose register */
113
TCGv_i32 tmp;
114
- int pass;
115
- uint32_t offset;
116
117
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
118
- if (a->size == 2
119
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
120
+ if (a->size == MO_32
121
? !dc_isar_feature(aa32_fpsp_v2, s)
122
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
123
return false;
124
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
125
return false;
126
}
127
128
- offset = a->index << a->size;
129
- pass = extract32(offset, 2, 1);
130
- offset = extract32(offset, 0, 2) * 8;
131
-
132
if (!vfp_access_check(s)) {
133
return true;
134
}
135
136
- tmp = neon_load_reg(a->vn, pass);
137
- switch (a->size) {
138
- case 0:
139
- if (offset) {
140
- tcg_gen_shri_i32(tmp, tmp, offset);
141
- }
142
- if (a->u) {
143
- gen_uxtb(tmp);
144
- } else {
145
- gen_sxtb(tmp);
146
- }
147
- break;
148
- case 1:
149
- if (a->u) {
150
- if (offset) {
151
- tcg_gen_shri_i32(tmp, tmp, 16);
152
- } else {
153
- gen_uxth(tmp);
154
- }
155
- } else {
156
- if (offset) {
157
- tcg_gen_sari_i32(tmp, tmp, 16);
158
- } else {
159
- gen_sxth(tmp);
160
- }
161
- }
162
- break;
163
- case 2:
164
- break;
165
- }
166
+ tmp = tcg_temp_new_i32();
167
+ read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
168
store_reg(s, a->rt, tmp);
169
170
return true;
171
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
172
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
173
{
174
/* VMOV general purpose register to scalar */
175
- TCGv_i32 tmp, tmp2;
176
- int pass;
177
- uint32_t offset;
178
+ TCGv_i32 tmp;
179
180
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
181
- if (a->size == 2
182
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
183
+ if (a->size == MO_32
184
? !dc_isar_feature(aa32_fpsp_v2, s)
185
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
186
return false;
187
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
188
return false;
189
}
190
191
- offset = a->index << a->size;
192
- pass = extract32(offset, 2, 1);
193
- offset = extract32(offset, 0, 2) * 8;
194
-
195
if (!vfp_access_check(s)) {
196
return true;
197
}
198
199
tmp = load_reg(s, a->rt);
200
- switch (a->size) {
201
- case 0:
202
- tmp2 = neon_load_reg(a->vn, pass);
203
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
204
- tcg_temp_free_i32(tmp2);
205
- break;
206
- case 1:
207
- tmp2 = neon_load_reg(a->vn, pass);
208
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
209
- tcg_temp_free_i32(tmp2);
210
- break;
211
- case 2:
212
- break;
213
- }
214
- neon_store_reg(a->vn, pass, tmp);
215
+ write_neon_element32(tmp, a->vn, a->index, a->size);
216
+ tcg_temp_free_i32(tmp);
217
218
return true;
219
}
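One detail worth noting in the new trans_VMOV_to_gp() above is the
memop composition "a->size | (a->u ? 0 : MO_SIGN)". Here is a
standalone demo of that flag arithmetic; the enum values mirror QEMU's
MemOp encoding, but this snippet is illustrative and not QEMU code:

    #include <assert.h>
    #include <stdio.h>

    /* Low two bits give log2(size); MO_SIGN requests sign extension. */
    enum {
        MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_SIGN = 4,
        MO_SB = MO_SIGN | MO_8, MO_SW = MO_SIGN | MO_16,
    };

    int main(void)
    {
        int size = MO_16;   /* a->size for a 16-bit scalar */
        int u = 0;          /* a->u == 0 means a signed VMOV */
        int memop = size | (u ? 0 : MO_SIGN);
        assert(memop == MO_SW);
        printf("element bytes: %d, signed: %s\n",
               1 << (memop & 3), (memop & MO_SIGN) ? "yes" : "no");
        return 0;
    }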
--
2.17.0

--
2.20.1
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/cpu64.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
         set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
         set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
         set_feature(&cpu->env, ARM_FEATURE_CRC);
+        set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
         set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
         set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
         set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);

From: Richard Henderson <richard.henderson@linaro.org>

The only uses of this function are for loading VFP
single-precision values, and nothing to do with NEON.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-8-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate.c | 4 +-
target/arm/translate-vfp.c.inc | 184 ++++++++++++++++-----------------
2 files changed, 94 insertions(+), 94 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void neon_store_reg64(TCGv_i64 var, int reg)
     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
 }

-static inline void neon_load_reg32(TCGv_i32 var, int reg)
+static inline void vfp_load_reg32(TCGv_i32 var, int reg)
 {
     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
 }

-static inline void neon_store_reg32(TCGv_i32 var, int reg)
+static inline void vfp_store_reg32(TCGv_i32 var, int reg)
 {
     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
 }
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/translate-vfp.c.inc
37
+++ b/target/arm/translate-vfp.c.inc
38
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
39
frn = tcg_temp_new_i32();
40
frm = tcg_temp_new_i32();
41
dest = tcg_temp_new_i32();
42
- neon_load_reg32(frn, rn);
43
- neon_load_reg32(frm, rm);
44
+ vfp_load_reg32(frn, rn);
45
+ vfp_load_reg32(frm, rm);
46
switch (a->cc) {
47
case 0: /* eq: Z */
48
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
49
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
50
if (sz == 1) {
51
tcg_gen_andi_i32(dest, dest, 0xffff);
52
}
53
- neon_store_reg32(dest, rd);
54
+ vfp_store_reg32(dest, rd);
55
tcg_temp_free_i32(frn);
56
tcg_temp_free_i32(frm);
57
tcg_temp_free_i32(dest);
58
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
59
TCGv_i32 tcg_res;
60
tcg_op = tcg_temp_new_i32();
61
tcg_res = tcg_temp_new_i32();
62
- neon_load_reg32(tcg_op, rm);
63
+ vfp_load_reg32(tcg_op, rm);
64
if (sz == 1) {
65
gen_helper_rinth(tcg_res, tcg_op, fpst);
66
} else {
67
gen_helper_rints(tcg_res, tcg_op, fpst);
68
}
69
- neon_store_reg32(tcg_res, rd);
70
+ vfp_store_reg32(tcg_res, rd);
71
tcg_temp_free_i32(tcg_op);
72
tcg_temp_free_i32(tcg_res);
73
}
74
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
75
gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
76
}
77
tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
78
- neon_store_reg32(tcg_tmp, rd);
79
+ vfp_store_reg32(tcg_tmp, rd);
80
tcg_temp_free_i32(tcg_tmp);
81
tcg_temp_free_i64(tcg_res);
82
tcg_temp_free_i64(tcg_double);
83
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
84
TCGv_i32 tcg_single, tcg_res;
85
tcg_single = tcg_temp_new_i32();
86
tcg_res = tcg_temp_new_i32();
87
- neon_load_reg32(tcg_single, rm);
88
+ vfp_load_reg32(tcg_single, rm);
89
if (sz == 1) {
90
if (is_signed) {
91
gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
92
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
93
gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
94
}
95
}
96
- neon_store_reg32(tcg_res, rd);
97
+ vfp_store_reg32(tcg_res, rd);
98
tcg_temp_free_i32(tcg_res);
99
tcg_temp_free_i32(tcg_single);
100
}
101
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
102
if (a->l) {
103
/* VFP to general purpose register */
104
tmp = tcg_temp_new_i32();
105
- neon_load_reg32(tmp, a->vn);
106
+ vfp_load_reg32(tmp, a->vn);
107
tcg_gen_andi_i32(tmp, tmp, 0xffff);
108
store_reg(s, a->rt, tmp);
109
} else {
110
/* general purpose register to VFP */
111
tmp = load_reg(s, a->rt);
112
tcg_gen_andi_i32(tmp, tmp, 0xffff);
113
- neon_store_reg32(tmp, a->vn);
114
+ vfp_store_reg32(tmp, a->vn);
115
tcg_temp_free_i32(tmp);
116
}
117
118
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
119
if (a->l) {
120
/* VFP to general purpose register */
121
tmp = tcg_temp_new_i32();
122
- neon_load_reg32(tmp, a->vn);
123
+ vfp_load_reg32(tmp, a->vn);
124
if (a->rt == 15) {
125
/* Set the 4 flag bits in the CPSR. */
126
gen_set_nzcv(tmp);
127
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
128
} else {
129
/* general purpose register to VFP */
130
tmp = load_reg(s, a->rt);
131
- neon_store_reg32(tmp, a->vn);
132
+ vfp_store_reg32(tmp, a->vn);
133
tcg_temp_free_i32(tmp);
134
}
135
136
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
137
if (a->op) {
138
/* fpreg to gpreg */
139
tmp = tcg_temp_new_i32();
140
- neon_load_reg32(tmp, a->vm);
141
+ vfp_load_reg32(tmp, a->vm);
142
store_reg(s, a->rt, tmp);
143
tmp = tcg_temp_new_i32();
144
- neon_load_reg32(tmp, a->vm + 1);
145
+ vfp_load_reg32(tmp, a->vm + 1);
146
store_reg(s, a->rt2, tmp);
147
} else {
148
/* gpreg to fpreg */
149
tmp = load_reg(s, a->rt);
150
- neon_store_reg32(tmp, a->vm);
151
+ vfp_store_reg32(tmp, a->vm);
152
tcg_temp_free_i32(tmp);
153
tmp = load_reg(s, a->rt2);
154
- neon_store_reg32(tmp, a->vm + 1);
155
+ vfp_store_reg32(tmp, a->vm + 1);
156
tcg_temp_free_i32(tmp);
157
}
158
159
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
160
if (a->op) {
161
/* fpreg to gpreg */
162
tmp = tcg_temp_new_i32();
163
- neon_load_reg32(tmp, a->vm * 2);
164
+ vfp_load_reg32(tmp, a->vm * 2);
165
store_reg(s, a->rt, tmp);
166
tmp = tcg_temp_new_i32();
167
- neon_load_reg32(tmp, a->vm * 2 + 1);
168
+ vfp_load_reg32(tmp, a->vm * 2 + 1);
169
store_reg(s, a->rt2, tmp);
170
} else {
171
/* gpreg to fpreg */
172
tmp = load_reg(s, a->rt);
173
- neon_store_reg32(tmp, a->vm * 2);
174
+ vfp_store_reg32(tmp, a->vm * 2);
175
tcg_temp_free_i32(tmp);
176
tmp = load_reg(s, a->rt2);
177
- neon_store_reg32(tmp, a->vm * 2 + 1);
178
+ vfp_store_reg32(tmp, a->vm * 2 + 1);
179
tcg_temp_free_i32(tmp);
180
}
181
182
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
183
tmp = tcg_temp_new_i32();
184
if (a->l) {
185
gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
186
- neon_store_reg32(tmp, a->vd);
187
+ vfp_store_reg32(tmp, a->vd);
188
} else {
189
- neon_load_reg32(tmp, a->vd);
190
+ vfp_load_reg32(tmp, a->vd);
191
gen_aa32_st16(s, tmp, addr, get_mem_index(s));
192
}
193
tcg_temp_free_i32(tmp);
194
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
195
tmp = tcg_temp_new_i32();
196
if (a->l) {
197
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
198
- neon_store_reg32(tmp, a->vd);
199
+ vfp_store_reg32(tmp, a->vd);
200
} else {
201
- neon_load_reg32(tmp, a->vd);
202
+ vfp_load_reg32(tmp, a->vd);
203
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
204
}
205
tcg_temp_free_i32(tmp);
206
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
207
if (a->l) {
208
/* load */
209
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
210
- neon_store_reg32(tmp, a->vd + i);
211
+ vfp_store_reg32(tmp, a->vd + i);
212
} else {
213
/* store */
214
- neon_load_reg32(tmp, a->vd + i);
215
+ vfp_load_reg32(tmp, a->vd + i);
216
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
217
}
218
tcg_gen_addi_i32(addr, addr, offset);
219
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
220
fd = tcg_temp_new_i32();
221
fpst = fpstatus_ptr(FPST_FPCR);
222
223
- neon_load_reg32(f0, vn);
224
- neon_load_reg32(f1, vm);
225
+ vfp_load_reg32(f0, vn);
226
+ vfp_load_reg32(f1, vm);
227
228
for (;;) {
229
if (reads_vd) {
230
- neon_load_reg32(fd, vd);
231
+ vfp_load_reg32(fd, vd);
232
}
233
fn(fd, f0, f1, fpst);
234
- neon_store_reg32(fd, vd);
235
+ vfp_store_reg32(fd, vd);
236
237
if (veclen == 0) {
238
break;
239
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
240
veclen--;
241
vd = vfp_advance_sreg(vd, delta_d);
242
vn = vfp_advance_sreg(vn, delta_d);
243
- neon_load_reg32(f0, vn);
244
+ vfp_load_reg32(f0, vn);
245
if (delta_m) {
246
vm = vfp_advance_sreg(vm, delta_m);
247
- neon_load_reg32(f1, vm);
248
+ vfp_load_reg32(f1, vm);
249
}
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
253
fd = tcg_temp_new_i32();
254
fpst = fpstatus_ptr(FPST_FPCR_F16);
255
256
- neon_load_reg32(f0, vn);
257
- neon_load_reg32(f1, vm);
258
+ vfp_load_reg32(f0, vn);
259
+ vfp_load_reg32(f1, vm);
260
261
if (reads_vd) {
262
- neon_load_reg32(fd, vd);
263
+ vfp_load_reg32(fd, vd);
264
}
265
fn(fd, f0, f1, fpst);
266
- neon_store_reg32(fd, vd);
267
+ vfp_store_reg32(fd, vd);
268
269
tcg_temp_free_i32(f0);
270
tcg_temp_free_i32(f1);
271
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
272
f0 = tcg_temp_new_i32();
273
fd = tcg_temp_new_i32();
274
275
- neon_load_reg32(f0, vm);
276
+ vfp_load_reg32(f0, vm);
277
278
for (;;) {
279
fn(fd, f0);
280
- neon_store_reg32(fd, vd);
281
+ vfp_store_reg32(fd, vd);
282
283
if (veclen == 0) {
284
break;
285
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
286
/* single source one-many */
287
while (veclen--) {
288
vd = vfp_advance_sreg(vd, delta_d);
289
- neon_store_reg32(fd, vd);
290
+ vfp_store_reg32(fd, vd);
291
}
292
break;
293
}
294
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
295
veclen--;
296
vd = vfp_advance_sreg(vd, delta_d);
297
vm = vfp_advance_sreg(vm, delta_m);
298
- neon_load_reg32(f0, vm);
299
+ vfp_load_reg32(f0, vm);
300
}
301
302
tcg_temp_free_i32(f0);
303
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
304
}
305
306
f0 = tcg_temp_new_i32();
307
- neon_load_reg32(f0, vm);
308
+ vfp_load_reg32(f0, vm);
309
fn(f0, f0);
310
- neon_store_reg32(f0, vd);
311
+ vfp_store_reg32(f0, vd);
312
tcg_temp_free_i32(f0);
313
314
return true;
315
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
316
vm = tcg_temp_new_i32();
317
vd = tcg_temp_new_i32();
318
319
- neon_load_reg32(vn, a->vn);
320
- neon_load_reg32(vm, a->vm);
321
+ vfp_load_reg32(vn, a->vn);
322
+ vfp_load_reg32(vm, a->vm);
323
if (neg_n) {
324
/* VFNMS, VFMS */
325
gen_helper_vfp_negh(vn, vn);
326
}
327
- neon_load_reg32(vd, a->vd);
328
+ vfp_load_reg32(vd, a->vd);
329
if (neg_d) {
330
/* VFNMA, VFNMS */
331
gen_helper_vfp_negh(vd, vd);
332
}
333
fpst = fpstatus_ptr(FPST_FPCR_F16);
334
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
335
- neon_store_reg32(vd, a->vd);
336
+ vfp_store_reg32(vd, a->vd);
337
338
tcg_temp_free_ptr(fpst);
339
tcg_temp_free_i32(vn);
340
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
341
vm = tcg_temp_new_i32();
342
vd = tcg_temp_new_i32();
343
344
- neon_load_reg32(vn, a->vn);
345
- neon_load_reg32(vm, a->vm);
346
+ vfp_load_reg32(vn, a->vn);
347
+ vfp_load_reg32(vm, a->vm);
348
if (neg_n) {
349
/* VFNMS, VFMS */
350
gen_helper_vfp_negs(vn, vn);
351
}
352
- neon_load_reg32(vd, a->vd);
353
+ vfp_load_reg32(vd, a->vd);
354
if (neg_d) {
355
/* VFNMA, VFNMS */
356
gen_helper_vfp_negs(vd, vd);
357
}
358
fpst = fpstatus_ptr(FPST_FPCR);
359
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
360
- neon_store_reg32(vd, a->vd);
361
+ vfp_store_reg32(vd, a->vd);
362
363
tcg_temp_free_ptr(fpst);
364
tcg_temp_free_i32(vn);
365
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
366
}
367
368
fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
369
- neon_store_reg32(fd, a->vd);
370
+ vfp_store_reg32(fd, a->vd);
371
tcg_temp_free_i32(fd);
372
return true;
373
}
374
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
375
fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
376
377
for (;;) {
378
- neon_store_reg32(fd, vd);
379
+ vfp_store_reg32(fd, vd);
380
381
if (veclen == 0) {
382
break;
383
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
384
vd = tcg_temp_new_i32();
385
vm = tcg_temp_new_i32();
386
387
- neon_load_reg32(vd, a->vd);
388
+ vfp_load_reg32(vd, a->vd);
389
if (a->z) {
390
tcg_gen_movi_i32(vm, 0);
391
} else {
392
- neon_load_reg32(vm, a->vm);
393
+ vfp_load_reg32(vm, a->vm);
394
}
395
396
if (a->e) {
397
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
398
vd = tcg_temp_new_i32();
399
vm = tcg_temp_new_i32();
400
401
- neon_load_reg32(vd, a->vd);
402
+ vfp_load_reg32(vd, a->vd);
403
if (a->z) {
404
tcg_gen_movi_i32(vm, 0);
405
} else {
406
- neon_load_reg32(vm, a->vm);
407
+ vfp_load_reg32(vm, a->vm);
408
}
409
410
if (a->e) {
411
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
412
/* The T bit tells us if we want the low or high 16 bits of Vm */
413
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
414
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
415
- neon_store_reg32(tmp, a->vd);
416
+ vfp_store_reg32(tmp, a->vd);
417
tcg_temp_free_i32(ahp_mode);
418
tcg_temp_free_ptr(fpst);
419
tcg_temp_free_i32(tmp);
420
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
421
ahp_mode = get_ahp_flag();
422
tmp = tcg_temp_new_i32();
423
424
- neon_load_reg32(tmp, a->vm);
425
+ vfp_load_reg32(tmp, a->vm);
426
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
427
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
428
tcg_temp_free_i32(ahp_mode);
429
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
430
}
431
432
tmp = tcg_temp_new_i32();
433
- neon_load_reg32(tmp, a->vm);
434
+ vfp_load_reg32(tmp, a->vm);
435
fpst = fpstatus_ptr(FPST_FPCR_F16);
436
gen_helper_rinth(tmp, tmp, fpst);
437
- neon_store_reg32(tmp, a->vd);
438
+ vfp_store_reg32(tmp, a->vd);
439
tcg_temp_free_ptr(fpst);
440
tcg_temp_free_i32(tmp);
441
return true;
442
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
443
}
444
445
tmp = tcg_temp_new_i32();
446
- neon_load_reg32(tmp, a->vm);
447
+ vfp_load_reg32(tmp, a->vm);
448
fpst = fpstatus_ptr(FPST_FPCR);
449
gen_helper_rints(tmp, tmp, fpst);
450
- neon_store_reg32(tmp, a->vd);
451
+ vfp_store_reg32(tmp, a->vd);
452
tcg_temp_free_ptr(fpst);
453
tcg_temp_free_i32(tmp);
454
return true;
455
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
456
}
457
458
tmp = tcg_temp_new_i32();
459
- neon_load_reg32(tmp, a->vm);
460
+ vfp_load_reg32(tmp, a->vm);
461
fpst = fpstatus_ptr(FPST_FPCR_F16);
462
tcg_rmode = tcg_const_i32(float_round_to_zero);
463
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
464
gen_helper_rinth(tmp, tmp, fpst);
465
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
466
- neon_store_reg32(tmp, a->vd);
467
+ vfp_store_reg32(tmp, a->vd);
468
tcg_temp_free_ptr(fpst);
469
tcg_temp_free_i32(tcg_rmode);
470
tcg_temp_free_i32(tmp);
471
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
472
}
473
474
tmp = tcg_temp_new_i32();
475
- neon_load_reg32(tmp, a->vm);
476
+ vfp_load_reg32(tmp, a->vm);
477
fpst = fpstatus_ptr(FPST_FPCR);
478
tcg_rmode = tcg_const_i32(float_round_to_zero);
479
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
480
gen_helper_rints(tmp, tmp, fpst);
481
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
482
- neon_store_reg32(tmp, a->vd);
483
+ vfp_store_reg32(tmp, a->vd);
484
tcg_temp_free_ptr(fpst);
485
tcg_temp_free_i32(tcg_rmode);
486
tcg_temp_free_i32(tmp);
487
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
488
}
489
490
tmp = tcg_temp_new_i32();
491
- neon_load_reg32(tmp, a->vm);
492
+ vfp_load_reg32(tmp, a->vm);
493
fpst = fpstatus_ptr(FPST_FPCR_F16);
494
gen_helper_rinth_exact(tmp, tmp, fpst);
495
- neon_store_reg32(tmp, a->vd);
496
+ vfp_store_reg32(tmp, a->vd);
497
tcg_temp_free_ptr(fpst);
498
tcg_temp_free_i32(tmp);
499
return true;
500
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
501
}
502
503
tmp = tcg_temp_new_i32();
504
- neon_load_reg32(tmp, a->vm);
505
+ vfp_load_reg32(tmp, a->vm);
506
fpst = fpstatus_ptr(FPST_FPCR);
507
gen_helper_rints_exact(tmp, tmp, fpst);
508
- neon_store_reg32(tmp, a->vd);
509
+ vfp_store_reg32(tmp, a->vd);
510
tcg_temp_free_ptr(fpst);
511
tcg_temp_free_i32(tmp);
512
return true;
513
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
514
515
vm = tcg_temp_new_i32();
516
vd = tcg_temp_new_i64();
517
- neon_load_reg32(vm, a->vm);
518
+ vfp_load_reg32(vm, a->vm);
519
gen_helper_vfp_fcvtds(vd, vm, cpu_env);
520
neon_store_reg64(vd, a->vd);
521
tcg_temp_free_i32(vm);
522
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
523
vm = tcg_temp_new_i64();
524
neon_load_reg64(vm, a->vm);
525
gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
526
- neon_store_reg32(vd, a->vd);
527
+ vfp_store_reg32(vd, a->vd);
528
tcg_temp_free_i32(vd);
529
tcg_temp_free_i64(vm);
530
return true;
531
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
532
}
533
534
vm = tcg_temp_new_i32();
535
- neon_load_reg32(vm, a->vm);
536
+ vfp_load_reg32(vm, a->vm);
537
fpst = fpstatus_ptr(FPST_FPCR_F16);
538
if (a->s) {
539
/* i32 -> f16 */
540
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
541
/* u32 -> f16 */
542
gen_helper_vfp_uitoh(vm, vm, fpst);
543
}
544
- neon_store_reg32(vm, a->vd);
545
+ vfp_store_reg32(vm, a->vd);
546
tcg_temp_free_i32(vm);
547
tcg_temp_free_ptr(fpst);
548
return true;
549
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
550
}
551
552
vm = tcg_temp_new_i32();
553
- neon_load_reg32(vm, a->vm);
554
+ vfp_load_reg32(vm, a->vm);
555
fpst = fpstatus_ptr(FPST_FPCR);
556
if (a->s) {
557
/* i32 -> f32 */
558
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
559
/* u32 -> f32 */
560
gen_helper_vfp_uitos(vm, vm, fpst);
561
}
562
- neon_store_reg32(vm, a->vd);
563
+ vfp_store_reg32(vm, a->vd);
564
tcg_temp_free_i32(vm);
565
tcg_temp_free_ptr(fpst);
566
return true;
567
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
568
569
vm = tcg_temp_new_i32();
570
vd = tcg_temp_new_i64();
571
- neon_load_reg32(vm, a->vm);
572
+ vfp_load_reg32(vm, a->vm);
573
fpst = fpstatus_ptr(FPST_FPCR);
574
if (a->s) {
575
/* i32 -> f64 */
576
@@ -XXX,XX +XXX,XX @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
577
vd = tcg_temp_new_i32();
578
neon_load_reg64(vm, a->vm);
579
gen_helper_vjcvt(vd, vm, cpu_env);
580
- neon_store_reg32(vd, a->vd);
581
+ vfp_store_reg32(vd, a->vd);
582
tcg_temp_free_i64(vm);
583
tcg_temp_free_i32(vd);
584
return true;
585
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
586
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
587
588
vd = tcg_temp_new_i32();
589
- neon_load_reg32(vd, a->vd);
590
+ vfp_load_reg32(vd, a->vd);
591
592
fpst = fpstatus_ptr(FPST_FPCR_F16);
593
shift = tcg_const_i32(frac_bits);
594
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
595
g_assert_not_reached();
596
}
597
598
- neon_store_reg32(vd, a->vd);
599
+ vfp_store_reg32(vd, a->vd);
600
tcg_temp_free_i32(vd);
601
tcg_temp_free_i32(shift);
602
tcg_temp_free_ptr(fpst);
603
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
604
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
605
606
vd = tcg_temp_new_i32();
607
- neon_load_reg32(vd, a->vd);
608
+ vfp_load_reg32(vd, a->vd);
609
610
fpst = fpstatus_ptr(FPST_FPCR);
611
shift = tcg_const_i32(frac_bits);
612
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
613
g_assert_not_reached();
614
}
615
616
- neon_store_reg32(vd, a->vd);
617
+ vfp_store_reg32(vd, a->vd);
618
tcg_temp_free_i32(vd);
619
tcg_temp_free_i32(shift);
620
tcg_temp_free_ptr(fpst);
621
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
622
623
fpst = fpstatus_ptr(FPST_FPCR_F16);
624
vm = tcg_temp_new_i32();
625
- neon_load_reg32(vm, a->vm);
626
+ vfp_load_reg32(vm, a->vm);
627
628
if (a->s) {
629
if (a->rz) {
630
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
631
gen_helper_vfp_touih(vm, vm, fpst);
632
}
633
}
634
- neon_store_reg32(vm, a->vd);
635
+ vfp_store_reg32(vm, a->vd);
636
tcg_temp_free_i32(vm);
637
tcg_temp_free_ptr(fpst);
638
return true;
639
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
640
641
fpst = fpstatus_ptr(FPST_FPCR);
642
vm = tcg_temp_new_i32();
643
- neon_load_reg32(vm, a->vm);
644
+ vfp_load_reg32(vm, a->vm);
645
646
if (a->s) {
647
if (a->rz) {
648
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
649
gen_helper_vfp_touis(vm, vm, fpst);
650
}
651
}
652
- neon_store_reg32(vm, a->vd);
653
+ vfp_store_reg32(vm, a->vd);
654
tcg_temp_free_i32(vm);
655
tcg_temp_free_ptr(fpst);
656
return true;
657
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
658
gen_helper_vfp_touid(vd, vm, fpst);
659
}
660
}
661
- neon_store_reg32(vd, a->vd);
662
+ vfp_store_reg32(vd, a->vd);
663
tcg_temp_free_i32(vd);
664
tcg_temp_free_i64(vm);
665
tcg_temp_free_ptr(fpst);
666
@@ -XXX,XX +XXX,XX @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
667
/* Insert low half of Vm into high half of Vd */
668
rm = tcg_temp_new_i32();
669
rd = tcg_temp_new_i32();
670
- neon_load_reg32(rm, a->vm);
671
- neon_load_reg32(rd, a->vd);
672
+ vfp_load_reg32(rm, a->vm);
673
+ vfp_load_reg32(rd, a->vd);
674
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
675
- neon_store_reg32(rd, a->vd);
676
+ vfp_store_reg32(rd, a->vd);
677
tcg_temp_free_i32(rm);
678
tcg_temp_free_i32(rd);
679
return true;
680
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
681
682
/* Set Vd to high half of Vm */
683
rm = tcg_temp_new_i32();
684
- neon_load_reg32(rm, a->vm);
685
+ vfp_load_reg32(rm, a->vm);
686
tcg_gen_shri_i32(rm, rm, 16);
687
- neon_store_reg32(rm, a->vd);
688
+ vfp_store_reg32(rm, a->vd);
689
tcg_temp_free_i32(rm);
690
return true;
691
}
--
2.17.0

--
2.20.1
From: Richard Henderson <richard.henderson@linaro.org>

These operations are re-invented by several targets so far.
Several supported hosts have insns for these, so place the
expanders out-of-line for a future introduction of tcg opcodes.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
tcg/tcg-op.h | 16 ++++++++++++++++
tcg/tcg-op.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)

From: Richard Henderson <richard.henderson@linaro.org>

Replace all uses of neon_load/store_reg64 within translate-neon.c.inc.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-9-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate.c | 26 +++++++++
target/arm/translate-neon.c.inc | 94 ++++++++++++++++-----------------
2 files changed, 73 insertions(+), 47 deletions(-)
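For reference, each of the new min/max expanders emits a single
movcond, so the generated code is equivalent to a plain conditional
move. A minimal runnable sketch of the semantics, in plain C rather
than TCG (illustrative only):

    #include <stdint.h>
    #include <assert.h>

    /* tcg_gen_smax_i32(ret, a, b) emits movcond(TCG_COND_LT, ret,
     * a, b, b, a), i.e. the conditional move below; the unsigned
     * variants differ only in using an unsigned compare (LTU). */
    static int32_t smax_i32(int32_t a, int32_t b)
    {
        return (a < b) ? b : a;
    }

    static uint32_t umin_i32(uint32_t a, uint32_t b)
    {
        return (a < b) ? a : b;   /* movcond(TCG_COND_LTU, ...) */
    }

    int main(void)
    {
        assert(smax_i32(-1, 2) == 2);
        assert(umin_i32(0xffffffffu, 2u) == 2u);
        return 0;
    }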
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
18
@@ -XXX,XX +XXX,XX @@ static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
21
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
22
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
23
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
24
+void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
25
+void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
26
+void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
27
+void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
28
29
static inline void tcg_gen_discard_i32(TCGv_i32 arg)
30
{
31
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
32
void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
33
void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
34
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
35
+void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
36
+void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
37
+void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
38
+void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
39
40
#if TCG_TARGET_REG_BITS == 64
41
static inline void tcg_gen_discard_i64(TCGv_i64 arg)
42
@@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
43
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
44
#define tcg_gen_muls2_tl tcg_gen_muls2_i64
45
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64
46
+#define tcg_gen_smin_tl tcg_gen_smin_i64
47
+#define tcg_gen_umin_tl tcg_gen_umin_i64
48
+#define tcg_gen_smax_tl tcg_gen_smax_i64
49
+#define tcg_gen_umax_tl tcg_gen_umax_i64
50
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
51
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
52
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
53
@@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
54
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
55
#define tcg_gen_muls2_tl tcg_gen_muls2_i32
56
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32
57
+#define tcg_gen_smin_tl tcg_gen_smin_i32
58
+#define tcg_gen_umin_tl tcg_gen_umin_i32
59
+#define tcg_gen_smax_tl tcg_gen_smax_i32
60
+#define tcg_gen_umax_tl tcg_gen_umax_i32
61
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
62
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
63
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
64
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/tcg-op.c
67
+++ b/tcg/tcg-op.c
68
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
69
}
19
}
70
}
20
}
71
21
72
+void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
22
+static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
73
+{
23
+{
74
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
24
+ long off = neon_element_offset(reg, ele, memop);
25
+
26
+ switch (memop) {
27
+ case MO_Q:
28
+ tcg_gen_ld_i64(dest, cpu_env, off);
29
+ break;
30
+ default:
31
+ g_assert_not_reached();
32
+ }
75
+}
33
+}
76
+
34
+
77
+void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
35
static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
36
{
37
long off = neon_element_offset(reg, ele, memop);
38
@@ -XXX,XX +XXX,XX @@ static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
39
}
40
}
41
42
+static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
78
+{
43
+{
79
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
44
+ long off = neon_element_offset(reg, ele, memop);
45
+
46
+ switch (memop) {
47
+ case MO_64:
48
+ tcg_gen_st_i64(src, cpu_env, off);
49
+ break;
50
+ default:
51
+ g_assert_not_reached();
52
+ }
80
+}
53
+}
81
+
54
+
82
+void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
55
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
83
+{
56
{
84
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
57
TCGv_ptr ret = tcg_temp_new_ptr();
85
+}
58
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
86
+
59
index XXXXXXX..XXXXXXX 100644
87
+void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
60
--- a/target/arm/translate-neon.c.inc
88
+{
61
+++ b/target/arm/translate-neon.c.inc
89
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
62
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
90
+}
63
for (pass = 0; pass < a->q + 1; pass++) {
91
+
64
TCGv_i64 tmp = tcg_temp_new_i64();
92
/* 64-bit ops */
65
93
66
- neon_load_reg64(tmp, a->vm + pass);
94
#if TCG_TARGET_REG_BITS == 32
67
+ read_neon_element64(tmp, a->vm, pass, MO_64);
95
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
68
fn(tmp, cpu_env, tmp, constimm);
96
tcg_temp_free_i64(t2);
69
- neon_store_reg64(tmp, a->vd + pass);
70
+ write_neon_element64(tmp, a->vd, pass, MO_64);
71
tcg_temp_free_i64(tmp);
72
}
73
tcg_temp_free_i64(constimm);
74
@@ -XXX,XX +XXX,XX @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
75
rd = tcg_temp_new_i32();
76
77
/* Load both inputs first to avoid potential overwrite if rm == rd */
78
- neon_load_reg64(rm1, a->vm);
79
- neon_load_reg64(rm2, a->vm + 1);
80
+ read_neon_element64(rm1, a->vm, 0, MO_64);
81
+ read_neon_element64(rm2, a->vm, 1, MO_64);
82
83
shiftfn(rm1, rm1, constimm);
84
narrowfn(rd, cpu_env, rm1);
85
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
86
tcg_gen_shli_i64(tmp, tmp, a->shift);
87
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
88
}
89
- neon_store_reg64(tmp, a->vd);
90
+ write_neon_element64(tmp, a->vd, 0, MO_64);
91
92
widenfn(tmp, rm1);
93
tcg_temp_free_i32(rm1);
94
@@ -XXX,XX +XXX,XX @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
95
tcg_gen_shli_i64(tmp, tmp, a->shift);
96
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
97
}
98
- neon_store_reg64(tmp, a->vd + 1);
99
+ write_neon_element64(tmp, a->vd, 1, MO_64);
100
tcg_temp_free_i64(tmp);
101
return true;
97
}
102
}
98
103
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
99
+void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
104
rm_64 = tcg_temp_new_i64();
100
+{
105
101
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
106
if (src1_wide) {
102
+}
107
- neon_load_reg64(rn0_64, a->vn);
103
+
108
+ read_neon_element64(rn0_64, a->vn, 0, MO_64);
104
+void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
109
} else {
105
+{
110
TCGv_i32 tmp = tcg_temp_new_i32();
106
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
111
read_neon_element32(tmp, a->vn, 0, MO_32);
107
+}
112
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
108
+
113
* avoid incorrect results if a narrow input overlaps with the result.
109
+void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
114
*/
110
+{
115
if (src1_wide) {
111
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
116
- neon_load_reg64(rn1_64, a->vn + 1);
112
+}
117
+ read_neon_element64(rn1_64, a->vn, 1, MO_64);
113
+
118
} else {
114
+void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
119
TCGv_i32 tmp = tcg_temp_new_i32();
115
+{
120
read_neon_element32(tmp, a->vn, 1, MO_32);
116
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
121
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
117
+}
122
rm = tcg_temp_new_i32();
118
+
123
read_neon_element32(rm, a->vm, 1, MO_32);
119
/* Size changing operations. */
124
120
125
- neon_store_reg64(rn0_64, a->vd);
121
void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
126
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
127
128
widenfn(rm_64, rm);
129
tcg_temp_free_i32(rm);
130
opfn(rn1_64, rn1_64, rm_64);
131
- neon_store_reg64(rn1_64, a->vd + 1);
132
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
133
134
tcg_temp_free_i64(rn0_64);
135
tcg_temp_free_i64(rn1_64);
136
@@ -XXX,XX +XXX,XX @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
137
rd0 = tcg_temp_new_i32();
138
rd1 = tcg_temp_new_i32();
139
140
- neon_load_reg64(rn_64, a->vn);
141
- neon_load_reg64(rm_64, a->vm);
142
+ read_neon_element64(rn_64, a->vn, 0, MO_64);
143
+ read_neon_element64(rm_64, a->vm, 0, MO_64);
144
145
opfn(rn_64, rn_64, rm_64);
146
147
narrowfn(rd0, rn_64);
148
149
- neon_load_reg64(rn_64, a->vn + 1);
150
- neon_load_reg64(rm_64, a->vm + 1);
151
+ read_neon_element64(rn_64, a->vn, 1, MO_64);
152
+ read_neon_element64(rm_64, a->vm, 1, MO_64);
153
154
opfn(rn_64, rn_64, rm_64);
155
156
@@ -XXX,XX +XXX,XX @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
157
/* Don't store results until after all loads: they might overlap */
158
if (accfn) {
159
tmp = tcg_temp_new_i64();
160
- neon_load_reg64(tmp, a->vd);
161
+ read_neon_element64(tmp, a->vd, 0, MO_64);
162
accfn(tmp, tmp, rd0);
163
- neon_store_reg64(tmp, a->vd);
164
- neon_load_reg64(tmp, a->vd + 1);
165
+ write_neon_element64(tmp, a->vd, 0, MO_64);
166
+ read_neon_element64(tmp, a->vd, 1, MO_64);
167
accfn(tmp, tmp, rd1);
168
- neon_store_reg64(tmp, a->vd + 1);
169
+ write_neon_element64(tmp, a->vd, 1, MO_64);
170
tcg_temp_free_i64(tmp);
171
} else {
172
- neon_store_reg64(rd0, a->vd);
173
- neon_store_reg64(rd1, a->vd + 1);
174
+ write_neon_element64(rd0, a->vd, 0, MO_64);
175
+ write_neon_element64(rd1, a->vd, 1, MO_64);
176
}
177
178
tcg_temp_free_i64(rd0);
179
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
180
181
if (accfn) {
182
TCGv_i64 t64 = tcg_temp_new_i64();
183
- neon_load_reg64(t64, a->vd);
184
+ read_neon_element64(t64, a->vd, 0, MO_64);
185
accfn(t64, t64, rn0_64);
186
- neon_store_reg64(t64, a->vd);
187
- neon_load_reg64(t64, a->vd + 1);
188
+ write_neon_element64(t64, a->vd, 0, MO_64);
189
+ read_neon_element64(t64, a->vd, 1, MO_64);
190
accfn(t64, t64, rn1_64);
191
- neon_store_reg64(t64, a->vd + 1);
192
+ write_neon_element64(t64, a->vd, 1, MO_64);
193
tcg_temp_free_i64(t64);
194
} else {
195
- neon_store_reg64(rn0_64, a->vd);
196
- neon_store_reg64(rn1_64, a->vd + 1);
197
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
198
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
199
}
200
tcg_temp_free_i64(rn0_64);
201
tcg_temp_free_i64(rn1_64);
202
@@ -XXX,XX +XXX,XX @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
203
right = tcg_temp_new_i64();
204
dest = tcg_temp_new_i64();
205
206
- neon_load_reg64(right, a->vn);
207
- neon_load_reg64(left, a->vm);
208
+ read_neon_element64(right, a->vn, 0, MO_64);
209
+ read_neon_element64(left, a->vm, 0, MO_64);
210
tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
211
- neon_store_reg64(dest, a->vd);
212
+ write_neon_element64(dest, a->vd, 0, MO_64);
213
214
tcg_temp_free_i64(left);
215
tcg_temp_free_i64(right);
216
@@ -XXX,XX +XXX,XX @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
217
destright = tcg_temp_new_i64();
218
219
if (a->imm < 8) {
220
- neon_load_reg64(right, a->vn);
221
- neon_load_reg64(middle, a->vn + 1);
222
+ read_neon_element64(right, a->vn, 0, MO_64);
223
+ read_neon_element64(middle, a->vn, 1, MO_64);
224
tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
225
- neon_load_reg64(left, a->vm);
226
+ read_neon_element64(left, a->vm, 0, MO_64);
227
tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
228
} else {
229
- neon_load_reg64(right, a->vn + 1);
230
- neon_load_reg64(middle, a->vm);
231
+ read_neon_element64(right, a->vn, 1, MO_64);
232
+ read_neon_element64(middle, a->vm, 0, MO_64);
233
tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
234
- neon_load_reg64(left, a->vm + 1);
235
+ read_neon_element64(left, a->vm, 1, MO_64);
236
tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
237
}
238
239
- neon_store_reg64(destright, a->vd);
240
- neon_store_reg64(destleft, a->vd + 1);
241
+ write_neon_element64(destright, a->vd, 0, MO_64);
242
+ write_neon_element64(destleft, a->vd, 1, MO_64);
243
244
tcg_temp_free_i64(destright);
245
tcg_temp_free_i64(destleft);
246
@@ -XXX,XX +XXX,XX @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
247
248
if (accfn) {
249
TCGv_i64 tmp64 = tcg_temp_new_i64();
250
- neon_load_reg64(tmp64, a->vd + pass);
251
+ read_neon_element64(tmp64, a->vd, pass, MO_64);
252
accfn(rd_64, tmp64, rd_64);
253
tcg_temp_free_i64(tmp64);
254
}
255
- neon_store_reg64(rd_64, a->vd + pass);
256
+ write_neon_element64(rd_64, a->vd, pass, MO_64);
257
tcg_temp_free_i64(rd_64);
258
}
259
return true;
260
@@ -XXX,XX +XXX,XX @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
261
rd0 = tcg_temp_new_i32();
262
rd1 = tcg_temp_new_i32();
263
264
- neon_load_reg64(rm, a->vm);
265
+ read_neon_element64(rm, a->vm, 0, MO_64);
266
narrowfn(rd0, cpu_env, rm);
267
- neon_load_reg64(rm, a->vm + 1);
268
+ read_neon_element64(rm, a->vm, 1, MO_64);
269
narrowfn(rd1, cpu_env, rm);
270
write_neon_element32(rd0, a->vd, 0, MO_32);
271
write_neon_element32(rd1, a->vd, 1, MO_32);
272
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
273
274
widenfn(rd, rm0);
275
tcg_gen_shli_i64(rd, rd, 8 << a->size);
276
- neon_store_reg64(rd, a->vd);
277
+ write_neon_element64(rd, a->vd, 0, MO_64);
278
widenfn(rd, rm1);
279
tcg_gen_shli_i64(rd, rd, 8 << a->size);
280
- neon_store_reg64(rd, a->vd + 1);
281
+ write_neon_element64(rd, a->vd, 1, MO_64);
282
283
tcg_temp_free_i64(rd);
284
tcg_temp_free_i32(rm0);
285
@@ -XXX,XX +XXX,XX @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a)
286
rm = tcg_temp_new_i64();
287
rd = tcg_temp_new_i64();
288
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
289
- neon_load_reg64(rm, a->vm + pass);
290
- neon_load_reg64(rd, a->vd + pass);
291
- neon_store_reg64(rm, a->vd + pass);
292
- neon_store_reg64(rd, a->vm + pass);
293
+ read_neon_element64(rm, a->vm, pass, MO_64);
294
+ read_neon_element64(rd, a->vd, pass, MO_64);
295
+ write_neon_element64(rm, a->vd, pass, MO_64);
296
+ write_neon_element64(rd, a->vm, pass, MO_64);
297
}
298
tcg_temp_free_i64(rm);
299
tcg_temp_free_i64(rd);
--
2.17.0

--
2.20.1
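The read_neon_element64()/write_neon_element64() accessors used above take a (register, element) pair rather than a raw Dreg number, so the host-endianness fixup can live in a single offset computation. A minimal sketch of that computation (illustrative only; the names and details differ from the actual translate.c code):

    #include <stddef.h>

    /*
     * Byte offset of element 'ele' of size 'esize' within a vector
     * register stored as host-order 64-bit words. On a big-endian host
     * the sub-word lanes sit mirrored inside each 8-byte word, so the
     * in-word position is XOR-flipped.
     */
    static size_t element_offset(int ele, size_t esize, int host_big_endian)
    {
        size_t off = (size_t)ele * esize;
        if (host_big_endian && esize < 8) {
            off ^= 8 - esize;
        }
        return off;
    }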
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

Given that this atomic operation will be used by both risc-v
and aarch64, let's not duplicate code across the two targets.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-5-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++
 accel/tcg/tcg-runtime.h | 8 +++++
 tcg/tcg-op.h | 34 ++++++++++++++++++
 tcg/tcg.h | 8 +++++
 tcg/tcg-op.c | 8 +++++
 5 files changed, 129 insertions(+)

From: Richard Henderson <richard.henderson@linaro.org>

The only uses of this function are for loading VFP
double-precision values, and nothing to do with NEON.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-10-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c | 8 ++--
 target/arm/translate-vfp.c.inc | 84 +++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
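The atomic_template.h hunks below build each fetch-min/max helper around the same compare-and-swap loop. In portable C11 the pattern looks roughly like this (a standalone sketch, not the QEMU code itself):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Returns the old value and stores min(old, val), as one atomic RMW. */
    static int64_t fetch_smin_i64(_Atomic int64_t *addr, int64_t val)
    {
        int64_t old = atomic_load_explicit(addr, memory_order_relaxed);
        int64_t new_val;
        do {
            new_val = old < val ? old : val;
            /* On failure, 'old' is refreshed with the current memory value. */
        } while (!atomic_compare_exchange_weak_explicit(
                     addr, &old, new_val,
                     memory_order_seq_cst, memory_order_seq_cst));
        return old;
    }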
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/atomic_template.h
17
--- a/target/arm/translate.c
21
+++ b/accel/tcg/atomic_template.h
18
+++ b/target/arm/translate.c
22
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ static long vfp_reg_offset(bool dp, unsigned reg)
23
#elif DATA_SIZE == 8
24
# define SUFFIX q
25
# define DATA_TYPE uint64_t
26
+# define SDATA_TYPE int64_t
27
# define BSWAP bswap64
28
#elif DATA_SIZE == 4
29
# define SUFFIX l
30
# define DATA_TYPE uint32_t
31
+# define SDATA_TYPE int32_t
32
# define BSWAP bswap32
33
#elif DATA_SIZE == 2
34
# define SUFFIX w
35
# define DATA_TYPE uint16_t
36
+# define SDATA_TYPE int16_t
37
# define BSWAP bswap16
38
#elif DATA_SIZE == 1
39
# define SUFFIX b
40
# define DATA_TYPE uint8_t
41
+# define SDATA_TYPE int8_t
42
# define BSWAP
43
#else
44
# error unsupported data size
45
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(or_fetch)
46
GEN_ATOMIC_HELPER(xor_fetch)
47
48
#undef GEN_ATOMIC_HELPER
49
+
50
+/* These helpers are, as a whole, full barriers. Within the helper,
51
+ * the leading barrier is explicit and the trailing barrier is within
52
+ * cmpxchg primitive.
53
+ */
54
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
55
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
56
+ ABI_TYPE xval EXTRA_ARGS) \
57
+{ \
58
+ ATOMIC_MMU_DECLS; \
59
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
60
+ XDATA_TYPE cmp, old, new, val = xval; \
61
+ smp_mb(); \
62
+ cmp = atomic_read__nocheck(haddr); \
63
+ do { \
64
+ old = cmp; new = FN(old, val); \
65
+ cmp = atomic_cmpxchg__nocheck(haddr, old, new); \
66
+ } while (cmp != old); \
67
+ ATOMIC_MMU_CLEANUP; \
68
+ return RET; \
69
+}
70
+
71
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
72
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
73
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
74
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
75
+
76
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
77
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
78
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
79
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
80
+
81
+#undef GEN_ATOMIC_HELPER_FN
82
#endif /* DATA SIZE >= 16 */
83
84
#undef END
85
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
86
ldo = ldn;
87
}
20
}
88
}
21
}
89
+
22
90
+/* These helpers are, as a whole, full barriers. Within the helper,
23
-static inline void neon_load_reg64(TCGv_i64 var, int reg)
91
+ * the leading barrier is explicit and the trailing barrier is within
24
+static inline void vfp_load_reg64(TCGv_i64 var, int reg)
92
+ * cmpxchg primitive.
25
{
93
+ */
26
- tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
94
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
27
+ tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
95
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
28
}
96
+ ABI_TYPE xval EXTRA_ARGS) \
29
97
+{ \
30
-static inline void neon_store_reg64(TCGv_i64 var, int reg)
98
+ ATOMIC_MMU_DECLS; \
31
+static inline void vfp_store_reg64(TCGv_i64 var, int reg)
99
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
32
{
100
+ XDATA_TYPE ldo, ldn, old, new, val = xval; \
33
- tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
101
+ smp_mb(); \
34
+ tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
102
+ ldn = atomic_read__nocheck(haddr); \
35
}
103
+ do { \
36
104
+ ldo = ldn; old = BSWAP(ldo); new = FN(old, val); \
37
static inline void vfp_load_reg32(TCGv_i32 var, int reg)
105
+ ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \
38
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
106
+ } while (ldo != ldn); \
107
+ ATOMIC_MMU_CLEANUP; \
108
+ return RET; \
109
+}
110
+
111
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
112
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
113
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
114
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
115
+
116
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
117
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
118
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
119
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
120
+
121
+#undef GEN_ATOMIC_HELPER_FN
122
#endif /* DATA_SIZE >= 16 */
123
124
#undef END
125
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
126
#undef BSWAP
127
#undef ABI_TYPE
128
#undef DATA_TYPE
129
+#undef SDATA_TYPE
130
#undef SUFFIX
131
#undef DATA_SIZE
132
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
133
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
134
--- a/accel/tcg/tcg-runtime.h
40
--- a/target/arm/translate-vfp.c.inc
135
+++ b/accel/tcg/tcg-runtime.h
41
+++ b/target/arm/translate-vfp.c.inc
136
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPERS(fetch_add)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
137
GEN_ATOMIC_HELPERS(fetch_and)
43
tcg_gen_ext_i32_i64(nf, cpu_NF);
138
GEN_ATOMIC_HELPERS(fetch_or)
44
tcg_gen_ext_i32_i64(vf, cpu_VF);
139
GEN_ATOMIC_HELPERS(fetch_xor)
45
140
+GEN_ATOMIC_HELPERS(fetch_smin)
46
- neon_load_reg64(frn, rn);
141
+GEN_ATOMIC_HELPERS(fetch_umin)
47
- neon_load_reg64(frm, rm);
142
+GEN_ATOMIC_HELPERS(fetch_smax)
48
+ vfp_load_reg64(frn, rn);
143
+GEN_ATOMIC_HELPERS(fetch_umax)
49
+ vfp_load_reg64(frm, rm);
144
50
switch (a->cc) {
145
GEN_ATOMIC_HELPERS(add_fetch)
51
case 0: /* eq: Z */
146
GEN_ATOMIC_HELPERS(and_fetch)
52
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
147
GEN_ATOMIC_HELPERS(or_fetch)
53
@@ -XXX,XX +XXX,XX @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
148
GEN_ATOMIC_HELPERS(xor_fetch)
54
tcg_temp_free_i64(tmp);
149
+GEN_ATOMIC_HELPERS(smin_fetch)
55
break;
150
+GEN_ATOMIC_HELPERS(umin_fetch)
56
}
151
+GEN_ATOMIC_HELPERS(smax_fetch)
57
- neon_store_reg64(dest, rd);
152
+GEN_ATOMIC_HELPERS(umax_fetch)
58
+ vfp_store_reg64(dest, rd);
153
59
tcg_temp_free_i64(frn);
154
GEN_ATOMIC_HELPERS(xchg)
60
tcg_temp_free_i64(frm);
155
61
tcg_temp_free_i64(dest);
156
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
62
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
157
index XXXXXXX..XXXXXXX 100644
63
TCGv_i64 tcg_res;
158
--- a/tcg/tcg-op.h
64
tcg_op = tcg_temp_new_i64();
159
+++ b/tcg/tcg-op.h
65
tcg_res = tcg_temp_new_i64();
160
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
66
- neon_load_reg64(tcg_op, rm);
161
67
+ vfp_load_reg64(tcg_op, rm);
162
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
68
gen_helper_rintd(tcg_res, tcg_op, fpst);
163
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
69
- neon_store_reg64(tcg_res, rd);
164
+
70
+ vfp_store_reg64(tcg_res, rd);
165
void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
71
tcg_temp_free_i64(tcg_op);
166
void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
72
tcg_temp_free_i64(tcg_res);
167
void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
73
} else {
168
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
74
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
169
void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
75
tcg_double = tcg_temp_new_i64();
170
void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
76
tcg_res = tcg_temp_new_i64();
171
void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
77
tcg_tmp = tcg_temp_new_i32();
172
+void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
78
- neon_load_reg64(tcg_double, rm);
173
+void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
79
+ vfp_load_reg64(tcg_double, rm);
174
+void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
80
if (is_signed) {
175
+void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
81
gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
176
+void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
82
} else {
177
+void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
83
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
178
+void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
84
tmp = tcg_temp_new_i64();
179
+void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
85
if (a->l) {
180
+
86
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
181
void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
87
- neon_store_reg64(tmp, a->vd);
182
void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
88
+ vfp_store_reg64(tmp, a->vd);
183
void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
89
} else {
184
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
90
- neon_load_reg64(tmp, a->vd);
185
void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
91
+ vfp_load_reg64(tmp, a->vd);
186
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
92
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
187
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
93
}
188
+void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
94
tcg_temp_free_i64(tmp);
189
+void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
95
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
190
+void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
96
if (a->l) {
191
+void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
97
/* load */
192
+void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
98
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
193
+void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
99
- neon_store_reg64(tmp, a->vd + i);
194
+void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
100
+ vfp_store_reg64(tmp, a->vd + i);
195
+void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
101
} else {
196
102
/* store */
197
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
103
- neon_load_reg64(tmp, a->vd + i);
198
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
104
+ vfp_load_reg64(tmp, a->vd + i);
199
@@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
105
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
200
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
106
}
201
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
107
tcg_gen_addi_i32(addr, addr, offset);
202
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
108
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
203
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64
109
fd = tcg_temp_new_i64();
204
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64
110
fpst = fpstatus_ptr(FPST_FPCR);
205
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64
111
206
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64
112
- neon_load_reg64(f0, vn);
207
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
113
- neon_load_reg64(f1, vm);
208
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
114
+ vfp_load_reg64(f0, vn);
209
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
115
+ vfp_load_reg64(f1, vm);
210
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
116
211
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64
117
for (;;) {
212
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64
118
if (reads_vd) {
213
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64
119
- neon_load_reg64(fd, vd);
214
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64
120
+ vfp_load_reg64(fd, vd);
215
#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec
121
}
216
#else
122
fn(fd, f0, f1, fpst);
217
#define tcg_gen_movi_tl tcg_gen_movi_i32
123
- neon_store_reg64(fd, vd);
218
@@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
124
+ vfp_store_reg64(fd, vd);
219
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
125
220
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
126
if (veclen == 0) {
221
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
127
break;
222
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32
128
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
223
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32
129
veclen--;
224
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32
130
vd = vfp_advance_dreg(vd, delta_d);
225
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32
131
vn = vfp_advance_dreg(vn, delta_d);
226
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
132
- neon_load_reg64(f0, vn);
227
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
133
+ vfp_load_reg64(f0, vn);
228
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
134
if (delta_m) {
229
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
135
vm = vfp_advance_dreg(vm, delta_m);
230
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32
136
- neon_load_reg64(f1, vm);
231
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32
137
+ vfp_load_reg64(f1, vm);
232
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32
138
}
233
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32
139
}
234
#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec
140
235
#endif
141
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
236
142
f0 = tcg_temp_new_i64();
237
diff --git a/tcg/tcg.h b/tcg/tcg.h
143
fd = tcg_temp_new_i64();
238
index XXXXXXX..XXXXXXX 100644
144
239
--- a/tcg/tcg.h
145
- neon_load_reg64(f0, vm);
240
+++ b/tcg/tcg.h
146
+ vfp_load_reg64(f0, vm);
241
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(fetch_sub)
147
242
GEN_ATOMIC_HELPER_ALL(fetch_and)
148
for (;;) {
243
GEN_ATOMIC_HELPER_ALL(fetch_or)
149
fn(fd, f0);
244
GEN_ATOMIC_HELPER_ALL(fetch_xor)
150
- neon_store_reg64(fd, vd);
245
+GEN_ATOMIC_HELPER_ALL(fetch_smin)
151
+ vfp_store_reg64(fd, vd);
246
+GEN_ATOMIC_HELPER_ALL(fetch_umin)
152
247
+GEN_ATOMIC_HELPER_ALL(fetch_smax)
153
if (veclen == 0) {
248
+GEN_ATOMIC_HELPER_ALL(fetch_umax)
154
break;
249
155
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
250
GEN_ATOMIC_HELPER_ALL(add_fetch)
156
/* single source one-many */
251
GEN_ATOMIC_HELPER_ALL(sub_fetch)
157
while (veclen--) {
252
GEN_ATOMIC_HELPER_ALL(and_fetch)
158
vd = vfp_advance_dreg(vd, delta_d);
253
GEN_ATOMIC_HELPER_ALL(or_fetch)
159
- neon_store_reg64(fd, vd);
254
GEN_ATOMIC_HELPER_ALL(xor_fetch)
160
+ vfp_store_reg64(fd, vd);
255
+GEN_ATOMIC_HELPER_ALL(smin_fetch)
161
}
256
+GEN_ATOMIC_HELPER_ALL(umin_fetch)
162
break;
257
+GEN_ATOMIC_HELPER_ALL(smax_fetch)
163
}
258
+GEN_ATOMIC_HELPER_ALL(umax_fetch)
164
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
259
165
veclen--;
260
GEN_ATOMIC_HELPER_ALL(xchg)
166
vd = vfp_advance_dreg(vd, delta_d);
261
167
vd = vfp_advance_dreg(vm, delta_m);
262
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
168
- neon_load_reg64(f0, vm);
263
index XXXXXXX..XXXXXXX 100644
169
+ vfp_load_reg64(f0, vm);
264
--- a/tcg/tcg-op.c
170
}
265
+++ b/tcg/tcg-op.c
171
266
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(fetch_add, add, 0)
172
tcg_temp_free_i64(f0);
267
GEN_ATOMIC_HELPER(fetch_and, and, 0)
173
@@ -XXX,XX +XXX,XX @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
268
GEN_ATOMIC_HELPER(fetch_or, or, 0)
174
vm = tcg_temp_new_i64();
269
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
175
vd = tcg_temp_new_i64();
270
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
176
271
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
177
- neon_load_reg64(vn, a->vn);
272
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
178
- neon_load_reg64(vm, a->vm);
273
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
179
+ vfp_load_reg64(vn, a->vn);
274
180
+ vfp_load_reg64(vm, a->vm);
275
GEN_ATOMIC_HELPER(add_fetch, add, 1)
181
if (neg_n) {
276
GEN_ATOMIC_HELPER(and_fetch, and, 1)
182
/* VFNMS, VFMS */
277
GEN_ATOMIC_HELPER(or_fetch, or, 1)
183
gen_helper_vfp_negd(vn, vn);
278
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
184
}
279
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
185
- neon_load_reg64(vd, a->vd);
280
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
186
+ vfp_load_reg64(vd, a->vd);
281
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
187
if (neg_d) {
282
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
188
/* VFNMA, VFNMS */
283
189
gen_helper_vfp_negd(vd, vd);
284
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
190
}
285
{
191
fpst = fpstatus_ptr(FPST_FPCR);
192
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
193
- neon_store_reg64(vd, a->vd);
194
+ vfp_store_reg64(vd, a->vd);
195
196
tcg_temp_free_ptr(fpst);
197
tcg_temp_free_i64(vn);
198
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
199
fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
200
201
for (;;) {
202
- neon_store_reg64(fd, vd);
203
+ vfp_store_reg64(fd, vd);
204
205
if (veclen == 0) {
206
break;
207
@@ -XXX,XX +XXX,XX @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
208
vd = tcg_temp_new_i64();
209
vm = tcg_temp_new_i64();
210
211
- neon_load_reg64(vd, a->vd);
212
+ vfp_load_reg64(vd, a->vd);
213
if (a->z) {
214
tcg_gen_movi_i64(vm, 0);
215
} else {
216
- neon_load_reg64(vm, a->vm);
217
+ vfp_load_reg64(vm, a->vm);
218
}
219
220
if (a->e) {
221
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
222
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
223
vd = tcg_temp_new_i64();
224
gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
225
- neon_store_reg64(vd, a->vd);
226
+ vfp_store_reg64(vd, a->vd);
227
tcg_temp_free_i32(ahp_mode);
228
tcg_temp_free_ptr(fpst);
229
tcg_temp_free_i32(tmp);
230
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
231
tmp = tcg_temp_new_i32();
232
vm = tcg_temp_new_i64();
233
234
- neon_load_reg64(vm, a->vm);
235
+ vfp_load_reg64(vm, a->vm);
236
gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
237
tcg_temp_free_i64(vm);
238
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
239
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
240
}
241
242
tmp = tcg_temp_new_i64();
243
- neon_load_reg64(tmp, a->vm);
244
+ vfp_load_reg64(tmp, a->vm);
245
fpst = fpstatus_ptr(FPST_FPCR);
246
gen_helper_rintd(tmp, tmp, fpst);
247
- neon_store_reg64(tmp, a->vd);
248
+ vfp_store_reg64(tmp, a->vd);
249
tcg_temp_free_ptr(fpst);
250
tcg_temp_free_i64(tmp);
251
return true;
252
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
253
}
254
255
tmp = tcg_temp_new_i64();
256
- neon_load_reg64(tmp, a->vm);
257
+ vfp_load_reg64(tmp, a->vm);
258
fpst = fpstatus_ptr(FPST_FPCR);
259
tcg_rmode = tcg_const_i32(float_round_to_zero);
260
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
261
gen_helper_rintd(tmp, tmp, fpst);
262
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
263
- neon_store_reg64(tmp, a->vd);
264
+ vfp_store_reg64(tmp, a->vd);
265
tcg_temp_free_ptr(fpst);
266
tcg_temp_free_i64(tmp);
267
tcg_temp_free_i32(tcg_rmode);
268
@@ -XXX,XX +XXX,XX @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
269
}
270
271
tmp = tcg_temp_new_i64();
272
- neon_load_reg64(tmp, a->vm);
273
+ vfp_load_reg64(tmp, a->vm);
274
fpst = fpstatus_ptr(FPST_FPCR);
275
gen_helper_rintd_exact(tmp, tmp, fpst);
276
- neon_store_reg64(tmp, a->vd);
277
+ vfp_store_reg64(tmp, a->vd);
278
tcg_temp_free_ptr(fpst);
279
tcg_temp_free_i64(tmp);
280
return true;
281
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
282
vd = tcg_temp_new_i64();
283
vfp_load_reg32(vm, a->vm);
284
gen_helper_vfp_fcvtds(vd, vm, cpu_env);
285
- neon_store_reg64(vd, a->vd);
286
+ vfp_store_reg64(vd, a->vd);
287
tcg_temp_free_i32(vm);
288
tcg_temp_free_i64(vd);
289
return true;
290
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
291
292
vd = tcg_temp_new_i32();
293
vm = tcg_temp_new_i64();
294
- neon_load_reg64(vm, a->vm);
295
+ vfp_load_reg64(vm, a->vm);
296
gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
297
vfp_store_reg32(vd, a->vd);
298
tcg_temp_free_i32(vd);
299
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
300
/* u32 -> f64 */
301
gen_helper_vfp_uitod(vd, vm, fpst);
302
}
303
- neon_store_reg64(vd, a->vd);
304
+ vfp_store_reg64(vd, a->vd);
305
tcg_temp_free_i32(vm);
306
tcg_temp_free_i64(vd);
307
tcg_temp_free_ptr(fpst);
308
@@ -XXX,XX +XXX,XX @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
309
310
vm = tcg_temp_new_i64();
311
vd = tcg_temp_new_i32();
312
- neon_load_reg64(vm, a->vm);
313
+ vfp_load_reg64(vm, a->vm);
314
gen_helper_vjcvt(vd, vm, cpu_env);
315
vfp_store_reg32(vd, a->vd);
316
tcg_temp_free_i64(vm);
317
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
318
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
319
320
vd = tcg_temp_new_i64();
321
- neon_load_reg64(vd, a->vd);
322
+ vfp_load_reg64(vd, a->vd);
323
324
fpst = fpstatus_ptr(FPST_FPCR);
325
shift = tcg_const_i32(frac_bits);
326
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
327
g_assert_not_reached();
328
}
329
330
- neon_store_reg64(vd, a->vd);
331
+ vfp_store_reg64(vd, a->vd);
332
tcg_temp_free_i64(vd);
333
tcg_temp_free_i32(shift);
334
tcg_temp_free_ptr(fpst);
335
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
336
fpst = fpstatus_ptr(FPST_FPCR);
337
vm = tcg_temp_new_i64();
338
vd = tcg_temp_new_i32();
339
- neon_load_reg64(vm, a->vm);
340
+ vfp_load_reg64(vm, a->vm);
341
342
if (a->s) {
343
if (a->rz) {
286
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Michael Clark <mjc@sifive.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-7-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/riscv/translate.c | 72 +++++++++++-----------------------------
 1 file changed, 20 insertions(+), 52 deletions(-)

From: Richard Henderson <richard.henderson@linaro.org>

In both cases, we can sink the write-back and perform
the accumulate into the normal destination temps.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-11-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate-neon.c.inc | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)
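The accumulating path in the hunks below is restructured so that both halves of the destination are read before either result is written back; with overlapping source and destination registers, that load/store ordering is what keeps the second accumulate from consuming an already-clobbered input. Schematically (illustrative C, not the TCG code):

    #include <stdint.h>

    typedef uint64_t (*accfn_t)(uint64_t, uint64_t);

    /* Overlap-safe pairwise accumulate: sink both stores below both loads. */
    static void accumulate_pair(uint64_t vd[2], uint64_t r0, uint64_t r1,
                                accfn_t accfn)
    {
        uint64_t t0 = accfn(vd[0], r0);
        uint64_t t1 = accfn(vd[1], r1);
        vd[0] = t0;
        vd[1] = t1;
    }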
10
13
11
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
14
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/target/riscv/translate.c
16
--- a/target/arm/translate-neon.c.inc
14
+++ b/target/riscv/translate.c
17
+++ b/target/arm/translate-neon.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
18
@@ -XXX,XX +XXX,XX @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
16
TCGv src1, src2, dat;
19
if (accfn) {
17
TCGLabel *l1, *l2;
20
tmp = tcg_temp_new_i64();
18
TCGMemOp mop;
21
read_neon_element64(tmp, a->vd, 0, MO_64);
19
- TCGCond cond;
22
- accfn(tmp, tmp, rd0);
20
bool aq, rl;
23
- write_neon_element64(tmp, a->vd, 0, MO_64);
21
24
+ accfn(rd0, tmp, rd0);
22
/* Extract the size of the atomic operation. */
25
read_neon_element64(tmp, a->vd, 1, MO_64);
23
@@ -XXX,XX +XXX,XX @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
26
- accfn(tmp, tmp, rd1);
24
tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
27
- write_neon_element64(tmp, a->vd, 1, MO_64);
25
gen_set_gpr(rd, src2);
28
+ accfn(rd1, tmp, rd1);
26
break;
29
tcg_temp_free_i64(tmp);
27
-
30
- } else {
28
case OPC_RISC_AMOMIN:
31
- write_neon_element64(rd0, a->vd, 0, MO_64);
29
- cond = TCG_COND_LT;
32
- write_neon_element64(rd1, a->vd, 1, MO_64);
30
- goto do_minmax;
33
}
31
- case OPC_RISC_AMOMAX:
34
32
- cond = TCG_COND_GT;
35
+ write_neon_element64(rd0, a->vd, 0, MO_64);
33
- goto do_minmax;
36
+ write_neon_element64(rd1, a->vd, 1, MO_64);
34
- case OPC_RISC_AMOMINU:
37
tcg_temp_free_i64(rd0);
35
- cond = TCG_COND_LTU;
38
tcg_temp_free_i64(rd1);
36
- goto do_minmax;
39
37
- case OPC_RISC_AMOMAXU:
40
@@ -XXX,XX +XXX,XX @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
38
- cond = TCG_COND_GTU;
41
if (accfn) {
39
- goto do_minmax;
42
TCGv_i64 t64 = tcg_temp_new_i64();
40
- do_minmax:
43
read_neon_element64(t64, a->vd, 0, MO_64);
41
- /* Handle the RL barrier. The AQ barrier is handled along the
44
- accfn(t64, t64, rn0_64);
42
- parallel path by the SC atomic cmpxchg. On the serial path,
45
- write_neon_element64(t64, a->vd, 0, MO_64);
43
- of course, barriers do not matter. */
46
+ accfn(rn0_64, t64, rn0_64);
44
- if (rl) {
47
read_neon_element64(t64, a->vd, 1, MO_64);
45
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
48
- accfn(t64, t64, rn1_64);
46
- }
49
- write_neon_element64(t64, a->vd, 1, MO_64);
47
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
50
+ accfn(rn1_64, t64, rn1_64);
48
- l1 = gen_new_label();
51
tcg_temp_free_i64(t64);
49
- gen_set_label(l1);
52
- } else {
50
- } else {
53
- write_neon_element64(rn0_64, a->vd, 0, MO_64);
51
- l1 = NULL;
54
- write_neon_element64(rn1_64, a->vd, 1, MO_64);
52
- }
55
}
53
-
56
+
54
gen_get_gpr(src1, rs1);
57
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
55
gen_get_gpr(src2, rs2);
58
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
56
- if ((mop & MO_SSIZE) == MO_SL) {
59
tcg_temp_free_i64(rn0_64);
57
- /* Sign-extend the register comparison input. */
60
tcg_temp_free_i64(rn1_64);
58
- tcg_gen_ext32s_tl(src2, src2);
61
return true;
59
- }
60
- dat = tcg_temp_local_new();
61
- tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop);
62
- tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2);
63
-
64
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
65
- /* Parallel context. Make this operation atomic by verifying
66
- that the memory didn't change while we computed the result. */
67
- tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop);
68
-
69
- /* If the cmpxchg failed, retry. */
70
- /* ??? There is an assumption here that this will eventually
71
- succeed, such that we don't live-lock. This is not unlike
72
- a similar loop that the compiler would generate for e.g.
73
- __atomic_fetch_and_xor, so don't worry about it. */
74
- tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1);
75
- } else {
76
- /* Serial context. Directly store the result. */
77
- tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop);
78
- }
79
- gen_set_gpr(rd, dat);
80
- tcg_temp_free(dat);
81
+ tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
82
+ gen_set_gpr(rd, src2);
83
+ break;
84
+ case OPC_RISC_AMOMAX:
85
+ gen_get_gpr(src1, rs1);
86
+ gen_get_gpr(src2, rs2);
87
+ tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
88
+ gen_set_gpr(rd, src2);
89
+ break;
90
+ case OPC_RISC_AMOMINU:
91
+ gen_get_gpr(src1, rs1);
92
+ gen_get_gpr(src2, rs2);
93
+ tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
94
+ gen_set_gpr(rd, src2);
95
+ break;
96
+ case OPC_RISC_AMOMAXU:
97
+ gen_get_gpr(src1, rs1);
98
+ gen_get_gpr(src2, rs2);
99
+ tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
100
+ gen_set_gpr(rd, src2);
101
break;
102
103
default:
104
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180508151437.4232-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

From: Richard Henderson <richard.henderson@linaro.org>

We can use proper widening loads to extend 32-bit inputs,
and skip the "widenfn" step.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-12-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/translate.c | 6 +++
 target/arm/translate-neon.c.inc | 66 ++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 29 deletions(-)
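A "proper widening load" here means the 32-bit input lane is sign- or zero-extended into the 64-bit temporary as part of the load itself (MO_SL/MO_UL), rather than loaded at 32 bits and extended by a separate widenfn step. The equivalent scalar idea (sketch only):

    #include <stdint.h>

    /* Read one 32-bit lane, extended to 64 bits in a single step. */
    static uint64_t read_lane_widened(const uint32_t *lanes, int ele,
                                      int is_signed)
    {
        uint32_t v = lanes[ele];
        return is_signed ? (uint64_t)(int64_t)(int32_t)v : (uint64_t)v;
    }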
13
14
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
15
diff --git a/target/arm/translate.c b/target/arm/translate.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-a64.c
17
--- a/target/arm/translate.c
17
+++ b/target/arm/translate-a64.c
18
+++ b/target/arm/translate.c
18
@@ -XXX,XX +XXX,XX @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
19
@@ -XXX,XX +XXX,XX @@ static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
19
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
20
long off = neon_element_offset(reg, ele, memop);
20
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
21
21
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
22
switch (memop) {
22
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
23
+ case MO_SL:
23
24
+ tcg_gen_ld32s_i64(dest, cpu_env, off);
24
/* Note that the gvec expanders operate on offsets + sizes. */
25
+ break;
25
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
26
+ case MO_UL:
26
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
27
+ tcg_gen_ld32u_i64(dest, cpu_env, off);
27
int rn = extract32(insn, 5, 5);
28
+ break;
28
int o3_opc = extract32(insn, 12, 4);
29
case MO_Q:
29
int feature = ARM_FEATURE_V8_ATOMICS;
30
tcg_gen_ld_i64(dest, cpu_env, off);
30
+ TCGv_i64 tcg_rn, tcg_rs;
31
break;
31
+ AtomicThreeOpFn *fn;
32
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
32
33
index XXXXXXX..XXXXXXX 100644
33
if (is_vector) {
34
--- a/target/arm/translate-neon.c.inc
34
unallocated_encoding(s);
35
+++ b/target/arm/translate-neon.c.inc
35
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
36
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
37
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
38
NeonGenWidenFn *widenfn,
39
NeonGenTwo64OpFn *opfn,
40
- bool src1_wide)
41
+ int src1_mop, int src2_mop)
42
{
43
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
44
TCGv_i64 rn0_64, rn1_64, rm_64;
45
- TCGv_i32 rm;
46
47
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
48
return false;
49
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
50
return false;
36
}
51
}
37
switch (o3_opc) {
52
38
case 000: /* LDADD */
53
- if (!widenfn || !opfn) {
39
+ fn = tcg_gen_atomic_fetch_add_i64;
54
+ if (!opfn) {
40
+ break;
55
/* size == 3 case, which is an entirely different insn group */
41
case 001: /* LDCLR */
56
return false;
42
+ fn = tcg_gen_atomic_fetch_and_i64;
43
+ break;
44
case 002: /* LDEOR */
45
+ fn = tcg_gen_atomic_fetch_xor_i64;
46
+ break;
47
case 003: /* LDSET */
48
+ fn = tcg_gen_atomic_fetch_or_i64;
49
+ break;
50
case 004: /* LDSMAX */
51
+ fn = tcg_gen_atomic_fetch_smax_i64;
52
+ break;
53
case 005: /* LDSMIN */
54
+ fn = tcg_gen_atomic_fetch_smin_i64;
55
+ break;
56
case 006: /* LDUMAX */
57
+ fn = tcg_gen_atomic_fetch_umax_i64;
58
+ break;
59
case 007: /* LDUMIN */
60
+ fn = tcg_gen_atomic_fetch_umin_i64;
61
+ break;
62
case 010: /* SWP */
63
+ fn = tcg_gen_atomic_xchg_i64;
64
+ break;
65
default:
66
unallocated_encoding(s);
67
return;
68
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
69
return;
70
}
57
}
71
58
72
- (void)rs;
59
- if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
73
- (void)rn;
60
+ if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
74
+ if (rn == 31) {
61
return false;
75
+ gen_check_sp_alignment(s);
62
}
63
64
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
65
rn1_64 = tcg_temp_new_i64();
66
rm_64 = tcg_temp_new_i64();
67
68
- if (src1_wide) {
69
- read_neon_element64(rn0_64, a->vn, 0, MO_64);
70
+ if (src1_mop >= 0) {
71
+ read_neon_element64(rn0_64, a->vn, 0, src1_mop);
72
} else {
73
TCGv_i32 tmp = tcg_temp_new_i32();
74
read_neon_element32(tmp, a->vn, 0, MO_32);
75
widenfn(rn0_64, tmp);
76
tcg_temp_free_i32(tmp);
77
}
78
- rm = tcg_temp_new_i32();
79
- read_neon_element32(rm, a->vm, 0, MO_32);
80
+ if (src2_mop >= 0) {
81
+ read_neon_element64(rm_64, a->vm, 0, src2_mop);
82
+ } else {
83
+ TCGv_i32 tmp = tcg_temp_new_i32();
84
+ read_neon_element32(tmp, a->vm, 0, MO_32);
85
+ widenfn(rm_64, tmp);
86
+ tcg_temp_free_i32(tmp);
76
+ }
87
+ }
77
+ tcg_rn = cpu_reg_sp(s, rn);
88
78
+ tcg_rs = read_cpu_reg(s, rs, true);
89
- widenfn(rm_64, rm);
79
+
90
- tcg_temp_free_i32(rm);
80
+ if (o3_opc == 1) { /* LDCLR */
91
opfn(rn0_64, rn0_64, rm_64);
81
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
92
93
/*
94
* Load second pass inputs before storing the first pass result, to
95
* avoid incorrect results if a narrow input overlaps with the result.
96
*/
97
- if (src1_wide) {
98
- read_neon_element64(rn1_64, a->vn, 1, MO_64);
99
+ if (src1_mop >= 0) {
100
+ read_neon_element64(rn1_64, a->vn, 1, src1_mop);
101
} else {
102
TCGv_i32 tmp = tcg_temp_new_i32();
103
read_neon_element32(tmp, a->vn, 1, MO_32);
104
widenfn(rn1_64, tmp);
105
tcg_temp_free_i32(tmp);
106
}
107
- rm = tcg_temp_new_i32();
108
- read_neon_element32(rm, a->vm, 1, MO_32);
109
+ if (src2_mop >= 0) {
110
+ read_neon_element64(rm_64, a->vm, 1, src2_mop);
111
+ } else {
112
+ TCGv_i32 tmp = tcg_temp_new_i32();
113
+ read_neon_element32(tmp, a->vm, 1, MO_32);
114
+ widenfn(rm_64, tmp);
115
+ tcg_temp_free_i32(tmp);
82
+ }
116
+ }
83
+
117
84
+ /* The tcg atomic primitives are all full barriers. Therefore we
118
write_neon_element64(rn0_64, a->vd, 0, MO_64);
85
+ * can ignore the Acquire and Release bits of this instruction.
119
86
+ */
120
- widenfn(rm_64, rm);
87
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
121
- tcg_temp_free_i32(rm);
88
+ s->be_data | size | MO_ALIGN);
122
opfn(rn1_64, rn1_64, rm_64);
123
write_neon_element64(rn1_64, a->vd, 1, MO_64);
124
125
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
126
return true;
89
}
127
}
90
128
91
/* Load/store register (all forms) */
129
-#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
130
+#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
131
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
132
{ \
133
static NeonGenWidenFn * const widenfn[] = { \
134
gen_helper_neon_widen_##S##8, \
135
gen_helper_neon_widen_##S##16, \
136
- tcg_gen_##EXT##_i32_i64, \
137
- NULL, \
138
+ NULL, NULL, \
139
}; \
140
static NeonGenTwo64OpFn * const addfn[] = { \
141
gen_helper_neon_##OP##l_u16, \
142
@@ -XXX,XX +XXX,XX @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
143
tcg_gen_##OP##_i64, \
144
NULL, \
145
}; \
146
- return do_prewiden_3d(s, a, widenfn[a->size], \
147
- addfn[a->size], SRC1WIDE); \
148
+ int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
149
+ return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
150
+ SRC1WIDE ? MO_Q : narrow_mop, \
151
+ narrow_mop); \
152
}
153
154
-DO_PREWIDEN(VADDL_S, s, ext, add, false)
155
-DO_PREWIDEN(VADDL_U, u, extu, add, false)
156
-DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
157
-DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
158
-DO_PREWIDEN(VADDW_S, s, ext, add, true)
159
-DO_PREWIDEN(VADDW_U, u, extu, add, true)
160
-DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
161
-DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
162
+DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
163
+DO_PREWIDEN(VADDL_U, u, add, false, 0)
164
+DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
165
+DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
166
+DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
167
+DO_PREWIDEN(VADDW_U, u, add, true, 0)
168
+DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
169
+DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
170
171
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
172
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
92
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
It is implementation defined whether a multiply-add of
(0,inf,qnan) or (inf,0,qnan) raises InvalidOperation or
not, so we let the target-specific pickNaNMulAdd function
handle this. This means that we must do the "return the
default NaN in default NaN mode" check after the call,
not before. Correct the ordering, and restore the comment
from the old propagateFloat64MulAddNaN() that warned about
this corner case.

This fixes a regression from 2.11 for Arm guests where we would
incorrectly fail to set the Invalid flag for these cases.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20180504100547.14621-1-peter.maydell@linaro.org
---
 fpu/softfloat.c | 52 ++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

In the neon_padd/pmax/pmin helpers for float16, a cut-and-paste error
meant we were using the H4() address swizzler macro rather than the
H2() which is required for 2-byte data. This had no effect on
little-endian hosts but meant we put the result data into the
destination Dreg in the wrong order on big-endian hosts.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20201028191712.4910-2-peter.maydell@linaro.org
---
 target/arm/vec_helper.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
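The H2()/H4() macros encode how lanes are addressed when vector registers are stored as host-order 64-bit words: on a big-endian host an N-byte lane index must be XOR-flipped within each word, and the flip constant depends on the lane size, so a 2-byte result written through the 4-byte swizzler lands in the wrong place. The convention, roughly (mirroring the idea, not copied from the source):

    /* Lane-index swizzlers for vectors stored as host-order uint64_t words;
     * identity mappings on little-endian hosts. */
    #ifdef HOST_WORDS_BIGENDIAN
    #define H1(x)  ((x) ^ 7)   /* eight 1-byte lanes per 64-bit word */
    #define H2(x)  ((x) ^ 3)   /* four 2-byte lanes per word */
    #define H4(x)  ((x) ^ 1)   /* two 4-byte lanes per word */
    #else
    #define H1(x)  (x)
    #define H2(x)  (x)
    #define H4(x)  (x)
    #endif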
22
14
23
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
15
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
24
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
25
--- a/fpu/softfloat.c
17
--- a/target/arm/vec_helper.c
26
+++ b/fpu/softfloat.c
18
+++ b/target/arm/vec_helper.c
27
@@ -XXX,XX +XXX,XX @@ static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
19
@@ -XXX,XX +XXX,XX @@ DO_ABA(gvec_uaba_d, uint64_t)
28
static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
20
r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
29
bool inf_zero, float_status *s)
21
r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
30
{
22
\
31
+ int which;
23
- d[H4(0)] = r0; \
32
+
24
- d[H4(1)] = r1; \
33
if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
25
- d[H4(2)] = r2; \
34
s->float_exception_flags |= float_flag_invalid;
26
- d[H4(3)] = r3; \
27
+ d[H2(0)] = r0; \
28
+ d[H2(1)] = r1; \
29
+ d[H2(2)] = r2; \
30
+ d[H2(3)] = r3; \
35
}
31
}
36
32
37
- if (s->default_nan_mode) {
33
DO_NEON_PAIRWISE(neon_padd, add)
38
- a.cls = float_class_dnan;
39
- } else {
40
- switch (pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
41
- is_qnan(b.cls), is_snan(b.cls),
42
- is_qnan(c.cls), is_snan(c.cls),
43
- inf_zero, s)) {
44
- case 0:
45
- break;
46
- case 1:
47
- a = b;
48
- break;
49
- case 2:
50
- a = c;
51
- break;
52
- case 3:
53
- a.cls = float_class_dnan;
54
- return a;
55
- default:
56
- g_assert_not_reached();
57
- }
58
+ which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
59
+ is_qnan(b.cls), is_snan(b.cls),
60
+ is_qnan(c.cls), is_snan(c.cls),
61
+ inf_zero, s);
62
63
- a.cls = float_class_msnan;
64
+ if (s->default_nan_mode) {
65
+ /* Note that this check is after pickNaNMulAdd so that function
66
+ * has an opportunity to set the Invalid flag.
67
+ */
68
+ a.cls = float_class_dnan;
69
+ return a;
70
}
71
+
72
+ switch (which) {
73
+ case 0:
74
+ break;
75
+ case 1:
76
+ a = b;
77
+ break;
78
+ case 2:
79
+ a = c;
80
+ break;
81
+ case 3:
82
+ a.cls = float_class_dnan;
83
+ return a;
84
+ default:
85
+ g_assert_not_reached();
86
+ }
87
+ a.cls = float_class_msnan;
88
+
89
return a;
90
}
91
92
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
Some versions of gcc produce a spurious warning if the result of
__atomic_compare_exchange_n() is not used and the type involved
is a signed 8-bit value:
  error: value computed is not used [-Werror=unused-value]
This has been seen on at least
  gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609

Work around this by using an explicit cast to void to indicate
that we don't care about the return value.

We don't currently use our atomic_cmpxchg() macro on any signed
8-bit types, but the upcoming support for the Arm v8.1-Atomics
will require it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
 include/qemu/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

The helper functions for performing the udot/sdot operations against
a scalar were not using an address-swizzling macro when converting
the index of the scalar element into a pointer into the vm array.
This had no effect on little-endian hosts but meant we generated
incorrect results on big-endian hosts.

For these insns, the index is indexing over a group of 4 8-bit values,
so 32 bits per indexed entity, and H4() is therefore what we want.
(For Neon the only possible input indexes are 0 and 1.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20201028191712.4910-3-peter.maydell@linaro.org
---
 target/arm/vec_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
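For the dot-product fix, each indexed entity is one group of four bytes, i.e. a 32-bit quantity, so the scalar's position must be swizzled with H4() before being scaled to a byte offset. The shape of the fixed addressing (a sketch, reusing the H4() definition outlined earlier):

    #include <stdint.h>

    #ifndef H4              /* identity on a little-endian host */
    #define H4(x) (x)
    #endif

    /* Locate the four int8 lanes of scalar element 'index' within vm. */
    static const int8_t *dot_scalar_ptr(const void *vm, intptr_t index)
    {
        return (const int8_t *)vm + H4(index) * 4;
    }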
21
18
22
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
19
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
23
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/atomic.h
21
--- a/target/arm/vec_helper.c
25
+++ b/include/qemu/atomic.h
22
+++ b/target/arm/vec_helper.c
26
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
27
/* Returns the eventual value, failed or not */
24
intptr_t index = simd_data(desc);
28
#define atomic_cmpxchg__nocheck(ptr, old, new) ({ \
25
uint32_t *d = vd;
29
typeof_strip_qual(*ptr) _old = (old); \
26
int8_t *n = vn;
30
- __atomic_compare_exchange_n(ptr, &_old, new, false, \
27
- int8_t *m_indexed = (int8_t *)vm + index * 4;
31
+ (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \
28
+ int8_t *m_indexed = (int8_t *)vm + H4(index) * 4;
32
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
29
33
_old; \
30
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
34
})
31
* Otherwise opr_sz is a multiple of 16.
32
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
33
intptr_t index = simd_data(desc);
34
uint32_t *d = vd;
35
uint8_t *n = vn;
36
- uint8_t *m_indexed = (uint8_t *)vm + index * 4;
37
+ uint8_t *m_indexed = (uint8_t *)vm + H4(index) * 4;
38
39
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
40
* Otherwise opr_sz is a multiple of 16.
35
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

The instruction "ucvtf v0.4h, v0.4h, #2", with input 0x8000u,
overflows the intermediate float16 to infinity before we have a
chance to scale the output. Use float64 as the intermediate type
so that no input argument (uint32_t in this case) can overflow
or round before scaling. Given the declared argument, the signed
int32_t function has the same problem.

When converting from float16 to integer, using u/int32_t instead
of u/int16_t means that the bounding is incorrect.

Cc: qemu-stable@nongnu.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180502221552.3873-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h | 4 +--
 target/arm/helper.c | 53 ++++++++++++++++++++++++++++++--
 target/arm/translate-a64.c | 4 +--
 3 files changed, 55 insertions(+), 6 deletions(-)

From: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>

HCR should be applied when NS is set, not when it is cleared.

Signed-off-by: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
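The fix routes the fixed-point conversions through float64: any uint32_t or int32_t value is exactly representable in the double-width significand, so the scale by 2^-shift happens before any rounding and the only rounding step is the final float64-to-float16 conversion. The scalar shape of the idea (a sketch; double_to_f16() stands in for the softfloat conversion and is an assumed helper):

    #include <stdint.h>

    extern uint16_t double_to_f16(double d);  /* assumed softfloat stand-in */

    /* Fixed-point uint32 -> float16 with 'shift' fraction bits. */
    static uint16_t ucvtf_h_fixed(uint32_t x, int shift)
    {
        double d = (double)x;          /* exact: fits in a 52-bit significand */
        d /= (double)(1u << shift);    /* scaling cannot overflow in double */
        return double_to_f16(d);       /* rounds exactly once, at the end */
    }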
23
11
24
diff --git a/target/arm/helper.h b/target/arm/helper.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/helper.h
27
+++ b/target/arm/helper.h
28
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
29
DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
30
DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
31
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
32
-DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
33
-DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
34
+DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
35
+DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
36
DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
37
DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
38
DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
39
diff --git a/target/arm/helper.c b/target/arm/helper.c
12
diff --git a/target/arm/helper.c b/target/arm/helper.c
40
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/helper.c
14
--- a/target/arm/helper.c
42
+++ b/target/arm/helper.c
15
+++ b/target/arm/helper.c
43
@@ -XXX,XX +XXX,XX @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
16
@@ -XXX,XX +XXX,XX @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
44
VFP_CONV_FIX(uh, s, 32, 32, uint16)
17
45
VFP_CONV_FIX(ul, s, 32, 32, uint32)
18
/*
46
VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
19
* Non-IS variants of TLB operations are upgraded to
47
-VFP_CONV_FIX_A64(sl, h, 16, 32, int32)
20
- * IS versions if we are at NS EL1 and HCR_EL2.FB is set to
48
-VFP_CONV_FIX_A64(ul, h, 16, 32, uint32)
21
+ * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
49
+
22
* force broadcast of these operations.
50
#undef VFP_CONV_FIX
23
*/
51
#undef VFP_CONV_FIX_FLOAT
24
static bool tlb_force_broadcast(CPUARMState *env)
52
#undef VFP_CONV_FLOAT_FIX_ROUND
25
{
53
+#undef VFP_CONV_FIX_A64
26
- return (env->cp15.hcr_el2 & HCR_FB) &&
54
+
27
- arm_current_el(env) == 1 && arm_is_secure_below_el3(env);
55
+/* Conversion to/from f16 can overflow to infinity before/after scaling.
28
+ return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
56
+ * Therefore we convert to f64 (which does not round), scale,
29
}
57
+ * and then convert f64 to f16 (which may round).
30
58
+ */
31
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
59
+
60
+static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
61
+{
62
+ return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
63
+}
64
+
65
+float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
66
+{
67
+ return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
68
+}
69
+
70
+float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
71
+{
72
+ return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
73
+}
74
+
75
+static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
76
+{
77
+ if (unlikely(float16_is_any_nan(f))) {
78
+ float_raise(float_flag_invalid, fpst);
79
+ return 0;
80
+ } else {
81
+ int old_exc_flags = get_float_exception_flags(fpst);
82
+ float64 ret;
83
+
84
+ ret = float16_to_float64(f, true, fpst);
85
+ ret = float64_scalbn(ret, shift, fpst);
86
+ old_exc_flags |= get_float_exception_flags(fpst)
87
+ & float_flag_input_denormal;
88
+ set_float_exception_flags(old_exc_flags, fpst);
89
+
90
+ return ret;
91
+ }
92
+}
93
+
94
+uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
95
+{
96
+ return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
97
+}
98
+
99
+uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
100
+{
101
+ return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
102
+}
103
104
/* Set the current fp rounding mode and return the old one.
105
* The argument is a softfloat float_round_ value.
106
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/translate-a64.c
109
+++ b/target/arm/translate-a64.c
110
@@ -XXX,XX +XXX,XX @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
111
switch (size) {
112
case MO_16:
113
if (is_u) {
114
- fn = gen_helper_vfp_toulh;
115
+ fn = gen_helper_vfp_touhh;
116
} else {
117
- fn = gen_helper_vfp_toslh;
118
+ fn = gen_helper_vfp_toshh;
119
}
120
break;
121
case MO_32:
122
--
2.17.0

--
2.20.1
diff view generated by jsdifflib
From: Igor Mammedov <imammedo@redhat.com>

platform-bus was using a machine_done notifier to get and map
(assign irq/mmio resources) dynamically added sysbus devices
after all '-device' options had been processed.
That however creates non-obvious dependencies on the ordering of
machine_done notifiers and requires careful line juggling
to keep it working. For example, see the comment above
create_platform_bus(): the 'straightforward' arm_load_kernel()
had to be converted to a machine_done notifier, and that led to
yet another machine_done notifier to keep it working,
arm_register_platform_bus_fdt_creator().

Instead of hiding resource assignment in platform-bus-device
to magically initialize sysbus devices, use the device plug
callback and assign resources explicitly at board level,
at the moment each -device option is being processed.

That adds a bunch of machine declaration boilerplate to the
e500plat board, similar to ARM/x86, but gets rid of the hidden
machine_done notifier and would allow removing the dependent
notifiers in ARM code, simplifying it and making the code flow
easier to follow.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-id: 1525691524-32265-3-git-send-email-imammedo@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/ppc/e500.h | 5 +++++
 include/hw/arm/virt.h | 1 +
 include/hw/platform-bus.h | 4 ++--
 hw/arm/sysbus-fdt.c | 3 ---
 hw/arm/virt.c | 31 +++++++++++++++++++++++++++++++
 hw/core/platform-bus.c | 29 +++++------------------------
 hw/ppc/e500.c | 38 +++++++++++++++++---------------------
 hw/ppc/e500plat.c | 31 +++++++++++++++++++++++++++++++
 8 files changed, 92 insertions(+), 50 deletions(-)

From: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>

Secure mode is not exempted from checking SCR_EL3.TLOR, and in the
future HCR_EL2.TLOR when S-EL2 is enabled.

Signed-off-by: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)
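The virt board hooks this up through the standard qdev hotplug-handler interface: the machine's plug callback runs as each -device is realized, and dynamic sysbus devices get their MMIO regions and IRQs mapped onto the platform bus right there. The shape of the callback (simplified from the hunks below):

    static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                            DeviceState *dev, Error **errp)
    {
        VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);

        if (vms->platform_bus_dev &&
            object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
            /* Assign irq/mmio resources now, with no machine_done notifier. */
            platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
                                     SYS_BUS_DEVICE(dev));
        }
    }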
40
12
41
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
13
diff --git a/target/arm/helper.c b/target/arm/helper.c
42
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/ppc/e500.h
15
--- a/target/arm/helper.c
44
+++ b/hw/ppc/e500.h
16
+++ b/target/arm/helper.c
45
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
46
#define PPCE500_H
18
#endif
47
19
48
#include "hw/boards.h"
20
/* Shared logic between LORID and the rest of the LOR* registers.
49
+#include "hw/platform-bus.h"
21
- * Secure state has already been delt with.
50
22
+ * Secure state exclusion has already been dealt with.
51
typedef struct PPCE500MachineState {
23
*/
52
/*< private >*/
24
-static CPAccessResult access_lor_ns(CPUARMState *env)
53
MachineState parent_obj;
25
+static CPAccessResult access_lor_ns(CPUARMState *env,
54
26
+ const ARMCPRegInfo *ri, bool isread)
55
+ /* points to instance of TYPE_PLATFORM_BUS_DEVICE if
27
{
56
+ * board supports dynamic sysbus devices
28
int el = arm_current_el(env);
57
+ */
29
58
+ PlatformBusDevice *pbus_dev;
30
@@ -XXX,XX +XXX,XX @@ static CPAccessResult access_lor_ns(CPUARMState *env)
59
} PPCE500MachineState;
31
return CP_ACCESS_OK;
60
61
typedef struct PPCE500MachineClass {
62
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
63
index XXXXXXX..XXXXXXX 100644
64
--- a/include/hw/arm/virt.h
65
+++ b/include/hw/arm/virt.h
66
@@ -XXX,XX +XXX,XX @@ typedef struct {
67
typedef struct {
68
MachineState parent;
69
Notifier machine_done;
70
+ DeviceState *platform_bus_dev;
71
FWCfgState *fw_cfg;
72
bool secure;
73
bool highmem;
74
diff --git a/include/hw/platform-bus.h b/include/hw/platform-bus.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/include/hw/platform-bus.h
77
+++ b/include/hw/platform-bus.h
78
@@ -XXX,XX +XXX,XX @@ typedef struct PlatformBusDevice PlatformBusDevice;
79
struct PlatformBusDevice {
80
/*< private >*/
81
SysBusDevice parent_obj;
82
- Notifier notifier;
83
- bool done_gathering;
84
85
/*< public >*/
86
uint32_t mmio_size;
87
@@ -XXX,XX +XXX,XX @@ int platform_bus_get_irqn(PlatformBusDevice *platform_bus, SysBusDevice *sbdev,
88
hwaddr platform_bus_get_mmio_addr(PlatformBusDevice *pbus, SysBusDevice *sbdev,
89
int n);
90
91
+void platform_bus_link_device(PlatformBusDevice *pbus, SysBusDevice *sbdev);
92
+
93
#endif /* HW_PLATFORM_BUS_H */
94
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/hw/arm/sysbus-fdt.c
97
+++ b/hw/arm/sysbus-fdt.c
98
@@ -XXX,XX +XXX,XX @@ static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
99
dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
100
pbus = PLATFORM_BUS_DEVICE(dev);
101
102
- /* We can only create dt nodes for dynamic devices when they're ready */
103
- assert(pbus->done_gathering);
104
-
105
PlatformBusFDTData data = {
106
.fdt = fdt,
107
.irq_start = irq_start,
108
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/hw/arm/virt.c
111
+++ b/hw/arm/virt.c
112
@@ -XXX,XX +XXX,XX @@ static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
113
qdev_prop_set_uint32(dev, "mmio_size",
114
platform_bus_params.platform_bus_size);
115
qdev_init_nofail(dev);
116
+ vms->platform_bus_dev = dev;
117
s = SYS_BUS_DEVICE(dev);
118
119
for (i = 0; i < platform_bus_params.platform_bus_num_irqs; i++) {
120
@@ -XXX,XX +XXX,XX @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
121
return ms->possible_cpus;
122
}
32
}
123
33
124
+static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
34
-static CPAccessResult access_lorid(CPUARMState *env, const ARMCPRegInfo *ri,
125
+ DeviceState *dev, Error **errp)
35
- bool isread)
126
+{
127
+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
128
+
129
+ if (vms->platform_bus_dev) {
130
+ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
131
+ platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
132
+ SYS_BUS_DEVICE(dev));
133
+ }
134
+ }
135
+}
136
+
137
+static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
138
+ DeviceState *dev)
139
+{
140
+ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
141
+ return HOTPLUG_HANDLER(machine);
142
+ }
143
+
144
+ return NULL;
145
+}
146
+
147
static void virt_machine_class_init(ObjectClass *oc, void *data)
148
{
149
MachineClass *mc = MACHINE_CLASS(oc);
150
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
151
152
mc->init = machvirt_init;
153
/* Start max_cpus at the maximum QEMU supports. We'll further restrict
154
@@ -XXX,XX +XXX,XX @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
155
mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
156
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
157
mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
158
+ mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
159
+ hc->plug = virt_machine_device_plug_cb;
160
}
161
162
static const TypeInfo virt_machine_info = {
163
@@ -XXX,XX +XXX,XX @@ static const TypeInfo virt_machine_info = {
164
.instance_size = sizeof(VirtMachineState),
165
.class_size = sizeof(VirtMachineClass),
166
.class_init = virt_machine_class_init,
167
+ .interfaces = (InterfaceInfo[]) {
168
+ { TYPE_HOTPLUG_HANDLER },
169
+ { }
170
+ },
171
};
172
173
static void machvirt_machine_init(void)
174
diff --git a/hw/core/platform-bus.c b/hw/core/platform-bus.c
175
index XXXXXXX..XXXXXXX 100644
176
--- a/hw/core/platform-bus.c
177
+++ b/hw/core/platform-bus.c
178
@@ -XXX,XX +XXX,XX @@ static void plaform_bus_refresh_irqs(PlatformBusDevice *pbus)
179
{
180
bitmap_zero(pbus->used_irqs, pbus->num_irqs);
181
foreach_dynamic_sysbus_device(platform_bus_count_irqs, pbus);
182
- pbus->done_gathering = true;
183
}
184
185
static void platform_bus_map_irq(PlatformBusDevice *pbus, SysBusDevice *sbdev,
186
@@ -XXX,XX +XXX,XX @@ static void platform_bus_map_mmio(PlatformBusDevice *pbus, SysBusDevice *sbdev,
187
}
188
189
/*
190
- * For each sysbus device, look for unassigned IRQ lines as well as
191
- * unassociated MMIO regions. Connect them to the platform bus if available.
192
+ * Look for unassigned IRQ lines as well as unassociated MMIO regions.
193
+ * Connect them to the platform bus if available.
194
*/
195
-static void link_sysbus_device(SysBusDevice *sbdev, void *opaque)
196
+void platform_bus_link_device(PlatformBusDevice *pbus, SysBusDevice *sbdev)
197
{
198
- PlatformBusDevice *pbus = opaque;
199
int i;
200
201
for (i = 0; sysbus_has_irq(sbdev, i); i++) {
202
@@ -XXX,XX +XXX,XX @@ static void link_sysbus_device(SysBusDevice *sbdev, void *opaque)
203
}
204
}
205
206
-static void platform_bus_init_notify(Notifier *notifier, void *data)
207
-{
36
-{
208
- PlatformBusDevice *pb = container_of(notifier, PlatformBusDevice, notifier);
37
- if (arm_is_secure_below_el3(env)) {
209
-
38
- /* Access ok in secure mode. */
210
- /*
39
- return CP_ACCESS_OK;
211
- * Generate a bitmap of used IRQ lines, as the user might have specified
40
- }
212
- * them on the command line.
41
- return access_lor_ns(env);
213
- */
214
- plaform_bus_refresh_irqs(pb);
215
-
216
- foreach_dynamic_sysbus_device(link_sysbus_device, pb);
217
-}
42
-}
218
-
43
-
219
static void platform_bus_realize(DeviceState *dev, Error **errp)
44
static CPAccessResult access_lor_other(CPUARMState *env,
45
const ARMCPRegInfo *ri, bool isread)
220
{
46
{
221
PlatformBusDevice *pbus;
47
@@ -XXX,XX +XXX,XX @@ static CPAccessResult access_lor_other(CPUARMState *env,
222
@@ -XXX,XX +XXX,XX @@ static void platform_bus_realize(DeviceState *dev, Error **errp)
48
/* Access denied in secure mode. */
223
sysbus_init_irq(d, &pbus->irqs[i]);
49
return CP_ACCESS_TRAP;
224
}
50
}
225
51
- return access_lor_ns(env);
226
- /*
52
+ return access_lor_ns(env, ri, isread);
227
- * Register notifier that allows us to gather dangling devices once the
228
- * machine is completely assembled
229
- */
230
- pbus->notifier.notify = platform_bus_init_notify;
231
- qemu_add_machine_init_done_notifier(&pbus->notifier);
232
+ /* some devices might be initialized before so update used IRQs map */
233
+ plaform_bus_refresh_irqs(pbus);
234
}
53
}
235
54
236
static Property platform_bus_properties[] = {
55
/*
237
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
56
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo lor_reginfo[] = {
238
index XXXXXXX..XXXXXXX 100644
57
.type = ARM_CP_CONST, .resetvalue = 0 },
239
--- a/hw/ppc/e500.c
58
{ .name = "LORID_EL1", .state = ARM_CP_STATE_AA64,
240
+++ b/hw/ppc/e500.c
59
.opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7,
241
@@ -XXX,XX +XXX,XX @@ static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque)
60
- .access = PL1_R, .accessfn = access_lorid,
242
}
61
+ .access = PL1_R, .accessfn = access_lor_ns,
243
}
62
.type = ARM_CP_CONST, .resetvalue = 0 },
244
63
REGINFO_SENTINEL
245
-static void platform_bus_create_devtree(const PPCE500MachineClass *pmc,
246
+static void platform_bus_create_devtree(PPCE500MachineState *pms,
247
void *fdt, const char *mpic)
248
{
249
+ const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
250
gchar *node = g_strdup_printf("/platform@%"PRIx64, pmc->platform_bus_base);
251
const char platcomp[] = "qemu,platform\0simple-bus";
252
uint64_t addr = pmc->platform_bus_base;
253
uint64_t size = pmc->platform_bus_size;
254
int irq_start = pmc->platform_bus_first_irq;
255
- PlatformBusDevice *pbus;
256
- DeviceState *dev;
257
258
/* Create a /platform node that we can put all devices into */
259
260
@@ -XXX,XX +XXX,XX @@ static void platform_bus_create_devtree(const PPCE500MachineClass *pmc,
261
262
qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
263
264
- dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
265
- pbus = PLATFORM_BUS_DEVICE(dev);
266
+ /* Create dt nodes for dynamic devices */
267
+ PlatformDevtreeData data = {
268
+ .fdt = fdt,
269
+ .mpic = mpic,
270
+ .irq_start = irq_start,
271
+ .node = node,
272
+ .pbus = pms->pbus_dev,
273
+ };
274
275
- /* We can only create dt nodes for dynamic devices when they're ready */
276
- if (pbus->done_gathering) {
277
- PlatformDevtreeData data = {
278
- .fdt = fdt,
279
- .mpic = mpic,
280
- .irq_start = irq_start,
281
- .node = node,
282
- .pbus = pbus,
283
- };
284
-
285
- /* Loop through all dynamic sysbus devices and create nodes for them */
286
- foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data);
287
- }
288
+ /* Loop through all dynamic sysbus devices and create nodes for them */
289
+ foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data);
290
291
g_free(node);
292
}
293
@@ -XXX,XX +XXX,XX @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
294
}
295
g_free(soc);
296
297
- if (pmc->has_platform_bus) {
298
- platform_bus_create_devtree(pmc, fdt, mpic);
299
+ if (pms->pbus_dev) {
300
+ platform_bus_create_devtree(pms, fdt, mpic);
301
}
302
g_free(mpic);
303
304
@@ -XXX,XX +XXX,XX @@ void ppce500_init(MachineState *machine)
305
qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs);
306
qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size);
307
qdev_init_nofail(dev);
308
- s = SYS_BUS_DEVICE(dev);
309
+ pms->pbus_dev = PLATFORM_BUS_DEVICE(dev);
310
311
+ s = SYS_BUS_DEVICE(pms->pbus_dev);
312
for (i = 0; i < pmc->platform_bus_num_irqs; i++) {
313
int irqn = pmc->platform_bus_first_irq + i;
314
sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, irqn));
315
@@ -XXX,XX +XXX,XX @@ static const TypeInfo ppce500_info = {
316
.name = TYPE_PPCE500_MACHINE,
317
.parent = TYPE_MACHINE,
318
.abstract = true,
319
+ .instance_size = sizeof(PPCE500MachineState),
320
.class_size = sizeof(PPCE500MachineClass),
321
};
64
};
322
323
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
324
index XXXXXXX..XXXXXXX 100644
325
--- a/hw/ppc/e500plat.c
326
+++ b/hw/ppc/e500plat.c
327
@@ -XXX,XX +XXX,XX @@ static void e500plat_init(MachineState *machine)
328
ppce500_init(machine);
329
}
330
331
+static void e500plat_machine_device_plug_cb(HotplugHandler *hotplug_dev,
332
+ DeviceState *dev, Error **errp)
333
+{
334
+ PPCE500MachineState *pms = PPCE500_MACHINE(hotplug_dev);
335
+
336
+ if (pms->pbus_dev) {
337
+ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
338
+ platform_bus_link_device(pms->pbus_dev, SYS_BUS_DEVICE(dev));
339
+ }
340
+ }
341
+}
342
+
343
+static
344
+HotplugHandler *e500plat_machine_get_hotpug_handler(MachineState *machine,
345
+ DeviceState *dev)
346
+{
347
+ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
348
+ return HOTPLUG_HANDLER(machine);
349
+ }
350
+
351
+ return NULL;
352
+}
353
+
354
#define TYPE_E500PLAT_MACHINE MACHINE_TYPE_NAME("ppce500")
355
356
static void e500plat_machine_class_init(ObjectClass *oc, void *data)
357
{
358
PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc);
359
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
360
MachineClass *mc = MACHINE_CLASS(oc);
361
362
+ mc->get_hotplug_handler = e500plat_machine_get_hotpug_handler;
363
+ hc->plug = e500plat_machine_device_plug_cb;
364
+
365
pmc->pci_first_slot = 0x1;
366
pmc->pci_nr_slots = PCI_SLOT_MAX - 1;
367
pmc->fixup_devtree = e500plat_fixup_devtree;
368
@@ -XXX,XX +XXX,XX @@ static const TypeInfo e500plat_info = {
369
.name = TYPE_E500PLAT_MACHINE,
370
.parent = TYPE_PPCE500_MACHINE,
371
.class_init = e500plat_machine_class_init,
372
+ .interfaces = (InterfaceInfo[]) {
373
+ { TYPE_HOTPLUG_HANDLER },
374
+ { }
375
+ }
376
};
377
378
static void e500plat_register_types(void)
379
--
65
--
380
2.17.0
66
2.20.1
381
67
382
68
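The hunk context above elides the body of access_lor_ns(); for orientation, the consolidated check is shaped roughly like the sketch below. The EL2/EL3 trap conditions shown are an assumption inferred from the commit message (SCR_EL3.TLOR now applies to Secure state too), not a verbatim quote of target/arm/helper.c:

    static CPAccessResult access_lor_ns(CPUARMState *env,
                                        const ARMCPRegInfo *ri, bool isread)
    {
        int el = arm_current_el(env);

        if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TLOR)) {
            return CP_ACCESS_TRAP_EL2;  /* HCR_EL2.TLOR traps EL0/EL1 */
        }
        if (el < 3 && (env->cp15.scr_el3 & SCR_TLOR)) {
            return CP_ACCESS_TRAP_EL3;  /* SCR_EL3.TLOR traps below EL3 */
        }
        return CP_ACCESS_OK;
    }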
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

Use write_fp_dreg and clear_vec_high to zero the bits
that need zeroing for these cases.

Cc: qemu-stable@nongnu.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180502221552.3873-5-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate-a64.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)

if (itof) {
TCGv_i64 tcg_rn = cpu_reg(s, rn);
+ TCGv_i64 tmp;

switch (type) {
case 0:
- {
/* 32 bit */
- TCGv_i64 tmp = tcg_temp_new_i64();
+ tmp = tcg_temp_new_i64();
tcg_gen_ext32u_i64(tmp, tcg_rn);
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
- tcg_gen_movi_i64(tmp, 0);
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
+ write_fp_dreg(s, rd, tmp);
tcg_temp_free_i64(tmp);
break;
- }
case 1:
- {
/* 64 bit */
- TCGv_i64 tmp = tcg_const_i64(0);
- tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
- tcg_temp_free_i64(tmp);
+ write_fp_dreg(s, rd, tcg_rn);
break;
- }
case 2:
/* 64 bit to top half. */
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
+ clear_vec_high(s, true, rd);
break;
}
} else {
--
2.17.0

If we're using the capstone disassembler, disassembly of a run of
instructions more than 32 bytes long disassembles the wrong data for
instructions beyond the 32 byte mark:

(qemu) xp /16x 0x100
0000000000000100: 0x00000005 0x54410001 0x00000001 0x00001000
0000000000000110: 0x00000000 0x00000004 0x54410002 0x3c000000
0000000000000120: 0x00000000 0x00000004 0x54410009 0x74736574
0000000000000130: 0x00000000 0x00000000 0x00000000 0x00000000
(qemu) xp /16i 0x100
0x00000100: 00000005 andeq r0, r0, r5
0x00000104: 54410001 strbpl r0, [r1], #-1
0x00000108: 00000001 andeq r0, r0, r1
0x0000010c: 00001000 andeq r1, r0, r0
0x00000110: 00000000 andeq r0, r0, r0
0x00000114: 00000004 andeq r0, r0, r4
0x00000118: 54410002 strbpl r0, [r1], #-2
0x0000011c: 3c000000 .byte 0x00, 0x00, 0x00, 0x3c
0x00000120: 54410001 strbpl r0, [r1], #-1
0x00000124: 00000001 andeq r0, r0, r1
0x00000128: 00001000 andeq r1, r0, r0
0x0000012c: 00000000 andeq r0, r0, r0
0x00000130: 00000004 andeq r0, r0, r4
0x00000134: 54410002 strbpl r0, [r1], #-2
0x00000138: 3c000000 .byte 0x00, 0x00, 0x00, 0x3c
0x0000013c: 00000000 andeq r0, r0, r0

Here the disassembly of 0x120..0x13f is using the data that is in
0x104..0x123.

This is caused by passing the wrong value to the read_memory_func().
The intention is that at this point in the loop the 'cap_buf' buffer
already contains 'csize' bytes of data for the instruction at guest
addr 'pc', and we want to read in an extra 'tsize' bytes. Those
extra bytes are therefore at 'pc + csize', not 'pc'. On the first
time through the loop 'csize' happens to be zero, so the initial read
of 32 bytes into cap_buf is correct and as long as the disassembly
never needs to read more data we return the correct information.

Use the correct guest address in the call to read_memory_func().

Cc: qemu-stable@nongnu.org
Fixes: https://bugs.launchpad.net/qemu/+bug/1900779
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20201022132445.25039-1-peter.maydell@linaro.org
---
disas/capstone.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/disas/capstone.c b/disas/capstone.c
index XXXXXXX..XXXXXXX 100644
--- a/disas/capstone.c
+++ b/disas/capstone.c
@@ -XXX,XX +XXX,XX @@ bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)

/* Make certain that we can make progress. */
assert(tsize != 0);
- info->read_memory_func(pc, cap_buf + csize, tsize, info);
+ info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
csize += tsize;

if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
--
2.20.1
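To make the off-by-'csize' concrete, the refill step described above looks roughly like the fragment below. This is a simplified sketch of the loop in cap_disas_monitor(), not the verbatim code; 'remaining' is an illustrative stand-in for however many more bytes the disassembler asked for:

    /* cap_buf already holds csize bytes for guest address pc, so the
     * next tsize bytes must come from pc + csize and land at
     * cap_buf + csize; passing plain 'pc' re-reads the first bytes and
     * shifts everything after the 32-byte mark.
     */
    size_t tsize = MIN(sizeof(cap_buf) - csize, remaining);
    assert(tsize != 0);                 /* make certain we make progress */
    info->read_memory_func(pc + csize,  /* guest address of the unread gap */
                           cap_buf + csize, tsize, info);
    csize += tsize;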
diff view generated by jsdifflib
From: Igor Mammedov <imammedo@redhat.com>

By default MachineClass::get_hotplug_handler is NULL and a concrete board
should set it to its own handler.
Considering there isn't any default handler, drop saving the empty
MachineClass::get_hotplug_handler in the child class and make the PC code
consistent with the spapr/s390x boards.

We can bring this back when an actual use case surfaces, and do it
consistently across boards that use get_hotplug_handler().

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Message-id: 1525691524-32265-2-git-send-email-imammedo@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
include/hw/i386/pc.h | 8 --------
hw/i386/pc.c | 6 +-----
2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -XXX,XX +XXX,XX @@ struct PCMachineState {
/**
* PCMachineClass:
*
- * Methods:
- *
- * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
- *
* Compat fields:
*
* @enforce_aligned_dimm: check that DIMM's address/size is aligned by
@@ -XXX,XX +XXX,XX @@ struct PCMachineClass {

/*< public >*/

- /* Methods: */
- HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
- DeviceState *dev);
-
/* Device configuration: */
bool pci_enabled;
bool kvmclock_enabled;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -XXX,XX +XXX,XX @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
DeviceState *dev)
{
- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
-
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
return HOTPLUG_HANDLER(machine);
}

- return pcmc->get_hotplug_handler ?
- pcmc->get_hotplug_handler(machine, dev) : NULL;
+ return NULL;
}

static void
@@ -XXX,XX +XXX,XX @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
NMIClass *nc = NMI_CLASS(oc);

- pcmc->get_hotplug_handler = mc->get_hotplug_handler;
pcmc->pci_enabled = true;
pcmc->has_acpi_build = true;
pcmc->rsdp_in_ram = true;
--
2.17.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Use the BIT_ULL() macro to ensure we use 64-bit arithmetic.
This fixes the following Coverity issue (OVERFLOW_BEFORE_WIDEN):

CID 1432363 (#1 of 1): Unintentional integer overflow:

overflow_before_widen:
Potentially overflowing expression 1 << scale with type int
(32 bits, signed) is evaluated using 32-bit arithmetic, and
then used in a context that expects an expression of type
hwaddr (64 bits, unsigned).

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20201030144617.1535064-1-philmd@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/arm/smmuv3.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -XXX,XX +XXX,XX @@
*/

#include "qemu/osdep.h"
+#include "qemu/bitops.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "migration/vmstate.h"
@@ -XXX,XX +XXX,XX @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
scale = CMD_SCALE(cmd);
num = CMD_NUM(cmd);
ttl = CMD_TTL(cmd);

- num_pages = (num + 1) * (1 << (scale));
+ num_pages = (num + 1) * BIT_ULL(scale);
}

if (type == SMMU_CMD_TLBI_NH_VA) {
--
2.20.1
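The overflow pattern fixed here is worth spelling out: with scale coming from a guest-controlled command field, a 32-bit shift invokes undefined behaviour before the result is ever widened, whereas BIT_ULL() (which expands to 1ULL << nr) forces the shift into 64 bits first. A minimal illustration, with hypothetical values rather than code from the patch:

    #include "qemu/bitops.h"

    uint64_t num_pages_for(uint64_t num, uint8_t scale)
    {
        /* With (num + 1) * (1 << scale), '1 << scale' is computed as a
         * 32-bit int, so scale >= 31 overflows *before* the widening
         * multiply ever happens.
         */
        return (num + 1) * BIT_ULL(scale); /* 64-bit shift, no overflow */
    }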
diff view generated by jsdifflib
From: Igor Mammedov <imammedo@redhat.com>

load_dtb() depends on arm_load_kernel() to figure out the place
in RAM where it should be loaded, but it's not required for
arm_load_kernel() to work. Sometimes it's necessary for
devices added with -device/device_add to be enumerated in the
DTB as well, which has led to [1] and surrounding commits adding
2 more machine_done notifiers with non-obvious ordering
to make dynamic sysbus device initialization happen in
the right order.

However, instead of moving the whole of arm_load_kernel() into
machine_done, it's sufficient to move only load_dtb() into the
virt_machine_done() notifier and remove the ArmLoadKernelNotifier/
/PlatformBusFDTNotifierParams notifiers, which saves us ~90LOC
and simplifies the code flow quite a bit.
Later this would allow consolidating DTB generation within one
function for the 'mach-virt' board and making it reentrant so it
could generate an updated DTB in device hotplug scenarios.

While at it, rename load_dtb() to arm_load_dtb() since it's
public now.

Add an additional field skip_dtb_autoload to struct arm_boot_info
to allow manual DTB load later in mach-virt and to avoid touching
all other boards to explicitly call arm_load_dtb().

1) (ac9d32e hw/arm/boot: arm_load_kernel implemented as a machine init done notifier)

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 1525691524-32265-4-git-send-email-imammedo@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
include/hw/arm/arm.h | 45 +++++++++++++++++------
include/hw/arm/sysbus-fdt.h | 37 ++++---------------
hw/arm/boot.c | 72 ++++++++++---------------------------
hw/arm/sysbus-fdt.c | 61 +++----------------------------
hw/arm/virt.c | 64 ++++++++++++++++-----------------
5 files changed, 94 insertions(+), 185 deletions(-)

diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -XXX,XX +XXX,XX @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
*/
void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);

-/*
- * struct used as a parameter of the arm_load_kernel machine init
- * done notifier
- */
-typedef struct {
- Notifier notifier; /* actual notifier */
- ARMCPU *cpu; /* handle to the first cpu object */
-} ArmLoadKernelNotifier;
-
/* arm_boot.c */
struct arm_boot_info {
uint64_t ram_size;
@@ -XXX,XX +XXX,XX @@ struct arm_boot_info {
const char *initrd_filename;
const char *dtb_filename;
hwaddr loader_start;
+ hwaddr dtb_start;
+ hwaddr dtb_limit;
+ /* If set to True, arm_load_kernel() will not load DTB.
+ * It allows board to load DTB manually later.
+ * (default: False)
+ */
+ bool skip_dtb_autoload;
/* multicore boards that use the default secondary core boot functions
* need to put the address of the secondary boot code, the boot reg,
* and the GIC address in the next 3 values, respectively. boards that
@@ -XXX,XX +XXX,XX @@ struct arm_boot_info {
* the user it should implement this hook.
*/
void (*modify_dtb)(const struct arm_boot_info *info, void *fdt);
- /* machine init done notifier executing arm_load_dtb */
- ArmLoadKernelNotifier load_kernel_notifier;
/* Used internally by arm_boot.c */
int is_linux;
hwaddr initrd_start;
@@ -XXX,XX +XXX,XX @@ struct arm_boot_info {
*/
void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info);

+AddressSpace *arm_boot_address_space(ARMCPU *cpu,
+ const struct arm_boot_info *info);
+
+/**
+ * arm_load_dtb() - load a device tree binary image into memory
+ * @addr: the address to load the image at
+ * @binfo: struct describing the boot environment
+ * @addr_limit: upper limit of the available memory area at @addr
+ * @as: address space to load image to
+ *
+ * Load a device tree supplied by the machine or by the user with the
+ * '-dtb' command line option, and put it at offset @addr in target
+ * memory.
+ *
+ * If @addr_limit contains a meaningful value (i.e., it is strictly greater
+ * than @addr), the device tree is only loaded if its size does not exceed
+ * the limit.
+ *
+ * Returns: the size of the device tree image on success,
+ * 0 if the image size exceeds the limit,
+ * -1 on errors.
+ *
+ * Note: Must not be called unless have_dtb(binfo) is true.
+ */
+int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+ hwaddr addr_limit, AddressSpace *as);
+
/* Write a secure board setup routine with a dummy handler for SMCs */
void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
const struct arm_boot_info *info,
diff --git a/include/hw/arm/sysbus-fdt.h b/include/hw/arm/sysbus-fdt.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/arm/sysbus-fdt.h
+++ b/include/hw/arm/sysbus-fdt.h
@@ -XXX,XX +XXX,XX @@
#ifndef HW_ARM_SYSBUS_FDT_H
#define HW_ARM_SYSBUS_FDT_H

-#include "hw/arm/arm.h"
-#include "qemu-common.h"
-#include "hw/sysbus.h"
-
-/*
- * struct that contains dimensioning parameters of the platform bus
- */
-typedef struct {
- hwaddr platform_bus_base; /* start address of the bus */
- hwaddr platform_bus_size; /* size of the bus */
- int platform_bus_first_irq; /* first hwirq assigned to the bus */
- int platform_bus_num_irqs; /* number of hwirq assigned to the bus */
-} ARMPlatformBusSystemParams;
-
-/*
- * struct that contains all relevant info to build the fdt nodes of
- * platform bus and attached dynamic sysbus devices
- * in the future might be augmented with additional info
- * such as PHY, CLK handles ...
- */
-typedef struct {
- const ARMPlatformBusSystemParams *system_params;
- struct arm_boot_info *binfo;
- const char *intc; /* parent interrupt controller name */
-} ARMPlatformBusFDTParams;
+#include "exec/hwaddr.h"

/**
- * arm_register_platform_bus_fdt_creator - register a machine init done
- * notifier that creates the device tree nodes of the platform bus and
- * associated dynamic sysbus devices
+ * platform_bus_add_all_fdt_nodes - create all the platform bus nodes
+ *
+ * builds the parent platform bus node and all the nodes of dynamic
+ * sysbus devices attached to it.
*/
-void arm_register_platform_bus_fdt_creator(ARMPlatformBusFDTParams *fdt_params);
-
+void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
+ hwaddr bus_size, int irq_start);
#endif
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -XXX,XX +XXX,XX @@
#define ARM64_TEXT_OFFSET_OFFSET 8
#define ARM64_MAGIC_OFFSET 56

-static AddressSpace *arm_boot_address_space(ARMCPU *cpu,
- const struct arm_boot_info *info)
+AddressSpace *arm_boot_address_space(ARMCPU *cpu,
+ const struct arm_boot_info *info)
{
/* Return the address space to use for bootloader reads and writes.
* We prefer the secure address space if the CPU has it and we're
@@ -XXX,XX +XXX,XX @@ static void fdt_add_psci_node(void *fdt)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}

-/**
- * load_dtb() - load a device tree binary image into memory
- * @addr: the address to load the image at
- * @binfo: struct describing the boot environment
- * @addr_limit: upper limit of the available memory area at @addr
- * @as: address space to load image to
- *
- * Load a device tree supplied by the machine or by the user with the
- * '-dtb' command line option, and put it at offset @addr in target
- * memory.
- *
- * If @addr_limit contains a meaningful value (i.e., it is strictly greater
- * than @addr), the device tree is only loaded if its size does not exceed
- * the limit.
- *
- * Returns: the size of the device tree image on success,
- * 0 if the image size exceeds the limit,
- * -1 on errors.
- *
- * Note: Must not be called unless have_dtb(binfo) is true.
- */
-static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
- hwaddr addr_limit, AddressSpace *as)
+int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+ hwaddr addr_limit, AddressSpace *as)
{
void *fdt = NULL;
int size, rc;
@@ -XXX,XX +XXX,XX @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
return size;
}

-static void arm_load_kernel_notify(Notifier *notifier, void *data)
+void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
{
CPUState *cs;
int kernel_size;
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
int elf_machine;
hwaddr entry;
static const ARMInsnFixup *primary_loader;
- ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
- notifier, notifier);
- ARMCPU *cpu = n->cpu;
- struct arm_boot_info *info =
- container_of(n, struct arm_boot_info, load_kernel_notifier);
AddressSpace *as = arm_boot_address_space(cpu, info);

/* The board code is not supposed to set secure_board_setup unless
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
assert(!(info->secure_board_setup && kvm_enabled()));

info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
+ info->dtb_limit = 0;

/* Load the kernel. */
if (!info->kernel_filename || info->firmware_loaded) {
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
* the kernel is supposed to be loaded by the bootloader), copy the
* DTB to the base of RAM for the bootloader to pick up.
*/
- if (load_dtb(info->loader_start, info, 0, as) < 0) {
- exit(1);
- }
+ info->dtb_start = info->loader_start;
}

if (info->kernel_filename) {
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
*/
if (elf_low_addr > info->loader_start
|| elf_high_addr < info->loader_start) {
- /* Pass elf_low_addr as address limit to load_dtb if it may be
+ /* Set elf_low_addr as address limit for arm_load_dtb if it may be
* pointing into RAM, otherwise pass '0' (no limit)
*/
if (elf_low_addr < info->loader_start) {
elf_low_addr = 0;
}
- if (load_dtb(info->loader_start, info, elf_low_addr, as) < 0) {
- exit(1);
- }
+ info->dtb_start = info->loader_start;
+ info->dtb_limit = elf_low_addr;
}
}
entry = elf_entry;
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
*/
if (have_dtb(info)) {
hwaddr align;
- hwaddr dtb_start;

if (elf_machine == EM_AARCH64) {
/*
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
}

/* Place the DTB after the initrd in memory with alignment. */
- dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size, align);
- if (load_dtb(dtb_start, info, 0, as) < 0) {
- exit(1);
- }
- fixupcontext[FIXUP_ARGPTR] = dtb_start;
+ info->dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size,
+ align);
+ fixupcontext[FIXUP_ARGPTR] = info->dtb_start;
} else {
fixupcontext[FIXUP_ARGPTR] = info->loader_start + KERNEL_ARGS_ADDR;
if (info->ram_size >= (1ULL << 32)) {
@@ -XXX,XX +XXX,XX @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
ARM_CPU(cs)->env.boot_info = info;
}
-}
-
-void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
-{
- CPUState *cs;
-
- info->load_kernel_notifier.cpu = cpu;
- info->load_kernel_notifier.notifier.notify = arm_load_kernel_notify;
- qemu_add_machine_init_done_notifier(&info->load_kernel_notifier.notifier);

/* CPU objects (unlike devices) are not automatically reset on system
* reset, so we must always register a handler to do so. If we're
@@ -XXX,XX +XXX,XX @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
}
+
+ if (!info->skip_dtb_autoload && have_dtb(info)) {
+ if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
+ exit(1);
+ }
+ }
}

static const TypeInfo arm_linux_boot_if_info = {
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -XXX,XX +XXX,XX @@ typedef struct PlatformBusFDTData {
PlatformBusDevice *pbus;
} PlatformBusFDTData;

-/*
- * struct used when calling the machine init done notifier
- * that constructs the fdt nodes of platform bus devices
- */
-typedef struct PlatformBusFDTNotifierParams {
- Notifier notifier;
- ARMPlatformBusFDTParams *fdt_params;
-} PlatformBusFDTNotifierParams;
-
/* struct that associates a device type name and a node creation function */
typedef struct NodeCreationPair {
const char *typename;
@@ -XXX,XX +XXX,XX @@ static void add_fdt_node(SysBusDevice *sbdev, void *opaque)
exit(1);
}

-/**
- * add_all_platform_bus_fdt_nodes - create all the platform bus nodes
- *
- * builds the parent platform bus node and all the nodes of dynamic
- * sysbus devices attached to it.
- */
-static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
+void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
+ hwaddr bus_size, int irq_start)
{
const char platcomp[] = "qemu,platform\0simple-bus";
PlatformBusDevice *pbus;
DeviceState *dev;
gchar *node;
- uint64_t addr, size;
- int irq_start, dtb_size;
- struct arm_boot_info *info = fdt_params->binfo;
- const ARMPlatformBusSystemParams *params = fdt_params->system_params;
- const char *intc = fdt_params->intc;
- void *fdt = info->get_dtb(info, &dtb_size);
-
- /*
- * If the user provided a dtb, we assume the dynamic sysbus nodes
- * already are integrated there. This corresponds to a use case where
- * the dynamic sysbus nodes are complex and their generation is not yet
- * supported. In that case the user can take charge of the guest dt
- * while qemu takes charge of the qom stuff.
- */
- if (info->dtb_filename) {
- return;
- }

assert(fdt);

- node = g_strdup_printf("/platform@%"PRIx64, params->platform_bus_base);
- addr = params->platform_bus_base;
- size = params->platform_bus_size;
- irq_start = params->platform_bus_first_irq;
+ node = g_strdup_printf("/platform@%"PRIx64, addr);

/* Create a /platform node that we can put all devices into */
qemu_fdt_add_subnode(fdt, node);
@@ -XXX,XX +XXX,XX @@ static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
*/
qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
- qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size);
+ qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);

qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);

@@ -XXX,XX +XXX,XX @@ static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)

g_free(node);
}
-
-static void platform_bus_fdt_notify(Notifier *notifier, void *data)
-{
- PlatformBusFDTNotifierParams *p = DO_UPCAST(PlatformBusFDTNotifierParams,
- notifier, notifier);
-
- add_all_platform_bus_fdt_nodes(p->fdt_params);
- g_free(p->fdt_params);
- g_free(p);
-}
-
-void arm_register_platform_bus_fdt_creator(ARMPlatformBusFDTParams *fdt_params)
-{
- PlatformBusFDTNotifierParams *p = g_new(PlatformBusFDTNotifierParams, 1);
-
- p->fdt_params = fdt_params;
- p->notifier.notify = platform_bus_fdt_notify;
- qemu_add_machine_init_done_notifier(&p->notifier);
-}
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -XXX,XX +XXX,XX @@

#define PLATFORM_BUS_NUM_IRQS 64

-static ARMPlatformBusSystemParams platform_bus_params;
-
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
* RAM can go up to the 256GB mark, leaving 256GB of the physical
* address space unallocated and free for future use between 256G and 512G.
@@ -XXX,XX +XXX,XX @@ static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
DeviceState *dev;
SysBusDevice *s;
int i;
- ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
MemoryRegion *sysmem = get_system_memory();

- platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
- platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size;
- platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS];
- platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;
-
- fdt_params->system_params = &platform_bus_params;
- fdt_params->binfo = &vms->bootinfo;
- fdt_params->intc = "/intc";
- /*
- * register a machine init done notifier that creates the device tree
- * nodes of the platform bus and its children dynamic sysbus devices
- */
- arm_register_platform_bus_fdt_creator(fdt_params);
-
dev = qdev_create(NULL, TYPE_PLATFORM_BUS_DEVICE);
dev->id = TYPE_PLATFORM_BUS_DEVICE;
- qdev_prop_set_uint32(dev, "num_irqs",
- platform_bus_params.platform_bus_num_irqs);
- qdev_prop_set_uint32(dev, "mmio_size",
- platform_bus_params.platform_bus_size);
+ qdev_prop_set_uint32(dev, "num_irqs", PLATFORM_BUS_NUM_IRQS);
+ qdev_prop_set_uint32(dev, "mmio_size", vms->memmap[VIRT_PLATFORM_BUS].size);
qdev_init_nofail(dev);
vms->platform_bus_dev = dev;
- s = SYS_BUS_DEVICE(dev);

- for (i = 0; i < platform_bus_params.platform_bus_num_irqs; i++) {
- int irqn = platform_bus_params.platform_bus_first_irq + i;
+ s = SYS_BUS_DEVICE(dev);
+ for (i = 0; i < PLATFORM_BUS_NUM_IRQS; i++) {
+ int irqn = vms->irqmap[VIRT_PLATFORM_BUS] + i;
sysbus_connect_irq(s, i, pic[irqn]);
}

memory_region_add_subregion(sysmem,
- platform_bus_params.platform_bus_base,
+ vms->memmap[VIRT_PLATFORM_BUS].base,
sysbus_mmio_get_region(s, 0));
}

@@ -XXX,XX +XXX,XX @@ void virt_machine_done(Notifier *notifier, void *data)
{
VirtMachineState *vms = container_of(notifier, VirtMachineState,
machine_done);
+ ARMCPU *cpu = ARM_CPU(first_cpu);
+ struct arm_boot_info *info = &vms->bootinfo;
+ AddressSpace *as = arm_boot_address_space(cpu, info);
+
+ /*
+ * If the user provided a dtb, we assume the dynamic sysbus nodes
+ * already are integrated there. This corresponds to a use case where
+ * the dynamic sysbus nodes are complex and their generation is not yet
+ * supported. In that case the user can take charge of the guest dt
+ * while qemu takes charge of the qom stuff.
+ */
+ if (info->dtb_filename == NULL) {
+ platform_bus_add_all_fdt_nodes(vms->fdt, "/intc",
+ vms->memmap[VIRT_PLATFORM_BUS].base,
+ vms->memmap[VIRT_PLATFORM_BUS].size,
+ vms->irqmap[VIRT_PLATFORM_BUS]);
+ }
+ if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
+ exit(1);
+ }

virt_acpi_setup(vms);
virt_build_smbios(vms);
@@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine)
vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
rom_set_fw(vms->fw_cfg);

- vms->machine_done.notify = virt_machine_done;
- qemu_add_machine_init_done_notifier(&vms->machine_done);
+ create_platform_bus(vms, pic);

vms->bootinfo.ram_size = machine->ram_size;
vms->bootinfo.kernel_filename = machine->kernel_filename;
@@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine)
vms->bootinfo.board_id = -1;
vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
vms->bootinfo.get_dtb = machvirt_dtb;
+ vms->bootinfo.skip_dtb_autoload = true;
vms->bootinfo.firmware_loaded = firmware_loaded;
arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);

- /*
- * arm_load_kernel machine init done notifier registration must
- * happen before the platform_bus_create call. In this latter,
- * another notifier is registered which adds platform bus nodes.
- * Notifiers are executed in registration reverse order.
- */
- create_platform_bus(vms, pic);
+ vms->machine_done.notify = virt_machine_done;
+ qemu_add_machine_init_done_notifier(&vms->machine_done);
}

static bool virt_get_secure(Object *obj, Error **errp)
--
2.17.0

From: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>

When booting a CPU with EL3 using the -kernel flag, set up CPTR_EL3 so
that SVE will not trap to EL3.

Signed-off-by: Rémi Denis-Courmont <remi.denis.courmont@huawei.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030151541.11976-1-remi@remlab.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/arm/boot.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -XXX,XX +XXX,XX @@ static void do_cpu_reset(void *opaque)
if (cpu_isar_feature(aa64_mte, cpu)) {
env->cp15.scr_el3 |= SCR_ATA;
}
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ env->cp15.cptr_el[3] |= CPTR_EZ;
+ }
/* AArch64 kernels never boot in secure mode */
assert(!info->secure_boot);
/* This hook is only supported for AArch32 currently:
--
2.20.1
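As a usage sketch of the new skip_dtb_autoload flow (hypothetical board code modelled on the virt changes above, not part of the patch): a board that wants to generate device-tree nodes after all devices exist sets the flag, then loads the DTB itself from its machine-done notifier:

    /* Hypothetical board sketch: defer DTB loading to machine-done so
     * dynamically added sysbus nodes can be created first.
     */
    static void myboard_machine_done(Notifier *notifier, void *data)
    {
        MyBoardState *s = container_of(notifier, MyBoardState, machine_done);
        struct arm_boot_info *info = &s->bootinfo;
        AddressSpace *as = arm_boot_address_space(ARM_CPU(first_cpu), info);

        /* ... add platform-bus fdt nodes here ... */
        if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
            exit(1);
        }
    }

    /* in the board init function: */
    s->bootinfo.skip_dtb_autoload = true;
    arm_load_kernel(ARM_CPU(first_cpu), &s->bootinfo);
    s->machine_done.notify = myboard_machine_done;
    qemu_add_machine_init_done_notifier(&s->machine_done);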
diff view generated by jsdifflib
From: Richard Henderson <richard.henderson@linaro.org>

While we have some of the scalar paths for FCVT for fp16,
we failed to decode the fp16 versions of these instructions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180502221552.3873-3-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate-a64.c | 65 +++++++++++++++++++++++++++-----------
1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
bool is_q, bool is_u,
int immh, int immb, int rn, int rd)
{
- bool is_double = extract32(immh, 3, 1);
int immhb = immh << 3 | immb;
- int fracbits = (is_double ? 128 : 64) - immhb;
- int pass;
+ int pass, size, fracbits;
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_rmode, tcg_shift;

- if (!extract32(immh, 2, 2)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!is_scalar && !is_q && is_double) {
+ if (immh & 0x8) {
+ size = MO_64;
+ if (!is_scalar && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else if (immh & 0x4) {
+ size = MO_32;
+ } else if (immh & 0x2) {
+ size = MO_16;
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ /* Should have split out AdvSIMD modified immediate earlier. */
+ assert(immh == 1);
unallocated_encoding(s);
return;
}
@@ -XXX,XX +XXX,XX @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
assert(!(is_scalar && is_q));

tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
- tcg_fpstatus = get_fpstatus_ptr(false);
+ tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ fracbits = (16 << size) - immhb;
tcg_shift = tcg_const_i32(fracbits);

- if (is_double) {
+ if (size == MO_64) {
int maxpass = is_scalar ? 1 : 2;

for (pass = 0; pass < maxpass; pass++) {
@@ -XXX,XX +XXX,XX @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
}
clear_vec_high(s, is_q, rd);
} else {
- int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
+ void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+ int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
+
+ switch (size) {
+ case MO_16:
+ if (is_u) {
+ fn = gen_helper_vfp_toulh;
+ } else {
+ fn = gen_helper_vfp_toslh;
+ }
+ break;
+ case MO_32:
+ if (is_u) {
+ fn = gen_helper_vfp_touls;
+ } else {
+ fn = gen_helper_vfp_tosls;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
for (pass = 0; pass < maxpass; pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();

- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
- if (is_u) {
- gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
- }
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
+ fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
if (is_scalar) {
write_fp_sreg(s, rd, tcg_op);
} else {
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_op, rd, pass, size);
}
tcg_temp_free_i32(tcg_op);
}
--
2.17.0

From: AlexChen <alex.chen@huawei.com>

In omap_lcd_interrupts(), the pointer omap_lcd is dereferenced before
being checked for validity, which may lead to a NULL pointer
dereference. So move the assignment to surface to after the check that
omap_lcd is valid, and move surface_bits_per_pixel(surface) to after
the surface assignment.

Reported-by: Euler Robot <euler.robot@huawei.com>
Signed-off-by: AlexChen <alex.chen@huawei.com>
Message-id: 5F9CDB8A.9000001@huawei.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/display/omap_lcdc.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/display/omap_lcdc.c b/hw/display/omap_lcdc.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/display/omap_lcdc.c
+++ b/hw/display/omap_lcdc.c
@@ -XXX,XX +XXX,XX @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s)
static void omap_update_display(void *opaque)
{
struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque;
- DisplaySurface *surface = qemu_console_surface(omap_lcd->con);
+ DisplaySurface *surface;
draw_line_func draw_line;
int size, height, first, last;
int width, linesize, step, bpp, frame_offset;
hwaddr frame_base;

- if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable ||
- !surface_bits_per_pixel(surface)) {
+ if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) {
+ return;
+ }
+
+ surface = qemu_console_surface(omap_lcd->con);
+ if (!surface_bits_per_pixel(surface)) {
return;
}
--
2.20.1
diff view generated by jsdifflib
From: Igor Mammedov <imammedo@redhat.com>

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1525691524-32265-5-git-send-email-imammedo@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/arm/virt.c | 1 +
hw/i386/pc.c | 1 +
hw/ppc/e500plat.c | 1 +
hw/ppc/spapr.c | 1 +
hw/s390x/s390-virtio-ccw.c | 1 +
5 files changed, 5 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -XXX,XX +XXX,XX @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
+ assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
hc->plug = virt_machine_device_plug_cb;
}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -XXX,XX +XXX,XX @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->save_tsc_khz = true;
pcmc->linuxboot_dma_enabled = true;
+ assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotpug_handler;
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -XXX,XX +XXX,XX @@ static void e500plat_machine_class_init(ObjectClass *oc, void *data)
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
MachineClass *mc = MACHINE_CLASS(oc);

+ assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = e500plat_machine_get_hotpug_handler;
hc->plug = e500plat_machine_device_plug_cb;

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -XXX,XX +XXX,XX @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->kvm_type = spapr_kvm_type;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
mc->pci_allow_0_address = true;
+ assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = spapr_get_hotplug_handler;
hc->pre_plug = spapr_machine_device_pre_plug;
hc->plug = spapr_machine_device_plug;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -XXX,XX +XXX,XX @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
mc->no_sdcard = 1;
mc->max_cpus = S390_MAX_CPUS;
mc->has_hotpluggable_cpus = true;
+ assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = s390_get_hotplug_handler;
mc->cpu_index_to_instance_props = s390_cpu_index_to_props;
mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids;
--
2.17.0

From: AlexChen <alex.chen@huawei.com>

In exynos4210_fimd_update(), the pointer s is dereferenced before
being checked for validity, which may lead to a NULL pointer
dereference. So move the assignment to global_width to after the
check that s is valid.

Reported-by: Euler Robot <euler.robot@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 5F9F8D88.9030102@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/display/exynos4210_fimd.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -XXX,XX +XXX,XX @@ static void exynos4210_fimd_update(void *opaque)
bool blend = false;
uint8_t *host_fb_addr;
bool is_dirty = false;
- const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
+ int global_width;

if (!s || !s->console || !s->enabled ||
surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) {
return;
}
+
+ global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
exynos4210_update_resolution(s);
surface = qemu_console_surface(s->console);
--
2.20.1
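The one-line asserts above follow a common defensive pattern: catch an accidental overwrite of an inherited class method at class-init time rather than debugging a silently dropped handler later. In outline (hypothetical board code mirroring the hunks, not from the patch):

    static void myboard_class_init(ObjectClass *oc, void *data)
    {
        MachineClass *mc = MACHINE_CLASS(oc);

        /* abort early if a parent class ever starts installing a default */
        assert(!mc->get_hotplug_handler);
        mc->get_hotplug_handler = myboard_get_hotplug_handler;
    }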
diff view generated by jsdifflib
New patch

In arm_v7m_mmu_idx_for_secstate() we get the 'priv' level to pass to
armv7m_mmu_idx_for_secstate_and_priv() by calling arm_current_el().
This is incorrect when the security state being queried is not the
current one, because arm_current_el() uses the current security state
to determine which of the banked CONTROL.nPRIV bits to look at.
The effect was that if (for instance) Secure state was in privileged
mode but Non-Secure was not then we would return the wrong MMU index.

The only places where we are using this function in a way that could
trigger this bug are for the stack loads during a v8M function-return
and for the instruction fetch of a v8M SG insn.

Fix the bug by expanding out the M-profile version of the
arm_current_el() logic inline so it can use the passed in secstate
rather than env->v7m.secure.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201022164408.13214-1-peter.maydell@linaro.org
---
target/arm/m_helper.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
/* Return the MMU index for a v7M CPU in the specified security state */
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
{
- bool priv = arm_current_el(env) != 0;
+ bool priv = arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[secstate] & 1);

return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
}
--
2.20.1
diff view generated by jsdifflib
On some hosts (e.g. Ubuntu Bionic) pkg-config returns a set of
libraries for gio-2.0 which don't actually work when compiling
statically. (Specifically, the returned library string includes
-lmount, but not -lblkid which -lmount depends upon, so linking
fails due to missing symbols.)

Check that the libraries work, and don't enable gio if they don't,
in the same way we do for gnutls.
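
(For reference: write_c_skeleton() is configure's existing helper that
writes out a minimal C program -- roughly "int main(void) { return 0; }";
the exact contents are incidental -- and compile_prog then tries to
build and link it against the gio libraries. If that link fails, gio
support is simply disabled, mirroring the existing gnutls check.)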

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20200928160402.7961-1-peter.maydell@linaro.org
---
 configure | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index XXXXXXX..XXXXXXX 100755
--- a/configure
+++ b/configure
@@ -XXX,XX +XXX,XX @@ if test "$static" = yes && test "$mingw32" = yes; then
 fi

 if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
-    gio=yes
     gio_cflags=$($pkg_config --cflags gio-2.0)
     gio_libs=$($pkg_config --libs gio-2.0)
     gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0)
     if [ ! -x "$gdbus_codegen" ]; then
         gdbus_codegen=
     fi
+    # Check that the libraries actually work -- Ubuntu 18.04 ships
+    # with pkg-config --static --libs data for gio-2.0 that is missing
+    # -lblkid and will give a link error.
+    write_c_skeleton
+    if compile_prog "" "$gio_libs" ; then
+        gio=yes
+    else
+        gio=no
+    fi
 else
     gio=no
 fi
--
2.20.1

In gicv3_init_cpuif() we copy the ARMCPU gicv3_maintenance_interrupt
into the GICv3CPUState struct's maintenance_irq field. This will
only work if the board happens to have already wired up the CPU
maintenance IRQ before the GIC was realized. Unfortunately this is
not the case for the 'virt' board, and so the value that gets copied
is NULL (since a qemu_irq is really a pointer to an IRQState struct
under the hood). The effect is that the CPU interface code never
actually raises the maintenance interrupt line.

Instead, since the GICv3CPUState has a pointer to the CPUState, do
the dereference at the point where we want to raise the interrupt,
to avoid an implicit requirement on board code to wire things up in
a particular order.
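
As a sketch of the ordering hazard (simplified, not the actual QEMU
code):

    typedef struct IRQState *qemu_irq;

    /* at GIC realize time, before the board wires up the CPU IRQ: */
    cs->maintenance_irq = cpu->gicv3_maintenance_interrupt;  /* copies NULL */

    /* later, the board wires up cpu->gicv3_maintenance_interrupt,
     * but the stale copy is still NULL, and qemu_set_irq() on a
     * NULL qemu_irq is a no-op -- so the line is never raised.
     */

Dereferencing cs->cpu at the moment we raise the line always sees the
currently wired-up value instead.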

Reported-by: Jose Martins <josemartins90@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20201009153904.28529-1-peter.maydell@linaro.org
Reviewed-by: Luc Michel <luc@lmichel.fr>
---
 include/hw/intc/arm_gicv3_common.h | 1 -
 hw/intc/arm_gicv3_cpuif.c          | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -XXX,XX +XXX,XX @@ struct GICv3CPUState {
     qemu_irq parent_fiq;
     qemu_irq parent_virq;
     qemu_irq parent_vfiq;
-    qemu_irq maintenance_irq;

     /* Redistributor */
     uint32_t level; /* Current IRQ level */
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -XXX,XX +XXX,XX @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
     int irqlevel = 0;
     int fiqlevel = 0;
     int maintlevel = 0;
+    ARMCPU *cpu = ARM_CPU(cs->cpu);

     idx = hppvi_index(cs);
     trace_gicv3_cpuif_virt_update(gicv3_redist_affid(cs), idx);
@@ -XXX,XX +XXX,XX @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)

     qemu_set_irq(cs->parent_vfiq, fiqlevel);
     qemu_set_irq(cs->parent_virq, irqlevel);
-    qemu_set_irq(cs->maintenance_irq, maintlevel);
+    qemu_set_irq(cpu->gicv3_maintenance_interrupt, maintlevel);
 }

 static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -XXX,XX +XXX,XX @@ void gicv3_init_cpuif(GICv3State *s)
         && cpu->gic_num_lrs) {
         int j;

-        cs->maintenance_irq = cpu->gicv3_maintenance_interrupt;
-
         cs->num_list_regs = cpu->gic_num_lrs;
         cs->vpribits = cpu->gic_vpribits;
         cs->vprebits = cpu->gic_vprebits;
--
2.20.1

The kerneldoc script currently emits Sphinx markup for a macro with
arguments that uses the c:function directive. This is correct for
Sphinx versions earlier than Sphinx 3, where c:macro doesn't allow
documentation of macros with arguments and c:function is not picky
about the syntax of what it is passed. However, in Sphinx 3 the
c:macro directive was enhanced to support macros with arguments,
and c:function was made more picky about what syntax it accepted.

When kerneldoc is told that it needs to produce output for Sphinx
3 or later, make it emit c:function only for functions and c:macro
for macros with arguments. We assume that anything with a return
type is a function and anything without is a macro.

This fixes the Sphinx error:

  /home/petmay01/linaro/qemu-from-laptop/qemu/docs/../include/qom/object.h:155:Error in declarator
  If declarator-id with parameters (e.g., 'void f(int arg)'):
    Invalid C declaration: Expected identifier in nested name. [error at 25]
      DECLARE_INSTANCE_CHECKER ( InstanceType, OBJ_NAME, TYPENAME)
      -------------------------^
  If parenthesis in noptr-declarator (e.g., 'void (*f(int arg))(double)'):
    Error in declarator or parameters
    Invalid C declaration: Expecting "(" in parameters. [error at 39]
      DECLARE_INSTANCE_CHECKER ( InstanceType, OBJ_NAME, TYPENAME)
      ---------------------------------------^
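
With this change the script emits, for example (illustrative output,
not captured from a real build):

    .. c:macro:: DECLARE_INSTANCE_CHECKER (InstanceType, OBJ_NAME, TYPENAME)

for a macro with arguments, while anything with a return type still
gets the old markup:

    .. c:function:: int foo (int arg)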

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Tested-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20201030174700.7204-2-peter.maydell@linaro.org
---
 scripts/kernel-doc | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index XXXXXXX..XXXXXXX 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -XXX,XX +XXX,XX @@ sub output_function_rst(%) {
 	output_highlight_rst($args{'purpose'});
 	$start = "\n\n**Syntax**\n\n  ``";
     } else {
-	print ".. c:function:: ";
+	if ((split(/\./, $sphinx_version))[0] >= 3) {
+	    # Sphinx 3 and later distinguish macros and functions and
+	    # complain if you use c:function with something that's not
+	    # syntactically valid as a function declaration.
+	    # We assume that anything with a return type is a function
+	    # and anything without is a macro.
+	    if ($args{'functiontype'} ne "") {
+		print ".. c:function:: ";
+	    } else {
+		print ".. c:macro:: ";
+	    }
+	} else {
+	    # Older Sphinx don't support documenting macros that take
+	    # arguments with c:macro, and don't complain about the use
+	    # of c:function for this.
+	    print ".. c:function:: ";
+	}
     }
     if ($args{'functiontype'} ne "") {
 	$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
--
2.20.1

Sphinx 3.2 is pickier than earlier versions about the option:: markup,
and complains about our usage in qemu-option-trace.rst:

  ../../docs/qemu-option-trace.rst.inc:4:Malformed option description
  '[enable=]PATTERN', should look like "opt", "-opt args", "--opt args",
  "/opt args" or "+opt args"

In this file, we're really trying to document the different parts of
the top-level --trace option, which qemu-nbd.rst and qemu-img.rst
have already introduced with an option:: markup. So it's not right
to use option:: here anyway. Switch to a different markup
(definition lists) which gives about the same formatted output.

(Unlike option::, this markup doesn't produce index entries; but
at the moment we don't do anything much with indexes anyway, and
in any case I think it doesn't make much sense to have individual
index entries for the sub-parts of the --trace option.)
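
(For reference, an rST definition list is just the term on one line
with its body indented underneath; no directive is involved, which is
why no index entry is produced. An illustrative fragment:

    ``[enable=]PATTERN``

       Immediately enable events matching *PATTERN*.

The rendered output is close to what option:: produced.)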

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Tested-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20201030174700.7204-3-peter.maydell@linaro.org
---
 docs/qemu-option-trace.rst.inc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/qemu-option-trace.rst.inc b/docs/qemu-option-trace.rst.inc
index XXXXXXX..XXXXXXX 100644
--- a/docs/qemu-option-trace.rst.inc
+++ b/docs/qemu-option-trace.rst.inc
@@ -XXX,XX +XXX,XX @@

 Specify tracing options.

-.. option:: [enable=]PATTERN
+``[enable=]PATTERN``

   Immediately enable events matching *PATTERN*
   (either event name or a globbing pattern). This option is only
@@ -XXX,XX +XXX,XX @@ Specify tracing options.

   Use :option:`-trace help` to print a list of names of trace points.

-.. option:: events=FILE
+``events=FILE``

   Immediately enable events listed in *FILE*.
   The file must contain one event name (as listed in the ``trace-events-all``
@@ -XXX,XX +XXX,XX @@ Specify tracing options.
   available if QEMU has been compiled with the ``simple``, ``log`` or
   ``ftrace`` tracing backend.

-.. option:: file=FILE
+``file=FILE``

   Log output traces to *FILE*.
   This option is only available if QEMU has been compiled with
--
2.20.1

Coverity (CID1390573) spots that we forgot to free the
gpioname strings in a loop in the iotkit realize function.
Correct the error.

This isn't a significant leak, because this function
only ever runs once.
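
As a sketch of the pattern being fixed (simplified; the loop bound and
name format here are illustrative, and this assumes the string is
allocated with g_strdup_printf() as is usual in QEMU):

    for (i = 0; i < num_ppcs; i++) {
        char *gpioname = g_strdup_printf("cfg_ppc%d_irq", i);
        /* ... use gpioname while wiring up the PPC's GPIO lines ... */
        g_free(gpioname);   /* the missing free: one string per iteration */
    }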

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-id: 20180427110137.19304-1-peter.maydell@linaro.org
---
 hw/arm/iotkit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/arm/iotkit.c b/hw/arm/iotkit.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/iotkit.c
+++ b/hw/arm/iotkit.c
@@ -XXX,XX +XXX,XX @@ static void iotkit_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(DEVICE(&s->ppc_irq_orgate), i));
         qdev_connect_gpio_out_named(DEVICE(ppc), "irq", 0,
                                     qdev_get_gpio_in(devs, 0));
+        g_free(gpioname);
     }

     iotkit_forward_sec_resp_cfg(s);
--
2.17.0


The randomness tests in the NPCM7xx RNG test fail intermittently
but fairly frequently. On my machine running the test in a loop:

  while QTEST_QEMU_BINARY=./qemu-system-aarch64 ./tests/qtest/npcm7xx_rng-test; do true; done

will fail in less than a minute with an error like:

  ERROR:../../tests/qtest/npcm7xx_rng-test.c:256:test_first_byte_runs:
  assertion failed (calc_runs_p(buf.l, sizeof(buf) * BITS_PER_BYTE) > 0.01): (0.00286205989 > 0.01)

(Failures have been observed on all 4 of the randomness tests,
not just first_byte_runs.)

It's not clear why these tests are failing like this, but intermittent
failures make CI and merge testing awkward, so disable running them
unless a developer specifically sets QEMU_TEST_FLAKY_RNG_TESTS when
running the test suite, until we work out the cause.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201102152454.8287-1-peter.maydell@linaro.org
Reviewed-by: Havard Skinnemoen <hskinnemoen@google.com>
---
 tests/qtest/npcm7xx_rng-test.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/npcm7xx_rng-test.c b/tests/qtest/npcm7xx_rng-test.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/qtest/npcm7xx_rng-test.c
+++ b/tests/qtest/npcm7xx_rng-test.c
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)

     qtest_add_func("npcm7xx_rng/enable_disable", test_enable_disable);
     qtest_add_func("npcm7xx_rng/rosel", test_rosel);
-    qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
-    qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
-    qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
-    qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
+    /*
+     * These tests fail intermittently; only run them on explicit
+     * request until we figure out why.
+     */
+    if (getenv("QEMU_TEST_FLAKY_RNG_TESTS")) {
+        qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
+        qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
+        qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
+        qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
+    }

     qtest_start("-machine npcm750-evb");
     ret = g_test_run();
--
2.20.1
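
Since the patch only checks that the variable is set (via getenv()),
any value enables the flaky tests; an illustrative invocation:

  QEMU_TEST_FLAKY_RNG_TESTS=1 QTEST_QEMU_BINARY=./qemu-system-aarch64 ./tests/qtest/npcm7xx_rng-test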