1
Arm queue -- mostly the first slice of my Musca patches.
1
I don't have anything else queued up at the moment, so this is just
2
Richard's SME patches.
2
3
3
thanks
4
-- PMM
4
-- PMM
5
5
6
The following changes since commit fc3dbb90f2eb069801bfb4cfe9cbc83cf9c5f4a9:
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
7
7
8
Merge remote-tracking branch 'remotes/jnsnow/tags/bitmaps-pull-request' into staging (2019-02-21 13:09:33 +0000)
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
9
9
10
are available in the Git repository at:
10
are available in the Git repository at:
11
11
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20190221
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
13
13
14
for you to fetch changes up to 3733f80308d2a7f23f5e39b039e0547aba6c07f1:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
15
15
16
hw/arm/armsse: Make 0x5... alias region work for per-CPU devices (2019-02-21 18:17:48 +0000)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
17
17
18
----------------------------------------------------------------
18
----------------------------------------------------------------
19
target-arm queue:
19
target-arm:
20
* Model the Arm "Musca" development boards: "musca-a" and "musca-b1"
20
* Implement SME emulation, for both system and linux-user
21
* Implement the ARMv8.3-JSConv extension
22
* v8M MPU should use background region as default, not always
23
* Stop unintentional sign extension in pmu_init
24
21
25
----------------------------------------------------------------
22
----------------------------------------------------------------
26
Aaron Lindsay OS (1):
23
Richard Henderson (45):
27
target/arm: Stop unintentional sign extension in pmu_init
24
target/arm: Handle SME in aarch64_cpu_dump_state
25
target/arm: Add infrastructure for disas_sme
26
target/arm: Trap non-streaming usage when Streaming SVE is active
27
target/arm: Mark ADR as non-streaming
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
28
69
29
Peter Maydell (16):
70
docs/system/arm/emulation.rst | 4 +
30
hw/arm/armsse: Fix memory leak in error-exit path
71
linux-user/aarch64/target_cpu.h | 5 +-
31
target/arm: v8M MPU should use background region as default, not always
72
linux-user/aarch64/target_prctl.h | 62 +-
32
hw/misc/tz-ppc: Support having unused ports in the middle of the range
73
target/arm/cpu.h | 7 +
33
hw/timer/pl031: Allow use as an embedded-struct device
74
target/arm/helper-sme.h | 126 ++++
34
hw/timer/pl031: Convert to using trace events
75
target/arm/helper-sve.h | 4 +
35
hw/char/pl011: Allow use as an embedded-struct device
76
target/arm/helper.h | 18 +
36
hw/char/pl011: Support all interrupt lines
77
target/arm/translate-a64.h | 45 ++
37
hw/char/pl011: Use '0x' prefix when logging hex numbers
78
target/arm/translate.h | 16 +
38
hw/arm/armsse: Document SRAM_ADDR_WIDTH property in header comment
79
target/arm/sme-fa64.decode | 60 ++
39
hw/arm/armsse: Allow boards to specify init-svtor
80
target/arm/sme.decode | 88 +++
40
hw/arm/musca.c: Implement models of the Musca-A and -B1 boards
81
target/arm/sve.decode | 41 +-
41
hw/arm/musca: Add PPCs
82
linux-user/aarch64/cpu_loop.c | 9 +
42
hw/arm/musca: Add MPCs
83
linux-user/aarch64/signal.c | 243 ++++++--
43
hw/arm/musca: Wire up PL031 RTC
84
linux-user/elfload.c | 20 +
44
hw/arm/musca: Wire up PL011 UARTs
85
linux-user/syscall.c | 28 +-
45
hw/arm/armsse: Make 0x5... alias region work for per-CPU devices
86
target/arm/cpu.c | 35 +-
46
87
target/arm/cpu64.c | 11 +
47
Richard Henderson (4):
88
target/arm/helper.c | 56 +-
48
target/arm: Restructure disas_fp_int_conv
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
49
target/arm: Split out vfp_helper.c
90
target/arm/sve_helper.c | 28 +
50
target/arm: Rearrange Floating-point data-processing (2 regs)
91
target/arm/translate-a64.c | 103 +++-
51
target/arm: Implement ARMv8.3-JSConv
92
target/arm/translate-sme.c | 373 ++++++++++++
52
93
target/arm/translate-sve.c | 393 ++++++++++---
53
hw/arm/Makefile.objs | 1 +
94
target/arm/translate-vfp.c | 12 +
54
target/arm/Makefile.objs | 2 +-
95
target/arm/translate.c | 2 +
55
include/hw/arm/armsse.h | 7 +-
96
target/arm/vec_helper.c | 24 +
56
include/hw/char/pl011.h | 34 ++
97
target/arm/meson.build | 3 +
57
include/hw/misc/tz-ppc.h | 8 +-
98
28 files changed, 2821 insertions(+), 135 deletions(-)
58
include/hw/timer/pl031.h | 44 ++
99
create mode 100644 target/arm/sme-fa64.decode
59
target/arm/cpu.h | 10 +
100
create mode 100644 target/arm/sme.decode
60
target/arm/helper.h | 3 +
101
create mode 100644 target/arm/translate-sme.c
61
hw/arm/armsse.c | 44 +-
62
hw/arm/musca.c | 669 ++++++++++++++++++++++
63
hw/char/pl011.c | 81 +--
64
hw/misc/tz-ppc.c | 32 ++
65
hw/timer/pl031.c | 80 ++-
66
target/arm/cpu.c | 1 +
67
target/arm/cpu64.c | 2 +
68
target/arm/helper.c | 1072 +----------------------------------
69
target/arm/translate-a64.c | 120 ++--
70
target/arm/translate.c | 237 ++++----
71
target/arm/vfp_helper.c | 1176 +++++++++++++++++++++++++++++++++++++++
72
MAINTAINERS | 7 +
73
default-configs/arm-softmmu.mak | 1 +
74
hw/timer/trace-events | 6 +
75
22 files changed, 2307 insertions(+), 1330 deletions(-)
76
create mode 100644 include/hw/timer/pl031.h
77
create mode 100644 hw/arm/musca.c
78
create mode 100644 target/arm/vfp_helper.c
79
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/cpu.c | 17 ++++++++++++++++-
11
1 file changed, 16 insertions(+), 1 deletion(-)
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
18
int i;
19
int el = arm_current_el(env);
20
const char *ns_status;
21
+ bool sve;
22
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
24
for (i = 0; i < 32; i++) {
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
26
el,
27
psr & PSTATE_SP ? 'h' : 't');
28
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
31
+ env->svcr,
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
+ }
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
37
}
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
41
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
44
+ sve = sme_exception_el(env, el) == 0;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
49
+ }
50
+
51
+ if (sve) {
52
int j, zcr_len = sve_vqm1_for_el(env, el);
53
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
55
--
56
2.25.1
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Move all of the fp helpers out of helper.c into a new file.
3
This includes the build rules for the decoder, and the
4
This is code movement only. Since helper.c has no copyright
4
new file for translation, but excludes any instructions.
5
header, take the one from cpu.h for the new file.
6
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20190215192302.27855-3-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/Makefile.objs | 2 +-
11
target/arm/translate-a64.h | 1 +
13
target/arm/helper.c | 1062 -------------------------------------
12
target/arm/sme.decode | 20 ++++++++++++++++++++
14
target/arm/vfp_helper.c | 1088 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-a64.c | 7 ++++++-
15
3 files changed, 1089 insertions(+), 1063 deletions(-)
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
16
create mode 100644 target/arm/vfp_helper.c
15
target/arm/meson.build | 2 ++
16
5 files changed, 64 insertions(+), 1 deletion(-)
17
create mode 100644 target/arm/sme.decode
18
create mode 100644 target/arm/translate-sme.c
17
19
18
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
19
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/Makefile.objs
22
--- a/target/arm/translate-a64.h
21
+++ b/target/arm/Makefile.objs
23
+++ b/target/arm/translate-a64.h
22
@@ -XXX,XX +XXX,XX @@ obj-$(call land,$(CONFIG_KVM),$(call lnot,$(TARGET_AARCH64))) += kvm32.o
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
23
obj-$(call land,$(CONFIG_KVM),$(TARGET_AARCH64)) += kvm64.o
24
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
25
obj-y += translate.o op_helper.o helper.o cpu.o
26
-obj-y += neon_helper.o iwmmxt_helper.o vec_helper.o
27
+obj-y += neon_helper.o iwmmxt_helper.o vec_helper.o vfp_helper.o
28
obj-y += gdbstub.o
29
obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
30
obj-$(TARGET_AARCH64) += pauth_helper.o
31
diff --git a/target/arm/helper.c b/target/arm/helper.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/helper.c
34
+++ b/target/arm/helper.c
35
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
36
return (a & mask) | (b & ~mask);
37
}
25
}
38
26
39
-/* VFP support. We follow the convention used for VFP instructions:
27
bool disas_sve(DisasContext *, uint32_t);
40
- Single precision routines have a "s" suffix, double precision a
28
+bool disas_sme(DisasContext *, uint32_t);
41
- "d" suffix. */
29
42
-
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
43
-/* Convert host exception flags to vfp form. */
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
44
-static inline int vfp_exceptbits_from_host(int host_bits)
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
45
-{
46
- int target_bits = 0;
47
-
48
- if (host_bits & float_flag_invalid)
49
- target_bits |= 1;
50
- if (host_bits & float_flag_divbyzero)
51
- target_bits |= 2;
52
- if (host_bits & float_flag_overflow)
53
- target_bits |= 4;
54
- if (host_bits & (float_flag_underflow | float_flag_output_denormal))
55
- target_bits |= 8;
56
- if (host_bits & float_flag_inexact)
57
- target_bits |= 0x10;
58
- if (host_bits & float_flag_input_denormal)
59
- target_bits |= 0x80;
60
- return target_bits;
61
-}
62
-
63
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
64
-{
65
- uint32_t i, fpscr;
66
-
67
- fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
68
- | (env->vfp.vec_len << 16)
69
- | (env->vfp.vec_stride << 20);
70
-
71
- i = get_float_exception_flags(&env->vfp.fp_status);
72
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
73
- /* FZ16 does not generate an input denormal exception. */
74
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
75
- & ~float_flag_input_denormal);
76
- fpscr |= vfp_exceptbits_from_host(i);
77
-
78
- i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
79
- fpscr |= i ? FPCR_QC : 0;
80
-
81
- return fpscr;
82
-}
83
-
84
-uint32_t vfp_get_fpscr(CPUARMState *env)
85
-{
86
- return HELPER(vfp_get_fpscr)(env);
87
-}
88
-
89
-/* Convert vfp exception flags to target form. */
90
-static inline int vfp_exceptbits_to_host(int target_bits)
91
-{
92
- int host_bits = 0;
93
-
94
- if (target_bits & 1)
95
- host_bits |= float_flag_invalid;
96
- if (target_bits & 2)
97
- host_bits |= float_flag_divbyzero;
98
- if (target_bits & 4)
99
- host_bits |= float_flag_overflow;
100
- if (target_bits & 8)
101
- host_bits |= float_flag_underflow;
102
- if (target_bits & 0x10)
103
- host_bits |= float_flag_inexact;
104
- if (target_bits & 0x80)
105
- host_bits |= float_flag_input_denormal;
106
- return host_bits;
107
-}
108
-
109
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
110
-{
111
- int i;
112
- uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
113
-
114
- /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
115
- if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
116
- val &= ~FPCR_FZ16;
117
- }
118
-
119
- /*
120
- * We don't implement trapped exception handling, so the
121
- * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
122
- *
123
- * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
124
- * (which are stored in fp_status), and the other RES0 bits
125
- * in between, then we clear all of the low 16 bits.
126
- */
127
- env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
128
- env->vfp.vec_len = (val >> 16) & 7;
129
- env->vfp.vec_stride = (val >> 20) & 3;
130
-
131
- /*
132
- * The bit we set within fpscr_q is arbitrary; the register as a
133
- * whole being zero/non-zero is what counts.
134
- */
135
- env->vfp.qc[0] = val & FPCR_QC;
136
- env->vfp.qc[1] = 0;
137
- env->vfp.qc[2] = 0;
138
- env->vfp.qc[3] = 0;
139
-
140
- changed ^= val;
141
- if (changed & (3 << 22)) {
142
- i = (val >> 22) & 3;
143
- switch (i) {
144
- case FPROUNDING_TIEEVEN:
145
- i = float_round_nearest_even;
146
- break;
147
- case FPROUNDING_POSINF:
148
- i = float_round_up;
149
- break;
150
- case FPROUNDING_NEGINF:
151
- i = float_round_down;
152
- break;
153
- case FPROUNDING_ZERO:
154
- i = float_round_to_zero;
155
- break;
156
- }
157
- set_float_rounding_mode(i, &env->vfp.fp_status);
158
- set_float_rounding_mode(i, &env->vfp.fp_status_f16);
159
- }
160
- if (changed & FPCR_FZ16) {
161
- bool ftz_enabled = val & FPCR_FZ16;
162
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
163
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
164
- }
165
- if (changed & FPCR_FZ) {
166
- bool ftz_enabled = val & FPCR_FZ;
167
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
168
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
169
- }
170
- if (changed & FPCR_DN) {
171
- bool dnan_enabled = val & FPCR_DN;
172
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
173
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
174
- }
175
-
176
- /* The exception flags are ORed together when we read fpscr so we
177
- * only need to preserve the current state in one of our
178
- * float_status values.
179
- */
180
- i = vfp_exceptbits_to_host(val);
181
- set_float_exception_flags(i, &env->vfp.fp_status);
182
- set_float_exception_flags(0, &env->vfp.fp_status_f16);
183
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
184
-}
185
-
186
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
187
-{
188
- HELPER(vfp_set_fpscr)(env, val);
189
-}
190
-
191
-#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
192
-
193
-#define VFP_BINOP(name) \
194
-float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
195
-{ \
196
- float_status *fpst = fpstp; \
197
- return float32_ ## name(a, b, fpst); \
198
-} \
199
-float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
200
-{ \
201
- float_status *fpst = fpstp; \
202
- return float64_ ## name(a, b, fpst); \
203
-}
204
-VFP_BINOP(add)
205
-VFP_BINOP(sub)
206
-VFP_BINOP(mul)
207
-VFP_BINOP(div)
208
-VFP_BINOP(min)
209
-VFP_BINOP(max)
210
-VFP_BINOP(minnum)
211
-VFP_BINOP(maxnum)
212
-#undef VFP_BINOP
213
-
214
-float32 VFP_HELPER(neg, s)(float32 a)
215
-{
216
- return float32_chs(a);
217
-}
218
-
219
-float64 VFP_HELPER(neg, d)(float64 a)
220
-{
221
- return float64_chs(a);
222
-}
223
-
224
-float32 VFP_HELPER(abs, s)(float32 a)
225
-{
226
- return float32_abs(a);
227
-}
228
-
229
-float64 VFP_HELPER(abs, d)(float64 a)
230
-{
231
- return float64_abs(a);
232
-}
233
-
234
-float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
235
-{
236
- return float32_sqrt(a, &env->vfp.fp_status);
237
-}
238
-
239
-float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
240
-{
241
- return float64_sqrt(a, &env->vfp.fp_status);
242
-}
243
-
244
-static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
245
-{
246
- uint32_t flags;
247
- switch (cmp) {
248
- case float_relation_equal:
249
- flags = 0x6;
250
- break;
251
- case float_relation_less:
252
- flags = 0x8;
253
- break;
254
- case float_relation_greater:
255
- flags = 0x2;
256
- break;
257
- case float_relation_unordered:
258
- flags = 0x3;
259
- break;
260
- default:
261
- g_assert_not_reached();
262
- }
263
- env->vfp.xregs[ARM_VFP_FPSCR] =
264
- deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
265
-}
266
-
267
-/* XXX: check quiet/signaling case */
268
-#define DO_VFP_cmp(p, type) \
269
-void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
270
-{ \
271
- softfloat_to_vfp_compare(env, \
272
- type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
273
-} \
274
-void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
275
-{ \
276
- softfloat_to_vfp_compare(env, \
277
- type ## _compare(a, b, &env->vfp.fp_status)); \
278
-}
279
-DO_VFP_cmp(s, float32)
280
-DO_VFP_cmp(d, float64)
281
-#undef DO_VFP_cmp
282
-
283
-/* Integer to float and float to integer conversions */
284
-
285
-#define CONV_ITOF(name, ftype, fsz, sign) \
286
-ftype HELPER(name)(uint32_t x, void *fpstp) \
287
-{ \
288
- float_status *fpst = fpstp; \
289
- return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
290
-}
291
-
292
-#define CONV_FTOI(name, ftype, fsz, sign, round) \
293
-sign##int32_t HELPER(name)(ftype x, void *fpstp) \
294
-{ \
295
- float_status *fpst = fpstp; \
296
- if (float##fsz##_is_any_nan(x)) { \
297
- float_raise(float_flag_invalid, fpst); \
298
- return 0; \
299
- } \
300
- return float##fsz##_to_##sign##int32##round(x, fpst); \
301
-}
302
-
303
-#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
304
- CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
305
- CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
306
- CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
307
-
308
-FLOAT_CONVS(si, h, uint32_t, 16, )
309
-FLOAT_CONVS(si, s, float32, 32, )
310
-FLOAT_CONVS(si, d, float64, 64, )
311
-FLOAT_CONVS(ui, h, uint32_t, 16, u)
312
-FLOAT_CONVS(ui, s, float32, 32, u)
313
-FLOAT_CONVS(ui, d, float64, 64, u)
314
-
315
-#undef CONV_ITOF
316
-#undef CONV_FTOI
317
-#undef FLOAT_CONVS
318
-
319
-/* floating point conversion */
320
-float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
321
-{
322
- return float32_to_float64(x, &env->vfp.fp_status);
323
-}
324
-
325
-float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
326
-{
327
- return float64_to_float32(x, &env->vfp.fp_status);
328
-}
329
-
330
-/* VFP3 fixed point conversion. */
331
-#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
332
-float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
333
- void *fpstp) \
334
-{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
335
-
336
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff) \
337
-uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
338
- void *fpst) \
339
-{ \
340
- if (unlikely(float##fsz##_is_any_nan(x))) { \
341
- float_raise(float_flag_invalid, fpst); \
342
- return 0; \
343
- } \
344
- return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
345
-}
346
-
347
-#define VFP_CONV_FIX(name, p, fsz, isz, itype) \
348
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
349
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
350
- float_round_to_zero, _round_to_zero) \
351
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
352
- get_float_rounding_mode(fpst), )
353
-
354
-#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \
355
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
356
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
357
- get_float_rounding_mode(fpst), )
358
-
359
-VFP_CONV_FIX(sh, d, 64, 64, int16)
360
-VFP_CONV_FIX(sl, d, 64, 64, int32)
361
-VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
362
-VFP_CONV_FIX(uh, d, 64, 64, uint16)
363
-VFP_CONV_FIX(ul, d, 64, 64, uint32)
364
-VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
365
-VFP_CONV_FIX(sh, s, 32, 32, int16)
366
-VFP_CONV_FIX(sl, s, 32, 32, int32)
367
-VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
368
-VFP_CONV_FIX(uh, s, 32, 32, uint16)
369
-VFP_CONV_FIX(ul, s, 32, 32, uint32)
370
-VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
371
-
372
-#undef VFP_CONV_FIX
373
-#undef VFP_CONV_FIX_FLOAT
374
-#undef VFP_CONV_FLOAT_FIX_ROUND
375
-#undef VFP_CONV_FIX_A64
376
-
377
-uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
378
-{
379
- return int32_to_float16_scalbn(x, -shift, fpst);
380
-}
381
-
382
-uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
383
-{
384
- return uint32_to_float16_scalbn(x, -shift, fpst);
385
-}
386
-
387
-uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
388
-{
389
- return int64_to_float16_scalbn(x, -shift, fpst);
390
-}
391
-
392
-uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
393
-{
394
- return uint64_to_float16_scalbn(x, -shift, fpst);
395
-}
396
-
397
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
398
-{
399
- if (unlikely(float16_is_any_nan(x))) {
400
- float_raise(float_flag_invalid, fpst);
401
- return 0;
402
- }
403
- return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
404
- shift, fpst);
405
-}
406
-
407
-uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
408
-{
409
- if (unlikely(float16_is_any_nan(x))) {
410
- float_raise(float_flag_invalid, fpst);
411
- return 0;
412
- }
413
- return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
414
- shift, fpst);
415
-}
416
-
417
-uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
418
-{
419
- if (unlikely(float16_is_any_nan(x))) {
420
- float_raise(float_flag_invalid, fpst);
421
- return 0;
422
- }
423
- return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
424
- shift, fpst);
425
-}
426
-
427
-uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
428
-{
429
- if (unlikely(float16_is_any_nan(x))) {
430
- float_raise(float_flag_invalid, fpst);
431
- return 0;
432
- }
433
- return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
434
- shift, fpst);
435
-}
436
-
437
-uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
438
-{
439
- if (unlikely(float16_is_any_nan(x))) {
440
- float_raise(float_flag_invalid, fpst);
441
- return 0;
442
- }
443
- return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
444
- shift, fpst);
445
-}
446
-
447
-uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
448
-{
449
- if (unlikely(float16_is_any_nan(x))) {
450
- float_raise(float_flag_invalid, fpst);
451
- return 0;
452
- }
453
- return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
454
- shift, fpst);
455
-}
456
-
457
-/* Set the current fp rounding mode and return the old one.
458
- * The argument is a softfloat float_round_ value.
459
- */
460
-uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
461
-{
462
- float_status *fp_status = fpstp;
463
-
464
- uint32_t prev_rmode = get_float_rounding_mode(fp_status);
465
- set_float_rounding_mode(rmode, fp_status);
466
-
467
- return prev_rmode;
468
-}
469
-
470
-/* Set the current fp rounding mode in the standard fp status and return
471
- * the old one. This is for NEON instructions that need to change the
472
- * rounding mode but wish to use the standard FPSCR values for everything
473
- * else. Always set the rounding mode back to the correct value after
474
- * modifying it.
475
- * The argument is a softfloat float_round_ value.
476
- */
477
-uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
478
-{
479
- float_status *fp_status = &env->vfp.standard_fp_status;
480
-
481
- uint32_t prev_rmode = get_float_rounding_mode(fp_status);
482
- set_float_rounding_mode(rmode, fp_status);
483
-
484
- return prev_rmode;
485
-}
486
-
487
-/* Half precision conversions. */
488
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
489
-{
490
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
491
- * it would affect flushing input denormals.
492
- */
493
- float_status *fpst = fpstp;
494
- flag save = get_flush_inputs_to_zero(fpst);
495
- set_flush_inputs_to_zero(false, fpst);
496
- float32 r = float16_to_float32(a, !ahp_mode, fpst);
497
- set_flush_inputs_to_zero(save, fpst);
498
- return r;
499
-}
500
-
501
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
502
-{
503
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
504
- * it would affect flushing output denormals.
505
- */
506
- float_status *fpst = fpstp;
507
- flag save = get_flush_to_zero(fpst);
508
- set_flush_to_zero(false, fpst);
509
- float16 r = float32_to_float16(a, !ahp_mode, fpst);
510
- set_flush_to_zero(save, fpst);
511
- return r;
512
-}
513
-
514
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
515
-{
516
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
517
- * it would affect flushing input denormals.
518
- */
519
- float_status *fpst = fpstp;
520
- flag save = get_flush_inputs_to_zero(fpst);
521
- set_flush_inputs_to_zero(false, fpst);
522
- float64 r = float16_to_float64(a, !ahp_mode, fpst);
523
- set_flush_inputs_to_zero(save, fpst);
524
- return r;
525
-}
526
-
527
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
528
-{
529
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
530
- * it would affect flushing output denormals.
531
- */
532
- float_status *fpst = fpstp;
533
- flag save = get_flush_to_zero(fpst);
534
- set_flush_to_zero(false, fpst);
535
- float16 r = float64_to_float16(a, !ahp_mode, fpst);
536
- set_flush_to_zero(save, fpst);
537
- return r;
538
-}
539
-
540
-#define float32_two make_float32(0x40000000)
541
-#define float32_three make_float32(0x40400000)
542
-#define float32_one_point_five make_float32(0x3fc00000)
543
-
544
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
545
-{
546
- float_status *s = &env->vfp.standard_fp_status;
547
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
548
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
549
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
550
- float_raise(float_flag_input_denormal, s);
551
- }
552
- return float32_two;
553
- }
554
- return float32_sub(float32_two, float32_mul(a, b, s), s);
555
-}
556
-
557
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
558
-{
559
- float_status *s = &env->vfp.standard_fp_status;
560
- float32 product;
561
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
562
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
563
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
564
- float_raise(float_flag_input_denormal, s);
565
- }
566
- return float32_one_point_five;
567
- }
568
- product = float32_mul(a, b, s);
569
- return float32_div(float32_sub(float32_three, product, s), float32_two, s);
570
-}
571
-
572
-/* NEON helpers. */
573
-
574
-/* Constants 256 and 512 are used in some helpers; we avoid relying on
575
- * int->float conversions at run-time. */
576
-#define float64_256 make_float64(0x4070000000000000LL)
577
-#define float64_512 make_float64(0x4080000000000000LL)
578
-#define float16_maxnorm make_float16(0x7bff)
579
-#define float32_maxnorm make_float32(0x7f7fffff)
580
-#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
581
-
582
-/* Reciprocal functions
583
- *
584
- * The algorithm that must be used to calculate the estimate
585
- * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
586
- */
587
-
588
-/* See RecipEstimate()
589
- *
590
- * input is a 9 bit fixed point number
591
- * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
592
- * result range 256 .. 511 for a number from 1.0 to 511/256.
593
- */
594
-
595
-static int recip_estimate(int input)
596
-{
597
- int a, b, r;
598
- assert(256 <= input && input < 512);
599
- a = (input * 2) + 1;
600
- b = (1 << 19) / a;
601
- r = (b + 1) >> 1;
602
- assert(256 <= r && r < 512);
603
- return r;
604
-}
605
-
606
-/*
607
- * Common wrapper to call recip_estimate
608
- *
609
- * The parameters are exponent and 64 bit fraction (without implicit
610
- * bit) where the binary point is nominally at bit 52. Returns a
611
- * float64 which can then be rounded to the appropriate size by the
612
- * callee.
613
- */
614
-
615
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
616
-{
617
- uint32_t scaled, estimate;
618
- uint64_t result_frac;
619
- int result_exp;
620
-
621
- /* Handle sub-normals */
622
- if (*exp == 0) {
623
- if (extract64(frac, 51, 1) == 0) {
624
- *exp = -1;
625
- frac <<= 2;
626
- } else {
627
- frac <<= 1;
628
- }
629
- }
630
-
631
- /* scaled = UInt('1':fraction<51:44>) */
632
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
633
- estimate = recip_estimate(scaled);
634
-
635
- result_exp = exp_off - *exp;
636
- result_frac = deposit64(0, 44, 8, estimate);
637
- if (result_exp == 0) {
638
- result_frac = deposit64(result_frac >> 1, 51, 1, 1);
639
- } else if (result_exp == -1) {
640
- result_frac = deposit64(result_frac >> 2, 50, 2, 1);
641
- result_exp = 0;
642
- }
643
-
644
- *exp = result_exp;
645
-
646
- return result_frac;
647
-}
648
-
649
-static bool round_to_inf(float_status *fpst, bool sign_bit)
650
-{
651
- switch (fpst->float_rounding_mode) {
652
- case float_round_nearest_even: /* Round to Nearest */
653
- return true;
654
- case float_round_up: /* Round to +Inf */
655
- return !sign_bit;
656
- case float_round_down: /* Round to -Inf */
657
- return sign_bit;
658
- case float_round_to_zero: /* Round to Zero */
659
- return false;
660
- }
661
-
662
- g_assert_not_reached();
663
-}
664
-
665
-uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
666
-{
667
- float_status *fpst = fpstp;
668
- float16 f16 = float16_squash_input_denormal(input, fpst);
669
- uint32_t f16_val = float16_val(f16);
670
- uint32_t f16_sign = float16_is_neg(f16);
671
- int f16_exp = extract32(f16_val, 10, 5);
672
- uint32_t f16_frac = extract32(f16_val, 0, 10);
673
- uint64_t f64_frac;
674
-
675
- if (float16_is_any_nan(f16)) {
676
- float16 nan = f16;
677
- if (float16_is_signaling_nan(f16, fpst)) {
678
- float_raise(float_flag_invalid, fpst);
679
- nan = float16_silence_nan(f16, fpst);
680
- }
681
- if (fpst->default_nan_mode) {
682
- nan = float16_default_nan(fpst);
683
- }
684
- return nan;
685
- } else if (float16_is_infinity(f16)) {
686
- return float16_set_sign(float16_zero, float16_is_neg(f16));
687
- } else if (float16_is_zero(f16)) {
688
- float_raise(float_flag_divbyzero, fpst);
689
- return float16_set_sign(float16_infinity, float16_is_neg(f16));
690
- } else if (float16_abs(f16) < (1 << 8)) {
691
- /* Abs(value) < 2.0^-16 */
692
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
693
- if (round_to_inf(fpst, f16_sign)) {
694
- return float16_set_sign(float16_infinity, f16_sign);
695
- } else {
696
- return float16_set_sign(float16_maxnorm, f16_sign);
697
- }
698
- } else if (f16_exp >= 29 && fpst->flush_to_zero) {
699
- float_raise(float_flag_underflow, fpst);
700
- return float16_set_sign(float16_zero, float16_is_neg(f16));
701
- }
702
-
703
- f64_frac = call_recip_estimate(&f16_exp, 29,
704
- ((uint64_t) f16_frac) << (52 - 10));
705
-
706
- /* result = sign : result_exp<4:0> : fraction<51:42> */
707
- f16_val = deposit32(0, 15, 1, f16_sign);
708
- f16_val = deposit32(f16_val, 10, 5, f16_exp);
709
- f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
710
- return make_float16(f16_val);
711
-}
712
-
713
-float32 HELPER(recpe_f32)(float32 input, void *fpstp)
714
-{
715
- float_status *fpst = fpstp;
716
- float32 f32 = float32_squash_input_denormal(input, fpst);
717
- uint32_t f32_val = float32_val(f32);
718
- bool f32_sign = float32_is_neg(f32);
719
- int f32_exp = extract32(f32_val, 23, 8);
720
- uint32_t f32_frac = extract32(f32_val, 0, 23);
721
- uint64_t f64_frac;
722
-
723
- if (float32_is_any_nan(f32)) {
724
- float32 nan = f32;
725
- if (float32_is_signaling_nan(f32, fpst)) {
726
- float_raise(float_flag_invalid, fpst);
727
- nan = float32_silence_nan(f32, fpst);
728
- }
729
- if (fpst->default_nan_mode) {
730
- nan = float32_default_nan(fpst);
731
- }
732
- return nan;
733
- } else if (float32_is_infinity(f32)) {
734
- return float32_set_sign(float32_zero, float32_is_neg(f32));
735
- } else if (float32_is_zero(f32)) {
736
- float_raise(float_flag_divbyzero, fpst);
737
- return float32_set_sign(float32_infinity, float32_is_neg(f32));
738
- } else if (float32_abs(f32) < (1ULL << 21)) {
739
- /* Abs(value) < 2.0^-128 */
740
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
741
- if (round_to_inf(fpst, f32_sign)) {
742
- return float32_set_sign(float32_infinity, f32_sign);
743
- } else {
744
- return float32_set_sign(float32_maxnorm, f32_sign);
745
- }
746
- } else if (f32_exp >= 253 && fpst->flush_to_zero) {
747
- float_raise(float_flag_underflow, fpst);
748
- return float32_set_sign(float32_zero, float32_is_neg(f32));
749
- }
750
-
751
- f64_frac = call_recip_estimate(&f32_exp, 253,
752
- ((uint64_t) f32_frac) << (52 - 23));
753
-
754
- /* result = sign : result_exp<7:0> : fraction<51:29> */
755
- f32_val = deposit32(0, 31, 1, f32_sign);
756
- f32_val = deposit32(f32_val, 23, 8, f32_exp);
757
- f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
758
- return make_float32(f32_val);
759
-}
760
-
761
-float64 HELPER(recpe_f64)(float64 input, void *fpstp)
762
-{
763
- float_status *fpst = fpstp;
764
- float64 f64 = float64_squash_input_denormal(input, fpst);
765
- uint64_t f64_val = float64_val(f64);
766
- bool f64_sign = float64_is_neg(f64);
767
- int f64_exp = extract64(f64_val, 52, 11);
768
- uint64_t f64_frac = extract64(f64_val, 0, 52);
769
-
770
- /* Deal with any special cases */
771
- if (float64_is_any_nan(f64)) {
772
- float64 nan = f64;
773
- if (float64_is_signaling_nan(f64, fpst)) {
774
- float_raise(float_flag_invalid, fpst);
775
- nan = float64_silence_nan(f64, fpst);
776
- }
777
- if (fpst->default_nan_mode) {
778
- nan = float64_default_nan(fpst);
779
- }
780
- return nan;
781
- } else if (float64_is_infinity(f64)) {
782
- return float64_set_sign(float64_zero, float64_is_neg(f64));
783
- } else if (float64_is_zero(f64)) {
784
- float_raise(float_flag_divbyzero, fpst);
785
- return float64_set_sign(float64_infinity, float64_is_neg(f64));
786
- } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
787
- /* Abs(value) < 2.0^-1024 */
788
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
789
- if (round_to_inf(fpst, f64_sign)) {
790
- return float64_set_sign(float64_infinity, f64_sign);
791
- } else {
792
- return float64_set_sign(float64_maxnorm, f64_sign);
793
- }
794
- } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
795
- float_raise(float_flag_underflow, fpst);
796
- return float64_set_sign(float64_zero, float64_is_neg(f64));
797
- }
798
-
799
- f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
800
-
801
- /* result = sign : result_exp<10:0> : fraction<51:0>; */
802
- f64_val = deposit64(0, 63, 1, f64_sign);
803
- f64_val = deposit64(f64_val, 52, 11, f64_exp);
804
- f64_val = deposit64(f64_val, 0, 52, f64_frac);
805
- return make_float64(f64_val);
806
-}
807
-
808
-/* The algorithm that must be used to calculate the estimate
809
- * is specified by the ARM ARM.
810
- */
811
-
812
-static int do_recip_sqrt_estimate(int a)
813
-{
814
- int b, estimate;
815
-
816
- assert(128 <= a && a < 512);
817
- if (a < 256) {
818
- a = a * 2 + 1;
819
- } else {
820
- a = (a >> 1) << 1;
821
- a = (a + 1) * 2;
822
- }
823
- b = 512;
824
- while (a * (b + 1) * (b + 1) < (1 << 28)) {
825
- b += 1;
826
- }
827
- estimate = (b + 1) / 2;
828
- assert(256 <= estimate && estimate < 512);
829
-
830
- return estimate;
831
-}
832
-
833
-
834
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
835
-{
836
- int estimate;
837
- uint32_t scaled;
838
-
839
- if (*exp == 0) {
840
- while (extract64(frac, 51, 1) == 0) {
841
- frac = frac << 1;
842
- *exp -= 1;
843
- }
844
- frac = extract64(frac, 0, 51) << 1;
845
- }
846
-
847
- if (*exp & 1) {
848
- /* scaled = UInt('01':fraction<51:45>) */
849
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
850
- } else {
851
- /* scaled = UInt('1':fraction<51:44>) */
852
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
853
- }
854
- estimate = do_recip_sqrt_estimate(scaled);
855
-
856
- *exp = (exp_off - *exp) / 2;
857
- return extract64(estimate, 0, 8) << 44;
858
-}
859
-
860
-uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
861
-{
862
- float_status *s = fpstp;
863
- float16 f16 = float16_squash_input_denormal(input, s);
864
- uint16_t val = float16_val(f16);
865
- bool f16_sign = float16_is_neg(f16);
866
- int f16_exp = extract32(val, 10, 5);
867
- uint16_t f16_frac = extract32(val, 0, 10);
868
- uint64_t f64_frac;
869
-
870
- if (float16_is_any_nan(f16)) {
871
- float16 nan = f16;
872
- if (float16_is_signaling_nan(f16, s)) {
873
- float_raise(float_flag_invalid, s);
874
- nan = float16_silence_nan(f16, s);
875
- }
876
- if (s->default_nan_mode) {
877
- nan = float16_default_nan(s);
878
- }
879
- return nan;
880
- } else if (float16_is_zero(f16)) {
881
- float_raise(float_flag_divbyzero, s);
882
- return float16_set_sign(float16_infinity, f16_sign);
883
- } else if (f16_sign) {
884
- float_raise(float_flag_invalid, s);
885
- return float16_default_nan(s);
886
- } else if (float16_is_infinity(f16)) {
887
- return float16_zero;
888
- }
889
-
890
- /* Scale and normalize to a double-precision value between 0.25 and 1.0,
891
- * preserving the parity of the exponent. */
892
-
893
- f64_frac = ((uint64_t) f16_frac) << (52 - 10);
894
-
895
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
896
-
897
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
898
- val = deposit32(0, 15, 1, f16_sign);
899
- val = deposit32(val, 10, 5, f16_exp);
900
- val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
901
- return make_float16(val);
902
-}
903
-
904
-float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
905
-{
906
- float_status *s = fpstp;
907
- float32 f32 = float32_squash_input_denormal(input, s);
908
- uint32_t val = float32_val(f32);
909
- uint32_t f32_sign = float32_is_neg(f32);
910
- int f32_exp = extract32(val, 23, 8);
911
- uint32_t f32_frac = extract32(val, 0, 23);
912
- uint64_t f64_frac;
913
-
914
- if (float32_is_any_nan(f32)) {
915
- float32 nan = f32;
916
- if (float32_is_signaling_nan(f32, s)) {
917
- float_raise(float_flag_invalid, s);
918
- nan = float32_silence_nan(f32, s);
919
- }
920
- if (s->default_nan_mode) {
921
- nan = float32_default_nan(s);
922
- }
923
- return nan;
924
- } else if (float32_is_zero(f32)) {
925
- float_raise(float_flag_divbyzero, s);
926
- return float32_set_sign(float32_infinity, float32_is_neg(f32));
927
- } else if (float32_is_neg(f32)) {
928
- float_raise(float_flag_invalid, s);
929
- return float32_default_nan(s);
930
- } else if (float32_is_infinity(f32)) {
931
- return float32_zero;
932
- }
933
-
934
- /* Scale and normalize to a double-precision value between 0.25 and 1.0,
935
- * preserving the parity of the exponent. */
936
-
937
- f64_frac = ((uint64_t) f32_frac) << 29;
938
-
939
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
940
-
941
- /* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) */
942
- val = deposit32(0, 31, 1, f32_sign);
943
- val = deposit32(val, 23, 8, f32_exp);
944
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
945
- return make_float32(val);
946
-}
947
-
948
-float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
949
-{
950
- float_status *s = fpstp;
951
- float64 f64 = float64_squash_input_denormal(input, s);
952
- uint64_t val = float64_val(f64);
953
- bool f64_sign = float64_is_neg(f64);
954
- int f64_exp = extract64(val, 52, 11);
955
- uint64_t f64_frac = extract64(val, 0, 52);
956
-
957
- if (float64_is_any_nan(f64)) {
958
- float64 nan = f64;
959
- if (float64_is_signaling_nan(f64, s)) {
960
- float_raise(float_flag_invalid, s);
961
- nan = float64_silence_nan(f64, s);
962
- }
963
- if (s->default_nan_mode) {
964
- nan = float64_default_nan(s);
965
- }
966
- return nan;
967
- } else if (float64_is_zero(f64)) {
968
- float_raise(float_flag_divbyzero, s);
969
- return float64_set_sign(float64_infinity, float64_is_neg(f64));
970
- } else if (float64_is_neg(f64)) {
971
- float_raise(float_flag_invalid, s);
972
- return float64_default_nan(s);
973
- } else if (float64_is_infinity(f64)) {
974
- return float64_zero;
975
- }
976
-
977
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
978
-
979
- /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
980
- val = deposit64(0, 61, 1, f64_sign);
981
- val = deposit64(val, 52, 11, f64_exp);
982
- val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
983
- return make_float64(val);
984
-}
985
-
986
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
987
-{
988
- /* float_status *s = fpstp; */
989
- int input, estimate;
990
-
991
- if ((a & 0x80000000) == 0) {
992
- return 0xffffffff;
993
- }
994
-
995
- input = extract32(a, 23, 9);
996
- estimate = recip_estimate(input);
997
-
998
- return deposit32(0, (32 - 9), 9, estimate);
999
-}
1000
-
1001
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
1002
-{
1003
- int estimate;
1004
-
1005
- if ((a & 0xc0000000) == 0) {
1006
- return 0xffffffff;
1007
- }
1008
-
1009
- estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
1010
-
1011
- return deposit32(0, 23, 9, estimate);
1012
-}
1013
-
1014
-/* VFPv4 fused multiply-accumulate */
1015
-float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
1016
-{
1017
- float_status *fpst = fpstp;
1018
- return float32_muladd(a, b, c, 0, fpst);
1019
-}
1020
-
1021
-float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
1022
-{
1023
- float_status *fpst = fpstp;
1024
- return float64_muladd(a, b, c, 0, fpst);
1025
-}
1026
-
1027
-/* ARMv8 round to integral */
1028
-float32 HELPER(rints_exact)(float32 x, void *fp_status)
1029
-{
1030
- return float32_round_to_int(x, fp_status);
1031
-}
1032
-
1033
-float64 HELPER(rintd_exact)(float64 x, void *fp_status)
1034
-{
1035
- return float64_round_to_int(x, fp_status);
1036
-}
1037
-
1038
-float32 HELPER(rints)(float32 x, void *fp_status)
1039
-{
1040
- int old_flags = get_float_exception_flags(fp_status), new_flags;
1041
- float32 ret;
1042
-
1043
- ret = float32_round_to_int(x, fp_status);
1044
-
1045
- /* Suppress any inexact exceptions the conversion produced */
1046
- if (!(old_flags & float_flag_inexact)) {
1047
- new_flags = get_float_exception_flags(fp_status);
1048
- set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1049
- }
1050
-
1051
- return ret;
1052
-}
1053
-
1054
-float64 HELPER(rintd)(float64 x, void *fp_status)
1055
-{
1056
- int old_flags = get_float_exception_flags(fp_status), new_flags;
1057
- float64 ret;
1058
-
1059
- ret = float64_round_to_int(x, fp_status);
1060
-
1061
- new_flags = get_float_exception_flags(fp_status);
1062
-
1063
- /* Suppress any inexact exceptions the conversion produced */
1064
- if (!(old_flags & float_flag_inexact)) {
1065
- new_flags = get_float_exception_flags(fp_status);
1066
- set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1067
- }
1068
-
1069
- return ret;
1070
-}
1071
-
1072
-/* Convert ARM rounding mode to softfloat */
1073
-int arm_rmode_to_sf(int rmode)
1074
-{
1075
- switch (rmode) {
1076
- case FPROUNDING_TIEAWAY:
1077
- rmode = float_round_ties_away;
1078
- break;
1079
- case FPROUNDING_ODD:
1080
- /* FIXME: add support for ODD */
1081
- qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
1082
- rmode);
1083
- /* fall through for now */
1084
- case FPROUNDING_TIEEVEN:
1085
- default:
1086
- rmode = float_round_nearest_even;
1087
- break;
1088
- case FPROUNDING_POSINF:
1089
- rmode = float_round_up;
1090
- break;
1091
- case FPROUNDING_NEGINF:
1092
- rmode = float_round_down;
1093
- break;
1094
- case FPROUNDING_ZERO:
1095
- rmode = float_round_to_zero;
1096
- break;
1097
- }
1098
- return rmode;
1099
-}
1100
-
1101
/* CRC helpers.
1102
* The upper bytes of val (above the number specified by 'bytes') must have
1103
* been zeroed out by the caller.
1104
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
1105
new file mode 100644
33
new file mode 100644
1106
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
1107
--- /dev/null
35
--- /dev/null
1108
+++ b/target/arm/vfp_helper.c
36
+++ b/target/arm/sme.decode
37
@@ -XXX,XX +XXX,XX @@
38
+# AArch64 SME instruction descriptions
39
+#
40
+# Copyright (c) 2022 Linaro, Ltd
41
+#
42
+# This library is free software; you can redistribute it and/or
43
+# modify it under the terms of the GNU Lesser General Public
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
47
+# This library is distributed in the hope that it will be useful,
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
54
+
55
+#
56
+# This file is processed by scripts/decodetree.py
57
+#
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
63
}
64
65
switch (extract32(insn, 25, 4)) {
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
67
+ case 0x0:
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
69
+ unallocated_encoding(s);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/translate-sme.c
1109
@@ -XXX,XX +XXX,XX @@
81
@@ -XXX,XX +XXX,XX @@
1110
+/*
82
+/*
1111
+ * ARM VFP floating-point operations
83
+ * AArch64 SME translation
1112
+ *
84
+ *
1113
+ * Copyright (c) 2003 Fabrice Bellard
85
+ * Copyright (c) 2022 Linaro, Ltd
1114
+ *
86
+ *
1115
+ * This library is free software; you can redistribute it and/or
87
+ * This library is free software; you can redistribute it and/or
1116
+ * modify it under the terms of the GNU Lesser General Public
88
+ * modify it under the terms of the GNU Lesser General Public
1117
+ * License as published by the Free Software Foundation; either
89
+ * License as published by the Free Software Foundation; either
1118
+ * version 2.1 of the License, or (at your option) any later version.
90
+ * version 2.1 of the License, or (at your option) any later version.
...
...
1125
+ * You should have received a copy of the GNU Lesser General Public
97
+ * You should have received a copy of the GNU Lesser General Public
1126
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
1127
+ */
99
+ */
1128
+
100
+
1129
+#include "qemu/osdep.h"
101
+#include "qemu/osdep.h"
1130
+#include "qemu/log.h"
1131
+#include "cpu.h"
102
+#include "cpu.h"
1132
+#include "exec/helper-proto.h"
103
+#include "tcg/tcg-op.h"
104
+#include "tcg/tcg-op-gvec.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
1133
+#include "fpu/softfloat.h"
109
+#include "fpu/softfloat.h"
1134
+#include "internals.h"
1135
+
110
+
1136
+
111
+
1137
+/* VFP support. We follow the convention used for VFP instructions:
112
+/*
1138
+ Single precision routines have a "s" suffix, double precision a
113
+ * Include the generated decoder.
1139
+ "d" suffix. */
1140
+
1141
+/* Convert host exception flags to vfp form. */
1142
+static inline int vfp_exceptbits_from_host(int host_bits)
1143
+{
1144
+ int target_bits = 0;
1145
+
1146
+ if (host_bits & float_flag_invalid)
1147
+ target_bits |= 1;
1148
+ if (host_bits & float_flag_divbyzero)
1149
+ target_bits |= 2;
1150
+ if (host_bits & float_flag_overflow)
1151
+ target_bits |= 4;
1152
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal))
1153
+ target_bits |= 8;
1154
+ if (host_bits & float_flag_inexact)
1155
+ target_bits |= 0x10;
1156
+ if (host_bits & float_flag_input_denormal)
1157
+ target_bits |= 0x80;
1158
+ return target_bits;
1159
+}
1160
+
1161
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
1162
+{
1163
+ uint32_t i, fpscr;
1164
+
1165
+ fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
1166
+ | (env->vfp.vec_len << 16)
1167
+ | (env->vfp.vec_stride << 20);
1168
+
1169
+ i = get_float_exception_flags(&env->vfp.fp_status);
1170
+ i |= get_float_exception_flags(&env->vfp.standard_fp_status);
1171
+ /* FZ16 does not generate an input denormal exception. */
1172
+ i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
1173
+ & ~float_flag_input_denormal);
1174
+ fpscr |= vfp_exceptbits_from_host(i);
1175
+
1176
+ i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
1177
+ fpscr |= i ? FPCR_QC : 0;
1178
+
1179
+ return fpscr;
1180
+}
1181
+
1182
+uint32_t vfp_get_fpscr(CPUARMState *env)
1183
+{
1184
+ return HELPER(vfp_get_fpscr)(env);
1185
+}
1186
+
1187
+/* Convert vfp exception flags to host form. */
1188
+static inline int vfp_exceptbits_to_host(int target_bits)
1189
+{
1190
+ int host_bits = 0;
1191
+
1192
+ if (target_bits & 1)
1193
+ host_bits |= float_flag_invalid;
1194
+ if (target_bits & 2)
1195
+ host_bits |= float_flag_divbyzero;
1196
+ if (target_bits & 4)
1197
+ host_bits |= float_flag_overflow;
1198
+ if (target_bits & 8)
1199
+ host_bits |= float_flag_underflow;
1200
+ if (target_bits & 0x10)
1201
+ host_bits |= float_flag_inexact;
1202
+ if (target_bits & 0x80)
1203
+ host_bits |= float_flag_input_denormal;
1204
+ return host_bits;
1205
+}
1206
+
1207
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
1208
+{
1209
+ int i;
1210
+ uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
1211
+
1212
+ /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
1213
+ if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
1214
+ val &= ~FPCR_FZ16;
1215
+ }
1216
+
1217
+ /*
1218
+ * We don't implement trapped exception handling, so the
1219
+ * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
1220
+ *
1221
+ * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
1222
+ * (which are stored in fp_status), and the other RES0 bits
1223
+ * in between, then we clear all of the low 16 bits.
1224
+ */
1225
+ env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
1226
+ env->vfp.vec_len = (val >> 16) & 7;
1227
+ env->vfp.vec_stride = (val >> 20) & 3;
1228
+
1229
+ /*
1230
+ * The bit we set within vfp.qc[0] is arbitrary; the register as a
1231
+ * whole being zero/non-zero is what counts.
1232
+ */
1233
+ env->vfp.qc[0] = val & FPCR_QC;
1234
+ env->vfp.qc[1] = 0;
1235
+ env->vfp.qc[2] = 0;
1236
+ env->vfp.qc[3] = 0;
1237
+
1238
+ changed ^= val;
1239
+ if (changed & (3 << 22)) {
1240
+ i = (val >> 22) & 3;
1241
+ switch (i) {
1242
+ case FPROUNDING_TIEEVEN:
1243
+ i = float_round_nearest_even;
1244
+ break;
1245
+ case FPROUNDING_POSINF:
1246
+ i = float_round_up;
1247
+ break;
1248
+ case FPROUNDING_NEGINF:
1249
+ i = float_round_down;
1250
+ break;
1251
+ case FPROUNDING_ZERO:
1252
+ i = float_round_to_zero;
1253
+ break;
1254
+ }
1255
+ set_float_rounding_mode(i, &env->vfp.fp_status);
1256
+ set_float_rounding_mode(i, &env->vfp.fp_status_f16);
1257
+ }
1258
+ if (changed & FPCR_FZ16) {
1259
+ bool ftz_enabled = val & FPCR_FZ16;
1260
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
1261
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
1262
+ }
1263
+ if (changed & FPCR_FZ) {
1264
+ bool ftz_enabled = val & FPCR_FZ;
1265
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
1266
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
1267
+ }
1268
+ if (changed & FPCR_DN) {
1269
+ bool dnan_enabled = val & FPCR_DN;
1270
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
1271
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
1272
+ }
1273
+
1274
+ /* The exception flags are ORed together when we read fpscr so we
1275
+ * only need to preserve the current state in one of our
1276
+ * float_status values.
1277
+ */
1278
+ i = vfp_exceptbits_to_host(val);
1279
+ set_float_exception_flags(i, &env->vfp.fp_status);
1280
+ set_float_exception_flags(0, &env->vfp.fp_status_f16);
1281
+ set_float_exception_flags(0, &env->vfp.standard_fp_status);
1282
+}
1283
+
1284
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
1285
+{
1286
+ HELPER(vfp_set_fpscr)(env, val);
1287
+}
1288
+
1289
+#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
1290
+
1291
+#define VFP_BINOP(name) \
1292
+float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
1293
+{ \
1294
+ float_status *fpst = fpstp; \
1295
+ return float32_ ## name(a, b, fpst); \
1296
+} \
1297
+float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
1298
+{ \
1299
+ float_status *fpst = fpstp; \
1300
+ return float64_ ## name(a, b, fpst); \
1301
+}
1302
+VFP_BINOP(add)
1303
+VFP_BINOP(sub)
1304
+VFP_BINOP(mul)
1305
+VFP_BINOP(div)
1306
+VFP_BINOP(min)
1307
+VFP_BINOP(max)
1308
+VFP_BINOP(minnum)
1309
+VFP_BINOP(maxnum)
1310
+#undef VFP_BINOP
1311
+
1312
+float32 VFP_HELPER(neg, s)(float32 a)
1313
+{
1314
+ return float32_chs(a);
1315
+}
1316
+
1317
+float64 VFP_HELPER(neg, d)(float64 a)
1318
+{
1319
+ return float64_chs(a);
1320
+}
1321
+
1322
+float32 VFP_HELPER(abs, s)(float32 a)
1323
+{
1324
+ return float32_abs(a);
1325
+}
1326
+
1327
+float64 VFP_HELPER(abs, d)(float64 a)
1328
+{
1329
+ return float64_abs(a);
1330
+}
1331
+
1332
+float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
1333
+{
1334
+ return float32_sqrt(a, &env->vfp.fp_status);
1335
+}
1336
+
1337
+float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
1338
+{
1339
+ return float64_sqrt(a, &env->vfp.fp_status);
1340
+}
1341
+
1342
+static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
1343
+{
1344
+ uint32_t flags;
1345
+ switch (cmp) {
1346
+ case float_relation_equal:
1347
+ flags = 0x6;
1348
+ break;
1349
+ case float_relation_less:
1350
+ flags = 0x8;
1351
+ break;
1352
+ case float_relation_greater:
1353
+ flags = 0x2;
1354
+ break;
1355
+ case float_relation_unordered:
1356
+ flags = 0x3;
1357
+ break;
1358
+ default:
1359
+ g_assert_not_reached();
1360
+ }
1361
+ env->vfp.xregs[ARM_VFP_FPSCR] =
1362
+ deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
1363
+}
1364
+
1365
+/* XXX: check quiet/signaling case */
1366
+#define DO_VFP_cmp(p, type) \
1367
+void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
1368
+{ \
1369
+ softfloat_to_vfp_compare(env, \
1370
+ type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
1371
+} \
1372
+void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
1373
+{ \
1374
+ softfloat_to_vfp_compare(env, \
1375
+ type ## _compare(a, b, &env->vfp.fp_status)); \
1376
+}
1377
+DO_VFP_cmp(s, float32)
1378
+DO_VFP_cmp(d, float64)
1379
+#undef DO_VFP_cmp
1380
+
1381
+/* Integer to float and float to integer conversions */
1382
+
1383
+#define CONV_ITOF(name, ftype, fsz, sign) \
1384
+ftype HELPER(name)(uint32_t x, void *fpstp) \
1385
+{ \
1386
+ float_status *fpst = fpstp; \
1387
+ return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
1388
+}
1389
+
1390
+#define CONV_FTOI(name, ftype, fsz, sign, round) \
1391
+sign##int32_t HELPER(name)(ftype x, void *fpstp) \
1392
+{ \
1393
+ float_status *fpst = fpstp; \
1394
+ if (float##fsz##_is_any_nan(x)) { \
1395
+ float_raise(float_flag_invalid, fpst); \
1396
+ return 0; \
1397
+ } \
1398
+ return float##fsz##_to_##sign##int32##round(x, fpst); \
1399
+}
1400
+
1401
+#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
1402
+ CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
1403
+ CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
1404
+ CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
1405
+
1406
+FLOAT_CONVS(si, h, uint32_t, 16, )
1407
+FLOAT_CONVS(si, s, float32, 32, )
1408
+FLOAT_CONVS(si, d, float64, 64, )
1409
+FLOAT_CONVS(ui, h, uint32_t, 16, u)
1410
+FLOAT_CONVS(ui, s, float32, 32, u)
1411
+FLOAT_CONVS(ui, d, float64, 64, u)
1412
+
1413
+#undef CONV_ITOF
1414
+#undef CONV_FTOI
1415
+#undef FLOAT_CONVS
1416
+
1417
+/* floating point conversion */
1418
+float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
1419
+{
1420
+ return float32_to_float64(x, &env->vfp.fp_status);
1421
+}
1422
+
1423
+float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
1424
+{
1425
+ return float64_to_float32(x, &env->vfp.fp_status);
1426
+}
1427
+
1428
+/* VFP3 fixed point conversion. */
1429
+#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
1430
+float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
1431
+ void *fpstp) \
1432
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
1433
+
1434
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff) \
1435
+uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
1436
+ void *fpst) \
1437
+{ \
1438
+ if (unlikely(float##fsz##_is_any_nan(x))) { \
1439
+ float_raise(float_flag_invalid, fpst); \
1440
+ return 0; \
1441
+ } \
1442
+ return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
1443
+}
1444
+
1445
+#define VFP_CONV_FIX(name, p, fsz, isz, itype) \
1446
+VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
1447
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
1448
+ float_round_to_zero, _round_to_zero) \
1449
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
1450
+ get_float_rounding_mode(fpst), )
1451
+
1452
+#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \
1453
+VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
1454
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
1455
+ get_float_rounding_mode(fpst), )
1456
+
1457
+VFP_CONV_FIX(sh, d, 64, 64, int16)
1458
+VFP_CONV_FIX(sl, d, 64, 64, int32)
1459
+VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
1460
+VFP_CONV_FIX(uh, d, 64, 64, uint16)
1461
+VFP_CONV_FIX(ul, d, 64, 64, uint32)
1462
+VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
1463
+VFP_CONV_FIX(sh, s, 32, 32, int16)
1464
+VFP_CONV_FIX(sl, s, 32, 32, int32)
1465
+VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
1466
+VFP_CONV_FIX(uh, s, 32, 32, uint16)
1467
+VFP_CONV_FIX(ul, s, 32, 32, uint32)
1468
+VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
1469
+
1470
+#undef VFP_CONV_FIX
1471
+#undef VFP_CONV_FIX_FLOAT
1472
+#undef VFP_CONV_FLOAT_FIX_ROUND
1473
+#undef VFP_CONV_FIX_A64
1474
+
1475
+uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
1476
+{
1477
+ return int32_to_float16_scalbn(x, -shift, fpst);
1478
+}
1479
+
1480
+uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
1481
+{
1482
+ return uint32_to_float16_scalbn(x, -shift, fpst);
1483
+}
1484
+
1485
+uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
1486
+{
1487
+ return int64_to_float16_scalbn(x, -shift, fpst);
1488
+}
1489
+
1490
+uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
1491
+{
1492
+ return uint64_to_float16_scalbn(x, -shift, fpst);
1493
+}
1494
+
1495
+uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
1496
+{
1497
+ if (unlikely(float16_is_any_nan(x))) {
1498
+ float_raise(float_flag_invalid, fpst);
1499
+ return 0;
1500
+ }
1501
+ return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
1502
+ shift, fpst);
1503
+}
1504
+
1505
+uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
1506
+{
1507
+ if (unlikely(float16_is_any_nan(x))) {
1508
+ float_raise(float_flag_invalid, fpst);
1509
+ return 0;
1510
+ }
1511
+ return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
1512
+ shift, fpst);
1513
+}
1514
+
1515
+uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
1516
+{
1517
+ if (unlikely(float16_is_any_nan(x))) {
1518
+ float_raise(float_flag_invalid, fpst);
1519
+ return 0;
1520
+ }
1521
+ return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
1522
+ shift, fpst);
1523
+}
1524
+
1525
+uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
1526
+{
1527
+ if (unlikely(float16_is_any_nan(x))) {
1528
+ float_raise(float_flag_invalid, fpst);
1529
+ return 0;
1530
+ }
1531
+ return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
1532
+ shift, fpst);
1533
+}
1534
+
1535
+uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
1536
+{
1537
+ if (unlikely(float16_is_any_nan(x))) {
1538
+ float_raise(float_flag_invalid, fpst);
1539
+ return 0;
1540
+ }
1541
+ return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
1542
+ shift, fpst);
1543
+}
1544
+
1545
+uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
1546
+{
1547
+ if (unlikely(float16_is_any_nan(x))) {
1548
+ float_raise(float_flag_invalid, fpst);
1549
+ return 0;
1550
+ }
1551
+ return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
1552
+ shift, fpst);
1553
+}
1554
+
1555
+/* Set the current fp rounding mode and return the old one.
1556
+ * The argument is a softfloat float_round_ value.
1557
+ */
1558
+uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
1559
+{
1560
+ float_status *fp_status = fpstp;
1561
+
1562
+ uint32_t prev_rmode = get_float_rounding_mode(fp_status);
1563
+ set_float_rounding_mode(rmode, fp_status);
1564
+
1565
+ return prev_rmode;
1566
+}
1567
+
1568
+/* Set the current fp rounding mode in the standard fp status and return
1569
+ * the old one. This is for NEON instructions that need to change the
1570
+ * rounding mode but wish to use the standard FPSCR values for everything
1571
+ * else. Always set the rounding mode back to the correct value after
1572
+ * modifying it.
1573
+ * The argument is a softfloat float_round_ value.
1574
+ */
1575
+uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
1576
+{
1577
+ float_status *fp_status = &env->vfp.standard_fp_status;
1578
+
1579
+ uint32_t prev_rmode = get_float_rounding_mode(fp_status);
1580
+ set_float_rounding_mode(rmode, fp_status);
1581
+
1582
+ return prev_rmode;
1583
+}
1584
+
1585
+/* Half precision conversions. */
1586
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
1587
+{
1588
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
1589
+ * it would affect flushing input denormals.
1590
+ */
1591
+ float_status *fpst = fpstp;
1592
+ flag save = get_flush_inputs_to_zero(fpst);
1593
+ set_flush_inputs_to_zero(false, fpst);
1594
+ float32 r = float16_to_float32(a, !ahp_mode, fpst);
1595
+ set_flush_inputs_to_zero(save, fpst);
1596
+ return r;
1597
+}
1598
+
1599
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
1600
+{
1601
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
1602
+ * it would affect flushing output denormals.
1603
+ */
1604
+ float_status *fpst = fpstp;
1605
+ flag save = get_flush_to_zero(fpst);
1606
+ set_flush_to_zero(false, fpst);
1607
+ float16 r = float32_to_float16(a, !ahp_mode, fpst);
1608
+ set_flush_to_zero(save, fpst);
1609
+ return r;
1610
+}
1611
+
1612
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
1613
+{
1614
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
1615
+ * it would affect flushing input denormals.
1616
+ */
1617
+ float_status *fpst = fpstp;
1618
+ flag save = get_flush_inputs_to_zero(fpst);
1619
+ set_flush_inputs_to_zero(false, fpst);
1620
+ float64 r = float16_to_float64(a, !ahp_mode, fpst);
1621
+ set_flush_inputs_to_zero(save, fpst);
1622
+ return r;
1623
+}
1624
+
1625
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
1626
+{
1627
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
1628
+ * it would affect flushing output denormals.
1629
+ */
1630
+ float_status *fpst = fpstp;
1631
+ flag save = get_flush_to_zero(fpst);
1632
+ set_flush_to_zero(false, fpst);
1633
+ float16 r = float64_to_float16(a, !ahp_mode, fpst);
1634
+ set_flush_to_zero(save, fpst);
1635
+ return r;
1636
+}
1637
+
1638
+#define float32_two make_float32(0x40000000)
1639
+#define float32_three make_float32(0x40400000)
1640
+#define float32_one_point_five make_float32(0x3fc00000)
1641
+
1642
+float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
1643
+{
1644
+ float_status *s = &env->vfp.standard_fp_status;
1645
+ if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
1646
+ (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
1647
+ if (!(float32_is_zero(a) || float32_is_zero(b))) {
1648
+ float_raise(float_flag_input_denormal, s);
1649
+ }
1650
+ return float32_two;
1651
+ }
1652
+ return float32_sub(float32_two, float32_mul(a, b, s), s);
1653
+}
1654
+
1655
+float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
1656
+{
1657
+ float_status *s = &env->vfp.standard_fp_status;
1658
+ float32 product;
1659
+ if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
1660
+ (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
1661
+ if (!(float32_is_zero(a) || float32_is_zero(b))) {
1662
+ float_raise(float_flag_input_denormal, s);
1663
+ }
1664
+ return float32_one_point_five;
1665
+ }
1666
+ product = float32_mul(a, b, s);
1667
+ return float32_div(float32_sub(float32_three, product, s), float32_two, s);
1668
+}
1669
+
1670
+/* NEON helpers. */
1671
+
1672
+/* Constants 256 and 512 are used in some helpers; we avoid relying on
1673
+ * int->float conversions at run-time. */
1674
+#define float64_256 make_float64(0x4070000000000000LL)
1675
+#define float64_512 make_float64(0x4080000000000000LL)
1676
+#define float16_maxnorm make_float16(0x7bff)
1677
+#define float32_maxnorm make_float32(0x7f7fffff)
1678
+#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
1679
+
1680
+/* Reciprocal functions
1681
+ *
1682
+ * The algorithm that must be used to calculate the estimate
1683
+ * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
1684
+ */
114
+ */
1685
+
115
+
1686
+/* See RecipEstimate()
116
+#include "decode-sme.c.inc"
1687
+ *
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
1688
+ * input is a 9 bit fixed point number
118
index XXXXXXX..XXXXXXX 100644
1689
+ * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
119
--- a/target/arm/meson.build
1690
+ * result range 256 .. 511 for a number from 1.0 to 511/256.
120
+++ b/target/arm/meson.build
1691
+ */
121
@@ -XXX,XX +XXX,XX @@
1692
+
122
gen = [
1693
+static int recip_estimate(int input)
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
1694
+{
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
1695
+ int a, b, r;
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
1696
+ assert(256 <= input && input < 512);
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
1697
+ a = (input * 2) + 1;
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
1698
+ b = (1 << 19) / a;
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
1699
+ r = (b + 1) >> 1;
129
'sme_helper.c',
1700
+ assert(256 <= r && r < 512);
130
'translate-a64.c',
1701
+ return r;
131
'translate-sve.c',
1702
+}
132
+ 'translate-sme.c',
1703
+
133
))
1704
+/*
134
1705
+ * Common wrapper to call recip_estimate
135
arm_softmmu_ss = ss.source_set()
1706
+ *
1707
+ * The parameters are exponent and 64 bit fraction (without implicit
1708
+ * bit) where the binary point is nominally at bit 52. Returns a
1709
+ * float64 which can then be rounded to the appropriate size by the
1710
+ * caller.
1711
+ */
1712
+
1713
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
1714
+{
1715
+ uint32_t scaled, estimate;
1716
+ uint64_t result_frac;
1717
+ int result_exp;
1718
+
1719
+ /* Handle sub-normals */
1720
+ if (*exp == 0) {
1721
+ if (extract64(frac, 51, 1) == 0) {
1722
+ *exp = -1;
1723
+ frac <<= 2;
1724
+ } else {
1725
+ frac <<= 1;
1726
+ }
1727
+ }
1728
+
1729
+ /* scaled = UInt('1':fraction<51:44>) */
1730
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
1731
+ estimate = recip_estimate(scaled);
1732
+
1733
+ result_exp = exp_off - *exp;
1734
+ result_frac = deposit64(0, 44, 8, estimate);
1735
+ if (result_exp == 0) {
1736
+ result_frac = deposit64(result_frac >> 1, 51, 1, 1);
1737
+ } else if (result_exp == -1) {
1738
+ result_frac = deposit64(result_frac >> 2, 50, 2, 1);
1739
+ result_exp = 0;
1740
+ }
1741
+
1742
+ *exp = result_exp;
1743
+
1744
+ return result_frac;
1745
+}
1746
+
1747
+static bool round_to_inf(float_status *fpst, bool sign_bit)
1748
+{
1749
+ switch (fpst->float_rounding_mode) {
1750
+ case float_round_nearest_even: /* Round to Nearest */
1751
+ return true;
1752
+ case float_round_up: /* Round to +Inf */
1753
+ return !sign_bit;
1754
+ case float_round_down: /* Round to -Inf */
1755
+ return sign_bit;
1756
+ case float_round_to_zero: /* Round to Zero */
1757
+ return false;
1758
+ }
1759
+
1760
+ g_assert_not_reached();
1761
+}
1762
+
1763
+uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
1764
+{
1765
+ float_status *fpst = fpstp;
1766
+ float16 f16 = float16_squash_input_denormal(input, fpst);
1767
+ uint32_t f16_val = float16_val(f16);
1768
+ uint32_t f16_sign = float16_is_neg(f16);
1769
+ int f16_exp = extract32(f16_val, 10, 5);
1770
+ uint32_t f16_frac = extract32(f16_val, 0, 10);
1771
+ uint64_t f64_frac;
1772
+
1773
+ if (float16_is_any_nan(f16)) {
1774
+ float16 nan = f16;
1775
+ if (float16_is_signaling_nan(f16, fpst)) {
1776
+ float_raise(float_flag_invalid, fpst);
1777
+ nan = float16_silence_nan(f16, fpst);
1778
+ }
1779
+ if (fpst->default_nan_mode) {
1780
+ nan = float16_default_nan(fpst);
1781
+ }
1782
+ return nan;
1783
+ } else if (float16_is_infinity(f16)) {
1784
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
1785
+ } else if (float16_is_zero(f16)) {
1786
+ float_raise(float_flag_divbyzero, fpst);
1787
+ return float16_set_sign(float16_infinity, float16_is_neg(f16));
1788
+ } else if (float16_abs(f16) < (1 << 8)) {
1789
+ /* Abs(value) < 2.0^-16 */
1790
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
1791
+ if (round_to_inf(fpst, f16_sign)) {
1792
+ return float16_set_sign(float16_infinity, f16_sign);
1793
+ } else {
1794
+ return float16_set_sign(float16_maxnorm, f16_sign);
1795
+ }
1796
+ } else if (f16_exp >= 29 && fpst->flush_to_zero) {
1797
+ float_raise(float_flag_underflow, fpst);
1798
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
1799
+ }
1800
+
1801
+ f64_frac = call_recip_estimate(&f16_exp, 29,
1802
+ ((uint64_t) f16_frac) << (52 - 10));
1803
+
1804
+ /* result = sign : result_exp<4:0> : fraction<51:42> */
1805
+ f16_val = deposit32(0, 15, 1, f16_sign);
1806
+ f16_val = deposit32(f16_val, 10, 5, f16_exp);
1807
+ f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
1808
+ return make_float16(f16_val);
1809
+}
1810
+
1811
+float32 HELPER(recpe_f32)(float32 input, void *fpstp)
1812
+{
1813
+ float_status *fpst = fpstp;
1814
+ float32 f32 = float32_squash_input_denormal(input, fpst);
1815
+ uint32_t f32_val = float32_val(f32);
1816
+ bool f32_sign = float32_is_neg(f32);
1817
+ int f32_exp = extract32(f32_val, 23, 8);
1818
+ uint32_t f32_frac = extract32(f32_val, 0, 23);
1819
+ uint64_t f64_frac;
1820
+
1821
+ if (float32_is_any_nan(f32)) {
1822
+ float32 nan = f32;
1823
+ if (float32_is_signaling_nan(f32, fpst)) {
1824
+ float_raise(float_flag_invalid, fpst);
1825
+ nan = float32_silence_nan(f32, fpst);
1826
+ }
1827
+ if (fpst->default_nan_mode) {
1828
+ nan = float32_default_nan(fpst);
1829
+ }
1830
+ return nan;
1831
+ } else if (float32_is_infinity(f32)) {
1832
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
1833
+ } else if (float32_is_zero(f32)) {
1834
+ float_raise(float_flag_divbyzero, fpst);
1835
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
1836
+ } else if (float32_abs(f32) < (1ULL << 21)) {
1837
+ /* Abs(value) < 2.0^-128 */
1838
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
1839
+ if (round_to_inf(fpst, f32_sign)) {
1840
+ return float32_set_sign(float32_infinity, f32_sign);
1841
+ } else {
1842
+ return float32_set_sign(float32_maxnorm, f32_sign);
1843
+ }
1844
+ } else if (f32_exp >= 253 && fpst->flush_to_zero) {
1845
+ float_raise(float_flag_underflow, fpst);
1846
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
1847
+ }
1848
+
1849
+ f64_frac = call_recip_estimate(&f32_exp, 253,
1850
+ ((uint64_t) f32_frac) << (52 - 23));
1851
+
1852
+ /* result = sign : result_exp<7:0> : fraction<51:29> */
1853
+ f32_val = deposit32(0, 31, 1, f32_sign);
1854
+ f32_val = deposit32(f32_val, 23, 8, f32_exp);
1855
+ f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
1856
+ return make_float32(f32_val);
1857
+}
1858
+
1859
+float64 HELPER(recpe_f64)(float64 input, void *fpstp)
1860
+{
1861
+ float_status *fpst = fpstp;
1862
+ float64 f64 = float64_squash_input_denormal(input, fpst);
1863
+ uint64_t f64_val = float64_val(f64);
1864
+ bool f64_sign = float64_is_neg(f64);
1865
+ int f64_exp = extract64(f64_val, 52, 11);
1866
+ uint64_t f64_frac = extract64(f64_val, 0, 52);
1867
+
1868
+ /* Deal with any special cases */
1869
+ if (float64_is_any_nan(f64)) {
1870
+ float64 nan = f64;
1871
+ if (float64_is_signaling_nan(f64, fpst)) {
1872
+ float_raise(float_flag_invalid, fpst);
1873
+ nan = float64_silence_nan(f64, fpst);
1874
+ }
1875
+ if (fpst->default_nan_mode) {
1876
+ nan = float64_default_nan(fpst);
1877
+ }
1878
+ return nan;
1879
+ } else if (float64_is_infinity(f64)) {
1880
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
1881
+ } else if (float64_is_zero(f64)) {
1882
+ float_raise(float_flag_divbyzero, fpst);
1883
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
1884
+ } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
1885
+ /* Abs(value) < 2.0^-1024 */
1886
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
1887
+ if (round_to_inf(fpst, f64_sign)) {
1888
+ return float64_set_sign(float64_infinity, f64_sign);
1889
+ } else {
1890
+ return float64_set_sign(float64_maxnorm, f64_sign);
1891
+ }
1892
+ } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
1893
+ float_raise(float_flag_underflow, fpst);
1894
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
1895
+ }
1896
+
1897
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
1898
+
1899
+ /* result = sign : result_exp<10:0> : fraction<51:0>; */
1900
+ f64_val = deposit64(0, 63, 1, f64_sign);
1901
+ f64_val = deposit64(f64_val, 52, 11, f64_exp);
1902
+ f64_val = deposit64(f64_val, 0, 52, f64_frac);
1903
+ return make_float64(f64_val);
1904
+}
1905
+
1906
+/* The algorithm that must be used to calculate the estimate
1907
+ * is specified by the ARM ARM.
1908
+ */
1909
+
1910
+static int do_recip_sqrt_estimate(int a)
1911
+{
1912
+ int b, estimate;
1913
+
1914
+ assert(128 <= a && a < 512);
1915
+ if (a < 256) {
1916
+ a = a * 2 + 1;
1917
+ } else {
1918
+ a = (a >> 1) << 1;
1919
+ a = (a + 1) * 2;
1920
+ }
1921
+ b = 512;
1922
+ while (a * (b + 1) * (b + 1) < (1 << 28)) {
1923
+ b += 1;
1924
+ }
1925
+ estimate = (b + 1) / 2;
1926
+ assert(256 <= estimate && estimate < 512);
1927
+
1928
+ return estimate;
1929
+}
1930
+
1931
+
1932
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
1933
+{
1934
+ int estimate;
1935
+ uint32_t scaled;
1936
+
1937
+ if (*exp == 0) {
1938
+ while (extract64(frac, 51, 1) == 0) {
1939
+ frac = frac << 1;
1940
+ *exp -= 1;
1941
+ }
1942
+ frac = extract64(frac, 0, 51) << 1;
1943
+ }
1944
+
1945
+ if (*exp & 1) {
1946
+ /* scaled = UInt('01':fraction<51:45>) */
1947
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
1948
+ } else {
1949
+ /* scaled = UInt('1':fraction<51:44>) */
1950
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
1951
+ }
1952
+ estimate = do_recip_sqrt_estimate(scaled);
1953
+
1954
+ *exp = (exp_off - *exp) / 2;
1955
+ return extract64(estimate, 0, 8) << 44;
1956
+}
1957
+
1958
+uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
1959
+{
1960
+ float_status *s = fpstp;
1961
+ float16 f16 = float16_squash_input_denormal(input, s);
1962
+ uint16_t val = float16_val(f16);
1963
+ bool f16_sign = float16_is_neg(f16);
1964
+ int f16_exp = extract32(val, 10, 5);
1965
+ uint16_t f16_frac = extract32(val, 0, 10);
1966
+ uint64_t f64_frac;
1967
+
1968
+ if (float16_is_any_nan(f16)) {
1969
+ float16 nan = f16;
1970
+ if (float16_is_signaling_nan(f16, s)) {
1971
+ float_raise(float_flag_invalid, s);
1972
+ nan = float16_silence_nan(f16, s);
1973
+ }
1974
+ if (s->default_nan_mode) {
1975
+ nan = float16_default_nan(s);
1976
+ }
1977
+ return nan;
1978
+ } else if (float16_is_zero(f16)) {
1979
+ float_raise(float_flag_divbyzero, s);
1980
+ return float16_set_sign(float16_infinity, f16_sign);
1981
+ } else if (f16_sign) {
1982
+ float_raise(float_flag_invalid, s);
1983
+ return float16_default_nan(s);
1984
+ } else if (float16_is_infinity(f16)) {
1985
+ return float16_zero;
1986
+ }
1987
+
1988
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
1989
+ * preserving the parity of the exponent. */
1990
+
1991
+ f64_frac = ((uint64_t) f16_frac) << (52 - 10);
1992
+
1993
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
1994
+
1995
+ /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
1996
+ val = deposit32(0, 15, 1, f16_sign);
1997
+ val = deposit32(val, 10, 5, f16_exp);
1998
+ val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
1999
+ return make_float16(val);
2000
+}
2001
+
2002
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
2003
+{
2004
+ float_status *s = fpstp;
2005
+ float32 f32 = float32_squash_input_denormal(input, s);
2006
+ uint32_t val = float32_val(f32);
2007
+ uint32_t f32_sign = float32_is_neg(f32);
2008
+ int f32_exp = extract32(val, 23, 8);
2009
+ uint32_t f32_frac = extract32(val, 0, 23);
2010
+ uint64_t f64_frac;
2011
+
2012
+ if (float32_is_any_nan(f32)) {
2013
+ float32 nan = f32;
2014
+ if (float32_is_signaling_nan(f32, s)) {
2015
+ float_raise(float_flag_invalid, s);
2016
+ nan = float32_silence_nan(f32, s);
2017
+ }
2018
+ if (s->default_nan_mode) {
2019
+ nan = float32_default_nan(s);
2020
+ }
2021
+ return nan;
2022
+ } else if (float32_is_zero(f32)) {
2023
+ float_raise(float_flag_divbyzero, s);
2024
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
2025
+ } else if (float32_is_neg(f32)) {
2026
+ float_raise(float_flag_invalid, s);
2027
+ return float32_default_nan(s);
2028
+ } else if (float32_is_infinity(f32)) {
2029
+ return float32_zero;
2030
+ }
2031
+
2032
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
2033
+ * preserving the parity of the exponent. */
2034
+
2035
+ f64_frac = ((uint64_t) f32_frac) << 29;
2036
+
2037
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
2038
+
2039
+ /* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) */
2040
+ val = deposit32(0, 31, 1, f32_sign);
2041
+ val = deposit32(val, 23, 8, f32_exp);
2042
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
2043
+ return make_float32(val);
2044
+}
2045
+
2046
+/*
+ * Reciprocal square root estimate of a double-precision value.
+ *
+ * The input has denormals squashed first. NaNs are returned (a signalling
+ * NaN raises Invalid and is silenced; default-NaN mode substitutes the
+ * default NaN). Zero raises DivByZero and returns an infinity of the same
+ * sign. Any other negative input raises Invalid and returns the default
+ * NaN. Infinity returns zero. Otherwise the result is assembled from the
+ * exponent and 8-bit estimate produced by recip_sqrt_estimate().
+ */
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
2047
+{
2048
+ float_status *s = fpstp;
2049
+ float64 f64 = float64_squash_input_denormal(input, s);
2050
+ uint64_t val = float64_val(f64);
2051
+ bool f64_sign = float64_is_neg(f64);
2052
+ int f64_exp = extract64(val, 52, 11);
2053
+ uint64_t f64_frac = extract64(val, 0, 52);
2054
+
2055
+ if (float64_is_any_nan(f64)) {
2056
+ float64 nan = f64;
2057
+ if (float64_is_signaling_nan(f64, s)) {
2058
+ float_raise(float_flag_invalid, s);
2059
+ nan = float64_silence_nan(f64, s);
2060
+ }
2061
+ if (s->default_nan_mode) {
2062
+ nan = float64_default_nan(s);
2063
+ }
2064
+ return nan;
2065
+ } else if (float64_is_zero(f64)) {
2066
+ float_raise(float_flag_divbyzero, s);
2067
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
2068
+ } else if (float64_is_neg(f64)) {
2069
+ float_raise(float_flag_invalid, s);
2070
+ return float64_default_nan(s);
2071
+ } else if (float64_is_infinity(f64)) {
2072
+ return float64_zero;
2073
+ }
2074
+
2075
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
2076
+
2077
+ /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44); the sign lives in bit 63 (negative inputs already returned above, so it is 0 here) */
2078
+ val = deposit64(0, 63, 1, f64_sign);
2079
+ val = deposit64(val, 52, 11, f64_exp);
2080
+ val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
2081
+ return make_float64(val);
2082
+}
2083
+
2084
+uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
2085
+{
2086
+ /* float_status *s = fpstp; */
2087
+ int input, estimate;
2088
+
2089
+ if ((a & 0x80000000) == 0) {
2090
+ return 0xffffffff;
2091
+ }
2092
+
2093
+ input = extract32(a, 23, 9);
2094
+ estimate = recip_estimate(input);
2095
+
2096
+ return deposit32(0, (32 - 9), 9, estimate);
2097
+}
2098
+
2099
+uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
2100
+{
2101
+ int estimate;
2102
+
2103
+ if ((a & 0xc0000000) == 0) {
2104
+ return 0xffffffff;
2105
+ }
2106
+
2107
+ estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
2108
+
2109
+ return deposit32(0, 23, 9, estimate);
2110
+}
2111
+
2112
+/* VFPv4 fused multiply-accumulate */
2113
+float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
2114
+{
2115
+ float_status *fpst = fpstp;
2116
+ return float32_muladd(a, b, c, 0, fpst);
2117
+}
2118
+
2119
+float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
2120
+{
2121
+ float_status *fpst = fpstp;
2122
+ return float64_muladd(a, b, c, 0, fpst);
2123
+}
2124
+
2125
+/* ARMv8 round to integral */
2126
+float32 HELPER(rints_exact)(float32 x, void *fp_status)
2127
+{
2128
+ return float32_round_to_int(x, fp_status);
2129
+}
2130
+
2131
+float64 HELPER(rintd_exact)(float64 x, void *fp_status)
2132
+{
2133
+ return float64_round_to_int(x, fp_status);
2134
+}
2135
+
2136
+float32 HELPER(rints)(float32 x, void *fp_status)
2137
+{
2138
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
2139
+ float32 ret;
2140
+
2141
+ ret = float32_round_to_int(x, fp_status);
2142
+
2143
+ /* Suppress any inexact exceptions the conversion produced */
2144
+ if (!(old_flags & float_flag_inexact)) {
2145
+ new_flags = get_float_exception_flags(fp_status);
2146
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
2147
+ }
2148
+
2149
+ return ret;
2150
+}
2151
+
2152
+float64 HELPER(rintd)(float64 x, void *fp_status)
2153
+{
2154
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
2155
+ float64 ret;
2156
+
2157
+ ret = float64_round_to_int(x, fp_status);
2158
+
2159
+ new_flags = get_float_exception_flags(fp_status);
2160
+
2161
+ /* Suppress any inexact exceptions the conversion produced */
2162
+ if (!(old_flags & float_flag_inexact)) {
2163
+ new_flags = get_float_exception_flags(fp_status);
2164
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
2165
+ }
2166
+
2167
+ return ret;
2168
+}
2169
+
2170
+/* Convert ARM rounding mode to softfloat */
2171
+int arm_rmode_to_sf(int rmode)
2172
+{
2173
+ switch (rmode) {
2174
+ case FPROUNDING_TIEAWAY:
2175
+ rmode = float_round_ties_away;
2176
+ break;
2177
+ case FPROUNDING_ODD:
2178
+ /* FIXME: add support for ODD */
2179
+ qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
2180
+ rmode);
2181
+ /* fall through for now */
2182
+ case FPROUNDING_TIEEVEN:
2183
+ default:
2184
+ rmode = float_round_nearest_even;
2185
+ break;
2186
+ case FPROUNDING_POSINF:
2187
+ rmode = float_round_up;
2188
+ break;
2189
+ case FPROUNDING_NEGINF:
2190
+ rmode = float_round_down;
2191
+ break;
2192
+ case FPROUNDING_ZERO:
2193
+ rmode = float_round_to_zero;
2194
+ break;
2195
+ }
2196
+ return rmode;
2197
+}
2198
--
136
--
2199
2.20.1
137
2.25.1
2200
2201
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This new behaviour is in the ARM pseudocode function
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
the trap would be delivered is in AArch64 mode.
7
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
detection ought to be trivially true, but the pseudocode still contains
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20190215192302.27855-5-richard.henderson@linaro.org
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
[PMM: fixed a couple of comment typos]
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
---
24
---
9
target/arm/cpu.h | 10 +++++
25
target/arm/cpu.h | 7 +++
10
target/arm/helper.h | 3 ++
26
target/arm/translate.h | 4 ++
11
target/arm/cpu.c | 1 +
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
12
target/arm/cpu64.c | 2 +
28
target/arm/helper.c | 41 +++++++++++++++++
13
target/arm/translate-a64.c | 26 +++++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
14
target/arm/translate.c | 10 +++++
30
target/arm/translate-vfp.c | 12 +++++
15
target/arm/vfp_helper.c | 88 ++++++++++++++++++++++++++++++++++++++
31
target/arm/translate.c | 2 +
16
7 files changed, 140 insertions(+)
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
17
35
18
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
19
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/cpu.h
38
--- a/target/arm/cpu.h
21
+++ b/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
22
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
23
return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
41
* the same thing as the current security state of the processor!
42
*/
43
FIELD(TBFLAG_A32, NS, 10, 1)
44
+/*
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
46
+ * This requires an SME trap from AArch32 mode when using NEON.
47
+ */
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
49
50
/*
51
* Bit usage when in AArch32 state, for M-profile only.
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
55
FIELD(TBFLAG_A64, SVL, 24, 4)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
58
59
/*
60
* Helpers for using the above.
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.h
64
+++ b/target/arm/translate.h
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
66
bool pstate_sm;
67
/* True if PSTATE.ZA is set. */
68
bool pstate_za;
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
75
/*
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/sme-fa64.decode
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/target/arm/helper.c
175
+++ b/target/arm/helper.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
24
}
178
}
25
179
26
+static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id)
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
27
+{
181
+static bool sme_fa64(CPUARMState *env, int el)
28
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0;
182
+{
29
+}
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
30
+
184
+ return false;
31
static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
185
+ }
32
{
186
+
33
return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
34
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
35
return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
212
213
+ /*
214
+ * The SME exception we are testing for is raised via
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
217
+ */
218
+ if (el == 0
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
220
+ && (!arm_is_el2_enabled(env)
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
225
+ }
226
+
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
36
}
228
}
37
229
38
+static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id)
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
39
+{
231
}
40
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0;
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
41
+}
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
42
+
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
43
static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
235
}
44
{
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
45
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
237
}
46
diff --git a/target/arm/helper.h b/target/arm/helper.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/helper.h
49
+++ b/target/arm/helper.h
50
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
51
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
52
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
53
54
+DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env)
55
+DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr)
56
+
57
/* neon_helper.c */
58
DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
59
DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
60
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/cpu.c
63
+++ b/target/arm/cpu.c
64
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
65
cpu->isar.id_isar5 = t;
66
67
t = cpu->isar.id_isar6;
68
+ t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1);
69
t = FIELD_DP32(t, ID_ISAR6, DP, 1);
70
cpu->isar.id_isar6 = t;
71
72
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/target/arm/cpu64.c
75
+++ b/target/arm/cpu64.c
76
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
77
cpu->isar.id_aa64isar0 = t;
78
79
t = cpu->isar.id_aa64isar1;
80
+ t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
81
t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
82
t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
83
t = FIELD_DP64(t, ID_AA64ISAR1, API, 0);
84
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
85
cpu->isar.id_isar5 = u;
86
87
u = cpu->isar.id_isar6;
88
+ u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1);
89
u = FIELD_DP32(u, ID_ISAR6, DP, 1);
90
cpu->isar.id_isar6 = u;
91
92
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
93
index XXXXXXX..XXXXXXX 100644
239
index XXXXXXX..XXXXXXX 100644
94
--- a/target/arm/translate-a64.c
240
--- a/target/arm/translate-a64.c
95
+++ b/target/arm/translate-a64.c
241
+++ b/target/arm/translate-a64.c
96
@@ -XXX,XX +XXX,XX @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
97
}
243
* unallocated-encoding checks (otherwise the syndrome information
244
* for the resulting exception will be incorrect).
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
248
{
249
if (s->fp_excp_el) {
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
98
}
253
}
99
254
100
+static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
255
+static bool fp_access_check(DisasContext *s)
101
+{
256
+{
102
+ TCGv_i64 t = read_fp_dreg(s, rn);
257
+ if (!fp_access_check_only(s)) {
103
+ TCGv_ptr fpstatus = get_fpstatus_ptr(false);
258
+ return false;
104
+
259
+ }
105
+ gen_helper_fjcvtzs(t, t, fpstatus);
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
106
+
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
107
+ tcg_temp_free_ptr(fpstatus);
262
+ syn_smetrap(SME_ET_Streaming, false));
108
+
263
+ return false;
109
+ tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
264
+ }
110
+ tcg_gen_extrh_i64_i32(cpu_ZF, t);
265
+ return true;
111
+ tcg_gen_movi_i32(cpu_CF, 0);
266
+}
112
+ tcg_gen_movi_i32(cpu_NF, 0);
267
+
113
+ tcg_gen_movi_i32(cpu_VF, 0);
268
/* Check that SVE access is enabled. If it is, return true.
114
+
269
* If not, emit code to generate an appropriate exception and return false.
115
+ tcg_temp_free_i64(t);
270
*/
116
+}
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
117
+
272
default:
118
/* Floating point <-> integer conversions
273
g_assert_not_reached();
119
* 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
274
}
120
* +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
121
@@ -XXX,XX +XXX,XX @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
122
handle_fmov(s, rd, rn, type, itof);
277
return;
123
break;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
124
279
return;
125
+ case 0b00111110: /* FJCVTZS */
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
126
+ if (!dc_isar_feature(aa64_jscvt, s)) {
281
}
127
+ goto do_unallocated;
282
}
128
+ } else if (fp_access_check(s)) {
283
129
+ handle_fjcvtzs(s, rd, rn);
284
+/*
130
+ }
285
+ * Include the generated SME FA64 decoder.
131
+ break;
286
+ */
132
+
287
+
133
default:
288
+#include "decode-sme-fa64.c.inc"
134
do_unallocated:
289
+
135
unallocated_encoding(s);
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
136
diff --git a/target/arm/translate.c b/target/arm/translate.c
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
137
index XXXXXXX..XXXXXXX 100644
348
index XXXXXXX..XXXXXXX 100644
138
--- a/target/arm/translate.c
349
--- a/target/arm/translate.c
139
+++ b/target/arm/translate.c
350
+++ b/target/arm/translate.c
140
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
141
rm_is_dp = false;
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
142
break;
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
143
354
}
144
+ case 0x13: /* vjcvt */
355
+ dc->sme_trap_nonstreaming =
145
+ if (!dp || !dc_isar_feature(aa32_jscvt, s)) {
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
146
+ return 1;
357
}
147
+ }
358
dc->cp_regs = cpu->cp_regs;
148
+ rd_is_dp = false;
359
dc->features = env->features;
149
+ break;
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
150
+
361
index XXXXXXX..XXXXXXX 100644
151
default:
362
--- a/target/arm/meson.build
152
return 1;
363
+++ b/target/arm/meson.build
153
}
364
@@ -XXX,XX +XXX,XX @@
154
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
365
gen = [
155
case 17: /* fsito */
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
156
gen_vfp_sito(dp, 0);
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
157
break;
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
158
+ case 19: /* vjcvt */
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
159
+ gen_helper_vjcvt(cpu_F0s, cpu_F0d, cpu_env);
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
160
+ break;
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
161
case 20: /* fshto */
162
gen_vfp_shto(dp, 16 - rm, 0);
163
break;
164
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
165
index XXXXXXX..XXXXXXX 100644
166
--- a/target/arm/vfp_helper.c
167
+++ b/target/arm/vfp_helper.c
168
@@ -XXX,XX +XXX,XX @@ int arm_rmode_to_sf(int rmode)
169
}
170
return rmode;
171
}
172
+
173
+/*
174
+ * Implement float64 to int32_t conversion without saturation;
175
+ * the result is supplied modulo 2^32.
176
+ */
177
+uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
178
+{
179
+ float_status *status = vstatus;
180
+ uint32_t exp, sign;
181
+ uint64_t frac;
182
+ uint32_t inexact = 1; /* !Z */
183
+
184
+ sign = extract64(value, 63, 1);
185
+ exp = extract64(value, 52, 11);
186
+ frac = extract64(value, 0, 52);
187
+
188
+ if (exp == 0) {
189
+ /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
190
+ inexact = sign;
191
+ if (frac != 0) {
192
+ if (status->flush_inputs_to_zero) {
193
+ float_raise(float_flag_input_denormal, status);
194
+ } else {
195
+ float_raise(float_flag_inexact, status);
196
+ inexact = 1;
197
+ }
198
+ }
199
+ frac = 0;
200
+ } else if (exp == 0x7ff) {
201
+ /* This operation raises Invalid for both NaN and overflow (Inf). */
202
+ float_raise(float_flag_invalid, status);
203
+ frac = 0;
204
+ } else {
205
+ int true_exp = exp - 1023;
206
+ int shift = true_exp - 52;
207
+
208
+ /* Restore implicit bit. */
209
+ frac |= 1ull << 52;
210
+
211
+ /* Shift the fraction into place. */
212
+ if (shift >= 0) {
213
+ /* The number is so large we must shift the fraction left. */
214
+ if (shift >= 64) {
215
+ /* The fraction is shifted out entirely. */
216
+ frac = 0;
217
+ } else {
218
+ frac <<= shift;
219
+ }
220
+ } else if (shift > -64) {
221
+ /* Normal case -- shift right and notice if bits shift out. */
222
+ inexact = (frac << (64 + shift)) != 0;
223
+ frac >>= -shift;
224
+ } else {
225
+ /* The fraction is shifted out entirely. */
226
+ frac = 0;
227
+ }
228
+
229
+ /* Notice overflow or inexact exceptions. */
230
+ if (true_exp > 31 || frac > (sign ? 0x80000000ull : 0x7fffffff)) {
231
+ /* Overflow, for which this operation raises invalid. */
232
+ float_raise(float_flag_invalid, status);
233
+ inexact = 1;
234
+ } else if (inexact) {
235
+ float_raise(float_flag_inexact, status);
236
+ }
237
+
238
+ /* Honor the sign. */
239
+ if (sign) {
240
+ frac = -frac;
241
+ }
242
+ }
243
+
244
+ /* Pack the result and the env->ZF representation of Z together. */
245
+ return deposit64(frac, 32, 32, inexact);
246
+}
247
+
248
+uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
249
+{
250
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
251
+ uint32_t result = pair;
252
+ uint32_t z = (pair >> 32) == 0;
253
+
254
+ /* Store Z, clear NCV, in FPSCR.NZCV. */
255
+ env->vfp.xregs[ARM_VFP_FPSCR]
256
+ = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
257
+
258
+ return result;
259
+}
260
--
372
--
261
2.20.1
373
2.25.1
262
263
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark ADR as a non-streaming instruction, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Removing entries from sme-fa64.decode is an easy way to see
7
what remains to be done.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 7 +++++++
15
target/arm/sme-fa64.decode | 1 -
16
target/arm/translate-sve.c | 8 ++++----
17
3 files changed, 11 insertions(+), 5 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
26
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
29
+ { \
30
+ s->is_nonstreaming = true; \
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
+ }
33
+
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/sme-fa64.decode
38
+++ b/target/arm/sme-fa64.decode
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
42
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
53
}
54
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
63
64
/*
65
*** SVE Integer Misc - Unpredicated Group
66
--
67
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
49
}
50
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
58
--
59
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
34
NULL, gen_helper_sve_fexpa_h,
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
92
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
13
2 files changed, 15 insertions(+), 11 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
34
NULL, gen_helper_sve2_pmull_d,
35
};
36
- if (a->esz == 0
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
38
- : !dc_isar_feature(aa64_sve, s)) {
39
+
40
+ if (a->esz == 0) {
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
42
+ return false;
43
+ }
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
46
return false;
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
51
*/
52
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
59
+ 0, FPST_FPCR)
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
62
+ 0, FPST_FPCR)
63
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
68
gen_helper_gvec_bfdot_idx, a)
69
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
71
- gen_helper_gvec_bfmmla, a, 0)
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
73
+ gen_helper_gvec_bfmmla, a, 0)
74
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
76
{
77
--
78
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 15 +++++++++++----
13
2 files changed, 11 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
36
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
43
44
/*
45
*** SVE Floating Point Accumulating Reduction Group
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
48
return false;
49
}
50
+ s->is_nonstreaming = true;
51
if (!sve_access_check(s)) {
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
55
DO_FP3(FADD_zzz, fadd)
56
DO_FP3(FSUB_zzz, fsub)
57
DO_FP3(FMUL_zzz, fmul)
58
-DO_FP3(FTSMUL, ftsmul)
59
DO_FP3(FRECPS, recps)
60
DO_FP3(FRSQRTS, rsqrts)
61
62
#undef DO_FP3
63
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
65
+ NULL, gen_helper_gvec_ftsmul_h,
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
67
+};
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
69
+ ftsmul_fns[a->esz], a, 0)
70
+
71
/*
72
*** SVE Floating Point Arithmetic - Predicated Group
73
*/
74
--
75
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
50
--
51
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
92
--
93
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 9 ---------
12
target/arm/translate-sve.c | 6 ++++++
13
2 files changed, 6 insertions(+), 9 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
if (!dc_isar_feature(aa64_sve, s)) {
45
return false;
46
}
47
+ s->is_nonstreaming = true;
48
if (!sve_access_check(s)) {
49
return true;
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
52
if (!dc_isar_feature(aa64_sve, s)) {
53
return false;
54
}
55
+ s->is_nonstreaming = true;
56
if (!sve_access_check(s)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
60
if (!dc_isar_feature(aa64_sve2, s)) {
61
return false;
62
}
63
+ s->is_nonstreaming = true;
64
if (!sve_access_check(s)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
if (!dc_isar_feature(aa64_sve, s)) {
69
return false;
70
}
71
+ s->is_nonstreaming = true;
72
if (!sve_access_check(s)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
if (!dc_isar_feature(aa64_sve, s)) {
77
return false;
78
}
79
+ s->is_nonstreaming = true;
80
if (!sve_access_check(s)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
90
}
91
--
92
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap if full
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/sme-fa64.decode | 3 ---
13
target/arm/sve.decode | 10 +++++-----
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
16
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/sme-fa64.decode
20
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
72
}
73
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
75
+{
76
+ if (!dc_isar_feature(aa64_sve, s)) {
77
+ return false;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
83
+}
84
+
85
/*
86
* Move Prefix
87
*
88
--
89
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 2 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
32
if (!dc_isar_feature(aa64_sve, s)) {
33
return false;
34
}
35
+ s->is_nonstreaming = true;
36
if (sve_access_check(s)) {
37
TCGv_i64 addr = new_tmp_a64(s);
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
48
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
-
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-sve.c
29
+++ b/target/arm/translate-sve.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
if (a->rm == 31) {
32
return false;
33
}
34
+ s->is_nonstreaming = true;
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
46
--
47
2.25.1
diff view generated by jsdifflib
1
The Musca-A and Musca-B1 development boards are based on the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
SSE-200 subsystem for embedded. Implement an initial skeleton
3
model of these boards, which are similar but not identical.
4
2
5
This commit creates the board model with the SSE and the IRQ
3
These functions will be used to verify that the cpu
6
splitters to wire IRQs up to its two CPUs. As yet there
4
is in the correct state for a given instruction.
7
are no devices and no memory: these will be added later.
8
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
---
10
---
12
hw/arm/Makefile.objs | 1 +
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
13
hw/arm/musca.c | 197 ++++++++++++++++++++++++++++++++
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
14
MAINTAINERS | 6 +
13
2 files changed, 55 insertions(+)
15
default-configs/arm-softmmu.mak | 1 +
16
4 files changed, 205 insertions(+)
17
create mode 100644 hw/arm/musca.c
18
14
19
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/arm/Makefile.objs
17
--- a/target/arm/translate-a64.h
22
+++ b/hw/arm/Makefile.objs
18
+++ b/target/arm/translate-a64.h
23
@@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
24
obj-$(CONFIG_MPS2) += mps2.o
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
25
obj-$(CONFIG_MPS2) += mps2-tz.o
21
unsigned int imms, unsigned int immr);
26
obj-$(CONFIG_MSF2) += msf2-soc.o msf2-som.o
22
bool sve_access_check(DisasContext *s);
27
+obj-$(CONFIG_MUSCA) += musca.o
23
+bool sme_enabled_check(DisasContext *s);
28
obj-$(CONFIG_ARMSSE) += armsse.o
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
29
obj-$(CONFIG_FSL_IMX7) += fsl-imx7.o mcimx7d-sabre.o
30
obj-$(CONFIG_ARM_SMMUV3) += smmu-common.o smmuv3.o
31
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
32
new file mode 100644
33
index XXXXXXX..XXXXXXX
34
--- /dev/null
35
+++ b/hw/arm/musca.c
36
@@ -XXX,XX +XXX,XX @@
37
+/*
38
+ * Arm Musca-B1 test chip board emulation
39
+ *
40
+ * Copyright (c) 2019 Linaro Limited
41
+ * Written by Peter Maydell
42
+ *
43
+ * This program is free software; you can redistribute it and/or modify
44
+ * it under the terms of the GNU General Public License version 2 or
45
+ * (at your option) any later version.
46
+ */
47
+
25
+
48
+/*
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
49
+ * The Musca boards are a reference implementation of a system using
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
50
+ * the SSE-200 subsystem for embedded:
51
+ * https://developer.arm.com/products/system-design/development-boards/iot-test-chips-and-boards/musca-a-test-chip-board
52
+ * https://developer.arm.com/products/system-design/development-boards/iot-test-chips-and-boards/musca-b-test-chip-board
53
+ * We model the A and B1 variants of this board, as described in the TRMs:
54
+ * http://infocenter.arm.com/help/topic/com.arm.doc.101107_0000_00_en/index.html
55
+ * http://infocenter.arm.com/help/topic/com.arm.doc.101312_0000_00_en/index.html
56
+ */
57
+
58
+#include "qemu/osdep.h"
59
+#include "qemu/error-report.h"
60
+#include "qapi/error.h"
61
+#include "exec/address-spaces.h"
62
+#include "hw/arm/arm.h"
63
+#include "hw/arm/armsse.h"
64
+#include "hw/boards.h"
65
+#include "hw/core/split-irq.h"
66
+
67
+#define MUSCA_NUMIRQ_MAX 96
68
+
69
+typedef enum MuscaType {
70
+ MUSCA_A,
71
+ MUSCA_B1,
72
+} MuscaType;
73
+
74
+typedef struct {
75
+ MachineClass parent;
76
+ MuscaType type;
77
+ uint32_t init_svtor;
78
+ int sram_addr_width;
79
+ int num_irqs;
80
+} MuscaMachineClass;
81
+
82
+typedef struct {
83
+ MachineState parent;
84
+
85
+ ARMSSE sse;
86
+ SplitIRQ cpu_irq_splitter[MUSCA_NUMIRQ_MAX];
87
+} MuscaMachineState;
88
+
89
+#define TYPE_MUSCA_MACHINE "musca"
90
+#define TYPE_MUSCA_A_MACHINE MACHINE_TYPE_NAME("musca-a")
91
+#define TYPE_MUSCA_B1_MACHINE MACHINE_TYPE_NAME("musca-b1")
92
+
93
+#define MUSCA_MACHINE(obj) \
94
+ OBJECT_CHECK(MuscaMachineState, obj, TYPE_MUSCA_MACHINE)
95
+#define MUSCA_MACHINE_GET_CLASS(obj) \
96
+ OBJECT_GET_CLASS(MuscaMachineClass, obj, TYPE_MUSCA_MACHINE)
97
+#define MUSCA_MACHINE_CLASS(klass) \
98
+ OBJECT_CLASS_CHECK(MuscaMachineClass, klass, TYPE_MUSCA_MACHINE)
99
+
100
+/*
101
+ * Main SYSCLK frequency in Hz
102
+ * TODO this should really be different for the two cores, but we
103
+ * don't model that in our SSE-200 model yet.
104
+ */
105
+#define SYSCLK_FRQ 40000000
106
+
107
+static void musca_init(MachineState *machine)
108
+{
28
+{
109
+ MuscaMachineState *mms = MUSCA_MACHINE(machine);
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
110
+ MuscaMachineClass *mmc = MUSCA_MACHINE_GET_CLASS(mms);
111
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
112
+ MemoryRegion *system_memory = get_system_memory();
113
+ DeviceState *ssedev;
114
+ int i;
115
+
116
+ assert(mmc->num_irqs <= MUSCA_NUMIRQ_MAX);
117
+
118
+ if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) {
119
+ error_report("This board can only be used with CPU %s",
120
+ mc->default_cpu_type);
121
+ exit(1);
122
+ }
123
+
124
+ sysbus_init_child_obj(OBJECT(machine), "sse-200", &mms->sse,
125
+ sizeof(mms->sse), TYPE_SSE200);
126
+ ssedev = DEVICE(&mms->sse);
127
+ object_property_set_link(OBJECT(&mms->sse), OBJECT(system_memory),
128
+ "memory", &error_fatal);
129
+ qdev_prop_set_uint32(ssedev, "EXP_NUMIRQ", mmc->num_irqs);
130
+ qdev_prop_set_uint32(ssedev, "init-svtor", mmc->init_svtor);
131
+ qdev_prop_set_uint32(ssedev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width);
132
+ qdev_prop_set_uint32(ssedev, "MAINCLK", SYSCLK_FRQ);
133
+ object_property_set_bool(OBJECT(&mms->sse), true, "realized",
134
+ &error_fatal);
135
+
136
+ /*
137
+ * We need to create splitters to feed the IRQ inputs
138
+ * for each CPU in the SSE-200 from each device in the board.
139
+ */
140
+ for (i = 0; i < mmc->num_irqs; i++) {
141
+ char *name = g_strdup_printf("musca-irq-splitter%d", i);
142
+ SplitIRQ *splitter = &mms->cpu_irq_splitter[i];
143
+
144
+ object_initialize_child(OBJECT(machine), name,
145
+ splitter, sizeof(*splitter),
146
+ TYPE_SPLIT_IRQ, &error_fatal, NULL);
147
+ g_free(name);
148
+
149
+ object_property_set_int(OBJECT(splitter), 2, "num-lines",
150
+ &error_fatal);
151
+ object_property_set_bool(OBJECT(splitter), true, "realized",
152
+ &error_fatal);
153
+ qdev_connect_gpio_out(DEVICE(splitter), 0,
154
+ qdev_get_gpio_in_named(ssedev, "EXP_IRQ", i));
155
+ qdev_connect_gpio_out(DEVICE(splitter), 1,
156
+ qdev_get_gpio_in_named(ssedev,
157
+ "EXP_CPU1_IRQ", i));
158
+ }
159
+
160
+ armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x2000000);
161
+}
30
+}
162
+
31
+
163
+static void musca_class_init(ObjectClass *oc, void *data)
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
33
+static inline bool sme_za_enabled_check(DisasContext *s)
164
+{
34
+{
165
+ MachineClass *mc = MACHINE_CLASS(oc);
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
166
+
167
+ mc->default_cpus = 2;
168
+ mc->min_cpus = mc->default_cpus;
169
+ mc->max_cpus = mc->default_cpus;
170
+ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
171
+ mc->init = musca_init;
172
+}
36
+}
173
+
37
+
174
+static void musca_a_class_init(ObjectClass *oc, void *data)
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
175
+{
40
+{
176
+ MachineClass *mc = MACHINE_CLASS(oc);
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
177
+ MuscaMachineClass *mmc = MUSCA_MACHINE_CLASS(oc);
178
+
179
+ mc->desc = "ARM Musca-A board (dual Cortex-M33)";
180
+ mmc->type = MUSCA_A;
181
+ mmc->init_svtor = 0x10200000;
182
+ mmc->sram_addr_width = 15;
183
+ mmc->num_irqs = 64;
184
+}
42
+}
185
+
43
+
186
+static void musca_b1_class_init(ObjectClass *oc, void *data)
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
52
return true;
53
}
54
55
+/* This function corresponds to CheckSMEEnabled. */
56
+bool sme_enabled_check(DisasContext *s)
187
+{
57
+{
188
+ MachineClass *mc = MACHINE_CLASS(oc);
189
+ MuscaMachineClass *mmc = MUSCA_MACHINE_CLASS(oc);
190
+
191
+ mc->desc = "ARM Musca-B1 board (dual Cortex-M33)";
192
+ mmc->type = MUSCA_B1;
193
+ /*
58
+ /*
194
+ * This matches the DAPlink firmware which boots from QSPI. There
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
195
+ * is also a firmware blob which boots from the eFlash, which
60
+ * to be zero when fp_excp_el has priority. This is because we need
196
+ * uses init_svtor = 0x1A000000. QEMU doesn't currently support that,
61
+ * sme_excp_el by itself for cpregs access checks.
197
+ * though we could in theory expose a machine property on the command
198
+ * line to allow the user to request eFlash boot.
199
+ */
62
+ */
200
+ mmc->init_svtor = 0x10000000;
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
201
+ mmc->sram_addr_width = 17;
64
+ s->fp_access_checked = true;
202
+ mmc->num_irqs = 96;
65
+ return sme_access_check(s);
66
+ }
67
+ return fp_access_check_only(s);
203
+}
68
+}
204
+
69
+
205
+static const TypeInfo musca_info = {
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
206
+ .name = TYPE_MUSCA_MACHINE,
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
207
+ .parent = TYPE_MACHINE,
208
+ .abstract = true,
209
+ .instance_size = sizeof(MuscaMachineState),
210
+ .class_size = sizeof(MuscaMachineClass),
211
+ .class_init = musca_class_init,
212
+};
213
+
214
+static const TypeInfo musca_a_info = {
215
+ .name = TYPE_MUSCA_A_MACHINE,
216
+ .parent = TYPE_MUSCA_MACHINE,
217
+ .class_init = musca_a_class_init,
218
+};
219
+
220
+static const TypeInfo musca_b1_info = {
221
+ .name = TYPE_MUSCA_B1_MACHINE,
222
+ .parent = TYPE_MUSCA_MACHINE,
223
+ .class_init = musca_b1_class_init,
224
+};
225
+
226
+static void musca_machine_init(void)
227
+{
72
+{
228
+ type_register_static(&musca_info);
73
+ if (!sme_enabled_check(s)) {
229
+ type_register_static(&musca_a_info);
74
+ return false;
230
+ type_register_static(&musca_b1_info);
75
+ }
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
78
+ syn_smetrap(SME_ET_NotStreaming, false));
79
+ return false;
80
+ }
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
85
+ }
86
+ return true;
231
+}
87
+}
232
+
88
+
233
+type_init(musca_machine_init);
89
/*
234
diff --git a/MAINTAINERS b/MAINTAINERS
90
* This utility function is for doing register extension with an
235
index XXXXXXX..XXXXXXX 100644
91
* optional shift. You will likely want to pass a temporary for the
236
--- a/MAINTAINERS
237
+++ b/MAINTAINERS
238
@@ -XXX,XX +XXX,XX @@ F: include/hw/misc/iotkit-sysinfo.h
239
F: hw/misc/armsse-cpuid.c
240
F: include/hw/misc/armsse-cpuid.h
241
242
+Musca
243
+M: Peter Maydell <peter.maydell@linaro.org>
244
+L: qemu-arm@nongnu.org
245
+S: Maintained
246
+F: hw/arm/musca.c
247
+
248
Musicpal
249
M: Jan Kiszka <jan.kiszka@web.de>
250
M: Peter Maydell <peter.maydell@linaro.org>
251
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
252
index XXXXXXX..XXXXXXX 100644
253
--- a/default-configs/arm-softmmu.mak
254
+++ b/default-configs/arm-softmmu.mak
255
@@ -XXX,XX +XXX,XX @@ CONFIG_TUSB6010=y
256
CONFIG_IMX=y
257
CONFIG_MAINSTONE=y
258
CONFIG_MPS2=y
259
+CONFIG_MUSCA=y
260
CONFIG_NSERIES=y
261
CONFIG_RASPI=y
262
CONFIG_REALVIEW=y
263
--
92
--
264
2.20.1
93
2.25.1
265
266
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
For opcodes 0-5, move some if conditions into the structure
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
of a switch statement. For opcodes 6 & 7, decode everything
4
SVE mode, and for SME present but SVE absent.
5
at once with a second switch.
6
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20190215192302.27855-2-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/translate-a64.c | 94 ++++++++++++++++++++------------------
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
13
1 file changed, 49 insertions(+), 45 deletions(-)
12
1 file changed, 16 insertions(+), 6 deletions(-)
14
13
15
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.c
16
--- a/target/arm/translate-a64.c
18
+++ b/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
19
@@ -XXX,XX +XXX,XX @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
20
int type = extract32(insn, 22, 2);
19
return true;
21
bool sbit = extract32(insn, 29, 1);
20
}
22
bool sf = extract32(insn, 31, 1);
21
23
+ bool itof = false;
22
-/* Check that SVE access is enabled. If it is, return true.
24
23
+/*
25
if (sbit) {
24
+ * Check that SVE access is enabled. If it is, return true.
26
- unallocated_encoding(s);
25
* If not, emit code to generate an appropriate exception and return false.
27
- return;
26
+ * This function corresponds to CheckSVEEnabled().
28
+ goto do_unallocated;
27
*/
28
bool sve_access_check(DisasContext *s)
29
{
30
- if (s->sve_excp_el) {
31
- assert(!s->sve_access_checked);
32
- s->sve_access_checked = true;
33
-
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
35
+ assert(dc_isar_feature(aa64_sme, s));
36
+ if (!sme_sm_enabled_check(s)) {
37
+ goto fail_exit;
38
+ }
39
+ } else if (s->sve_excp_el) {
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
41
syn_sve_access_trap(), s->sve_excp_el);
42
- return false;
43
+ goto fail_exit;
29
}
44
}
30
45
s->sve_access_checked = true;
31
- if (opcode > 5) {
46
return fp_access_check(s);
32
- /* FMOV */
33
- bool itof = opcode & 1;
34
-
35
- if (rmode >= 2) {
36
- unallocated_encoding(s);
37
- return;
38
- }
39
-
40
- switch (sf << 3 | type << 1 | rmode) {
41
- case 0x0: /* 32 bit */
42
- case 0xa: /* 64 bit */
43
- case 0xd: /* 64 bit to top half of quad */
44
- break;
45
- case 0x6: /* 16-bit float, 32-bit int */
46
- case 0xe: /* 16-bit float, 64-bit int */
47
- if (dc_isar_feature(aa64_fp16, s)) {
48
- break;
49
- }
50
- /* fallthru */
51
- default:
52
- /* all other sf/type/rmode combinations are invalid */
53
- unallocated_encoding(s);
54
- return;
55
- }
56
-
57
- if (!fp_access_check(s)) {
58
- return;
59
- }
60
- handle_fmov(s, rd, rn, type, itof);
61
- } else {
62
- /* actual FP conversions */
63
- bool itof = extract32(opcode, 1, 1);
64
-
65
- if (rmode != 0 && opcode > 1) {
66
- unallocated_encoding(s);
67
- return;
68
+ switch (opcode) {
69
+ case 2: /* SCVTF */
70
+ case 3: /* UCVTF */
71
+ itof = true;
72
+ /* fallthru */
73
+ case 4: /* FCVTAS */
74
+ case 5: /* FCVTAU */
75
+ if (rmode != 0) {
76
+ goto do_unallocated;
77
}
78
+ /* fallthru */
79
+ case 0: /* FCVT[NPMZ]S */
80
+ case 1: /* FCVT[NPMZ]U */
81
switch (type) {
82
case 0: /* float32 */
83
case 1: /* float64 */
84
break;
85
case 3: /* float16 */
86
- if (dc_isar_feature(aa64_fp16, s)) {
87
- break;
88
+ if (!dc_isar_feature(aa64_fp16, s)) {
89
+ goto do_unallocated;
90
}
91
- /* fallthru */
92
+ break;
93
default:
94
- unallocated_encoding(s);
95
- return;
96
+ goto do_unallocated;
97
}
98
-
99
if (!fp_access_check(s)) {
100
return;
101
}
102
handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
103
+ break;
104
+
47
+
105
+ default:
48
+ fail_exit:
106
+ switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
49
+ /* Assert that we only raise one exception per instruction. */
107
+ case 0b01100110: /* FMOV half <-> 32-bit int */
50
+ assert(!s->sve_access_checked);
108
+ case 0b01100111:
51
+ s->sve_access_checked = true;
109
+ case 0b11100110: /* FMOV half <-> 64-bit int */
52
+ return false;
110
+ case 0b11100111:
111
+ if (!dc_isar_feature(aa64_fp16, s)) {
112
+ goto do_unallocated;
113
+ }
114
+ /* fallthru */
115
+ case 0b00000110: /* FMOV 32-bit */
116
+ case 0b00000111:
117
+ case 0b10100110: /* FMOV 64-bit */
118
+ case 0b10100111:
119
+ case 0b11001110: /* FMOV top half of 128-bit */
120
+ case 0b11001111:
121
+ if (!fp_access_check(s)) {
122
+ return;
123
+ }
124
+ itof = opcode & 1;
125
+ handle_fmov(s, rd, rn, type, itof);
126
+ break;
127
+
128
+ default:
129
+ do_unallocated:
130
+ unallocated_encoding(s);
131
+ return;
132
+ }
133
+ break;
134
}
135
}
53
}
136
54
55
/*
137
--
56
--
138
2.20.1
57
2.25.1
139
140
diff view generated by jsdifflib
1
Wire up the PL031 RTC for the Musca board.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
These SME instructions are nominally within the SVE decode space,
4
so we add them to sve.decode and translate-sve.c.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
---
10
---
6
hw/arm/musca.c | 26 +++++++++++++++++++++++---
11
target/arm/translate-a64.h | 12 ++++++++++++
7
1 file changed, 23 insertions(+), 3 deletions(-)
12
target/arm/sve.decode | 5 ++++-
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
14
3 files changed, 54 insertions(+), 1 deletion(-)
8
15
9
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
10
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
11
--- a/hw/arm/musca.c
18
--- a/target/arm/translate-a64.h
12
+++ b/hw/arm/musca.c
19
+++ b/target/arm/translate-a64.h
13
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
14
#include "hw/misc/tz-mpc.h"
21
return s->vl;
15
#include "hw/misc/tz-ppc.h"
22
}
16
#include "hw/misc/unimp.h"
23
17
+#include "hw/timer/pl031.h"
24
+/* Return the byte size of the vector register, SVL / 8. */
18
25
+static inline int streaming_vec_reg_size(DisasContext *s)
19
#define MUSCA_NUMIRQ_MAX 96
20
#define MUSCA_PPC_MAX 3
21
@@ -XXX,XX +XXX,XX @@ typedef struct {
22
UnimplementedDeviceState spi;
23
UnimplementedDeviceState scc;
24
UnimplementedDeviceState timer;
25
- UnimplementedDeviceState rtc;
26
+ PL031State rtc;
27
UnimplementedDeviceState pvt;
28
UnimplementedDeviceState sdio;
29
UnimplementedDeviceState gpio;
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
31
*/
32
#define SYSCLK_FRQ 40000000
33
34
+static qemu_irq get_sse_irq_in(MuscaMachineState *mms, int irqno)
35
+{
26
+{
36
+ /* Return a qemu_irq which will signal IRQ n to all CPUs in the SSE. */
27
+ return s->svl;
37
+ assert(irqno < MUSCA_NUMIRQ_MAX);
38
+
39
+ return qdev_get_gpio_in(DEVICE(&mms->cpu_irq_splitter[irqno]), 0);
40
+}
28
+}
41
+
29
+
42
/*
30
/*
43
* Most of the devices in the Musca board sit behind Peripheral Protection
31
* Return the offset info CPUARMState of the predicate vector register Pn.
44
* Controllers. These data structures define the layout of which devices
32
* Note for this purpose, FFR is P16.
45
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_mpc(MuscaMachineState *mms, void *opaque,
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
46
return sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 0);
34
return s->vl >> 3;
47
}
35
}
48
36
49
+static MemoryRegion *make_rtc(MuscaMachineState *mms, void *opaque,
37
+/* Return the byte size of the predicate register, SVL / 64. */
50
+ const char *name, hwaddr size)
38
+static inline int streaming_pred_reg_size(DisasContext *s)
51
+{
39
+{
52
+ PL031State *rtc = opaque;
40
+ return s->svl >> 3;
53
+
54
+ sysbus_init_child_obj(OBJECT(mms), name, rtc, sizeof(mms->rtc), TYPE_PL031);
55
+ object_property_set_bool(OBJECT(rtc), true, "realized", &error_fatal);
56
+ sysbus_connect_irq(SYS_BUS_DEVICE(rtc), 0, get_sse_irq_in(mms, 39));
57
+ return sysbus_mmio_get_region(SYS_BUS_DEVICE(rtc), 0);
58
+}
41
+}
59
+
42
+
60
static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
43
/*
61
const char *name, hwaddr size)
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
51
# SVE index generation (register start, register increment)
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
53
54
-### SVE Stack Allocation Group
55
+### SVE / Streaming SVE Stack Allocation Group
56
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
74
return true;
75
}
76
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
78
+{
79
+ if (!dc_isar_feature(aa64_sme, s)) {
80
+ return false;
81
+ }
82
+ if (sme_enabled_check(s)) {
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
86
+ }
87
+ return true;
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
62
{
91
{
63
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
92
if (!dc_isar_feature(aa64_sve, s)) {
64
{ "i2c1", make_unimp_dev, &mms->i2c[1], 0x5000, 0x1000 },
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
65
{ "i2s", make_unimp_dev, &mms->i2s, 0x6000, 0x1000 },
94
return true;
66
{ "pwm0", make_unimp_dev, &mms->pwm[0], 0x7000, 0x1000 },
95
}
67
- { "rtc", make_unimp_dev, &mms->rtc, 0x8000, 0x1000 },
96
68
+ { "rtc", make_rtc, &mms->rtc, 0x8000, 0x1000 },
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
69
{ "qspi", make_unimp_dev, &mms->qspi, 0xa000, 0x1000 },
98
+{
70
{ "timer", make_unimp_dev, &mms->timer, 0xb000, 0x1000 },
99
+ if (!dc_isar_feature(aa64_sme, s)) {
71
{ "scc", make_unimp_dev, &mms->scc, 0xc000, 0x1000 },
100
+ return false;
72
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
101
+ }
73
{ "spi", make_unimp_dev, &mms->spi, 0x4010a000, 0x1000 },
102
+ if (sme_enabled_check(s)) {
74
{ "scc", make_unimp_dev, &mms->scc, 0x5010b000, 0x1000 },
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
75
{ "timer", make_unimp_dev, &mms->timer, 0x4010c000, 0x1000 },
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
76
- { "rtc", make_unimp_dev, &mms->rtc, 0x4010d000, 0x1000 },
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
77
+ { "rtc", make_rtc, &mms->rtc, 0x4010d000, 0x1000 },
106
+ }
78
{ "pvt", make_unimp_dev, &mms->pvt, 0x4010e000, 0x1000 },
107
+ return true;
79
{ "sdio", make_unimp_dev, &mms->sdio, 0x4010f000, 0x1000 },
108
+}
80
},
109
+
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
+{
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
127
+}
128
+
129
/*
130
*** SVE Compute Vector Address Group
131
*/
81
--
132
--
82
2.20.1
133
2.25.1
83
84
diff view generated by jsdifflib
1
Create a new include file for the pl031's device struct,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
type macros, etc, so that it can be instantiated using
3
the "embedded struct" coding style.
4
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
include/hw/timer/pl031.h | 44 ++++++++++++++++++++++++++++++++++++++++
8
target/arm/helper-sme.h | 2 ++
10
hw/timer/pl031.c | 25 +----------------------
9
target/arm/sme.decode | 4 ++++
11
MAINTAINERS | 1 +
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
12
3 files changed, 46 insertions(+), 24 deletions(-)
11
target/arm/translate-sme.c | 13 +++++++++++++
13
create mode 100644 include/hw/timer/pl031.h
12
4 files changed, 44 insertions(+)
14
13
15
diff --git a/include/hw/timer/pl031.h b/include/hw/timer/pl031.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
new file mode 100644
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX
16
--- a/target/arm/helper-sme.h
18
--- /dev/null
17
+++ b/target/arm/helper-sme.h
19
+++ b/include/hw/timer/pl031.h
20
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
21
+/*
19
22
+ * ARM AMBA PrimeCell PL031 RTC
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
23
+ *
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
24
+ * Copyright (c) 2007 CodeSourcery
25
+ *
26
+ * This file is free software; you can redistribute it and/or modify
27
+ * it under the terms of the GNU General Public License version 2 as
28
+ * published by the Free Software Foundation.
29
+ *
30
+ * Contributions after 2012-01-13 are licensed under the terms of the
31
+ * GNU GPL, version 2 or (at your option) any later version.
32
+ */
33
+
22
+
34
+#ifndef HW_TIMER_PL031
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
35
+#define HW_TIMER_PL031
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@
29
#
30
# This file is processed by scripts/decodetree.py
31
#
36
+
32
+
37
+#include "hw/sysbus.h"
33
+### SME Misc
38
+
34
+
39
+#define TYPE_PL031 "pl031"
35
+ZERO 11000000 00 001 00000000000 imm:8
40
+#define PL031(obj) OBJECT_CHECK(PL031State, (obj), TYPE_PL031)
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
43
}
41
+
44
+
42
+typedef struct PL031State {
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
43
+ SysBusDevice parent_obj;
46
+{
44
+
47
+ uint32_t i;
45
+ MemoryRegion iomem;
46
+ QEMUTimer *timer;
47
+ qemu_irq irq;
48
+
48
+
49
+ /*
49
+ /*
50
+ * Needed to preserve the tick_count across migration, even if the
50
+ * Special case clearing the entire ZA space.
51
+ * absolute value of the rtc_clock is different on the source and
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * destination.
52
+ * parts of the ZA storage outside of SVL.
53
+ */
53
+ */
54
+ uint32_t tick_offset_vmstate;
54
+ if (imm == 0xff) {
55
+ uint32_t tick_offset;
55
+ memset(env->zarray, 0, sizeof(env->zarray));
56
+ return;
57
+ }
56
+
58
+
57
+ uint32_t mr;
59
+ /*
58
+ uint32_t lr;
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
59
+ uint32_t cr;
61
+ * so each row is discontiguous within ZA[].
60
+ uint32_t im;
62
+ */
61
+ uint32_t is;
63
+ for (i = 0; i < svl; i++) {
62
+} PL031State;
64
+ if (imm & (1 << (i % 8))) {
63
+
65
+ memset(&env->zarray[i], 0, svl);
64
+#endif
66
+ }
65
diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
66
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
67
--- a/hw/timer/pl031.c
71
--- a/target/arm/translate-sme.c
68
+++ b/hw/timer/pl031.c
72
+++ b/target/arm/translate-sme.c
69
@@ -XXX,XX +XXX,XX @@
73
@@ -XXX,XX +XXX,XX @@
70
*/
74
*/
71
75
72
#include "qemu/osdep.h"
76
#include "decode-sme.c.inc"
73
+#include "hw/timer/pl031.h"
77
+
74
#include "hw/sysbus.h"
78
+
75
#include "qemu/timer.h"
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
76
#include "sysemu/sysemu.h"
80
+{
77
@@ -XXX,XX +XXX,XX @@ do { printf("pl031: " fmt , ## __VA_ARGS__); } while (0)
81
+ if (!dc_isar_feature(aa64_sme, s)) {
78
#define RTC_MIS 0x18 /* Masked interrupt status register */
82
+ return false;
79
#define RTC_ICR 0x1c /* Interrupt clear register */
83
+ }
80
84
+ if (sme_za_enabled_check(s)) {
81
-#define TYPE_PL031 "pl031"
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
82
-#define PL031(obj) OBJECT_CHECK(PL031State, (obj), TYPE_PL031)
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
83
-
87
+ }
84
-typedef struct PL031State {
88
+ return true;
85
- SysBusDevice parent_obj;
89
+}
86
-
87
- MemoryRegion iomem;
88
- QEMUTimer *timer;
89
- qemu_irq irq;
90
-
91
- /* Needed to preserve the tick_count across migration, even if the
92
- * absolute value of the rtc_clock is different on the source and
93
- * destination.
94
- */
95
- uint32_t tick_offset_vmstate;
96
- uint32_t tick_offset;
97
-
98
- uint32_t mr;
99
- uint32_t lr;
100
- uint32_t cr;
101
- uint32_t im;
102
- uint32_t is;
103
-} PL031State;
104
-
105
static const unsigned char pl031_id[] = {
106
0x31, 0x10, 0x14, 0x00, /* Device ID */
107
0x0d, 0xf0, 0x05, 0xb1 /* Cell ID */
108
diff --git a/MAINTAINERS b/MAINTAINERS
109
index XXXXXXX..XXXXXXX 100644
110
--- a/MAINTAINERS
111
+++ b/MAINTAINERS
112
@@ -XXX,XX +XXX,XX @@ F: hw/sd/pl181.c
113
F: hw/ssi/pl022.c
114
F: include/hw/ssi/pl022.h
115
F: hw/timer/pl031.c
116
+F: include/hw/timer/pl031.h
117
F: include/hw/arm/primecell.h
118
F: hw/timer/cmsdk-apb-timer.c
119
F: include/hw/timer/cmsdk-apb-timer.h
120
--
90
--
121
2.20.1
91
2.25.1
122
123
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
We can reuse the SVE functions for implementing moves to/from
4
horizontal tile slices, but we need new ones for moves to/from
5
vertical tile slices.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/helper-sme.h | 12 +++
13
target/arm/helper-sve.h | 2 +
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
21
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper-sme.h
25
+++ b/target/arm/helper-sme.h
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
28
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
30
+
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
62
63
+/* Return a newly allocated pointer to the predicate register. */
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
65
+{
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
68
+ return ret;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
80
}
81
82
+static inline int plus_12(DisasContext *s, int x)
83
+{
84
+ return x + 12;
85
+}
86
+
87
static inline int times_2(DisasContext *s, int x)
88
{
89
return x * 2;
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
213
+ * the address arithmetic.
214
+ */
215
+ for (i = 0; i < oprsz; i++) {
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
286
}
287
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
445
--
446
2.25.1
diff view generated by jsdifflib
1
The "background region" for a v8M MPU is a default which will be used
1
From: Richard Henderson <richard.henderson@linaro.org>
2
(if enabled, and if the access is privileged) if the access does
3
not match any specific MPU region. We were incorrectly using it
4
always (by putting the condition at the wrong nesting level). This
5
meant that we would always return the default background permissions
6
rather than the correct permissions for a specific region, and also
7
that we would not return the right information in response to a
8
TT instruction.
9
2
10
Move the check for the background region to the same place in the
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
11
logic as the equivalent v8M MPUCheck() pseudocode puts it.
4
because those functions accept only a Zreg register number.
12
This in turn means we must adjust the condition we use to detect
5
For SME, we want to pass a pointer into ZA storage.
13
matches in multiple regions to avoid false-positives.
14
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Message-id: 20190214113408.10214-1-peter.maydell@linaro.org
18
---
11
---
19
target/arm/helper.c | 8 +++++---
12
target/arm/helper-sme.h | 82 +++++
20
1 file changed, 5 insertions(+), 3 deletions(-)
13
target/arm/sme.decode | 9 +
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
21
17
22
diff --git a/target/arm/helper.c b/target/arm/helper.c
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
23
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper.c
20
--- a/target/arm/helper-sme.h
25
+++ b/target/arm/helper.c
21
+++ b/target/arm/helper-sme.h
26
@@ -XXX,XX +XXX,XX @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
hit = true;
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
} else if (m_is_ppb_region(env, address)) {
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
hit = true;
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
- } else if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
26
+
31
- hit = true;
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
32
} else {
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+ if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+ hit = true;
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
129
@@ -XXX,XX +XXX,XX @@
130
131
#include "qemu/osdep.h"
132
#include "cpu.h"
133
+#include "internals.h"
134
#include "tcg/tcg-gvec-desc.h"
135
#include "exec/helper-proto.h"
136
+#include "exec/cpu_ldst.h"
137
+#include "exec/exec-all.h"
138
#include "qemu/int128.h"
139
#include "vec_internal.h"
140
+#include "sve_ldst_internal.h"
141
142
/* ResetSVEState */
143
void arm_reset_sve_state(CPUARMState *env)
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
145
}
146
147
#undef DO_MOVA_Z
148
+
149
+/*
150
+ * Clear elements in a tile slice comprising len bytes.
151
+ */
152
+
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
154
+
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
156
+{
157
+ memset(ptr + off, 0, len);
158
+}
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
197
+ */
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 8, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
35
+ }
431
+ }
36
+
432
+
37
for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
433
+ do {
38
/* region search */
434
+ uint64_t pg = vg[reg_off >> 6];
39
/* Note that the base address is bits [31:5] from the register
435
+ do {
40
@@ -XXX,XX +XXX,XX @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
436
+ if ((pg >> (reg_off & 63)) & 1) {
41
*is_subpage = true;
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
42
}
438
+ }
43
439
+ reg_off += esize;
44
- if (hit) {
440
+ } while (reg_off & 63);
45
+ if (matchregion != -1) {
441
+ } while (reg_off <= reg_last);
46
/* Multiple regions match -- always a failure (unlike
442
+
47
* PMSAv7 where highest-numbered-region wins)
443
+ cpy_fn(za, &scratch, reg_max);
48
*/
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
49
--
817
--
50
2.20.1
818
2.25.1
51
52
diff view generated by jsdifflib
1
The pl011 logs when the guest makes a bad access. It prints
1
From: Richard Henderson <richard.henderson@linaro.org>
2
the address offset in hex but confusingly omits the '0x'
3
prefix; add it.
4
2
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
We will reuse this for SME save and restore array insns.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
hw/char/pl011.c | 4 ++--
11
target/arm/translate-a64.h | 3 +++
10
1 file changed, 2 insertions(+), 2 deletions(-)
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
13
2 files changed, 39 insertions(+), 12 deletions(-)
11
14
12
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/char/pl011.c
17
--- a/target/arm/translate-a64.h
15
+++ b/hw/char/pl011.c
18
+++ b/target/arm/translate-a64.h
16
@@ -XXX,XX +XXX,XX @@ static uint64_t pl011_read(void *opaque, hwaddr offset,
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
17
break;
20
uint32_t rm_ofs, int64_t shift,
18
default:
21
uint32_t opr_sz, uint32_t max_sz);
19
qemu_log_mask(LOG_GUEST_ERROR,
22
20
- "pl011_read: Bad offset %x\n", (int)offset);
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
21
+ "pl011_read: Bad offset 0x%x\n", (int)offset);
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
22
r = 0;
25
+
23
break;
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
32
* The load should begin at the address Rn + IMM.
33
*/
34
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
37
+ int len, int rn, int imm)
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
24
}
81
}
25
@@ -XXX,XX +XXX,XX @@ static void pl011_write(void *opaque, hwaddr offset,
82
26
break;
83
/*
27
default:
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
28
qemu_log_mask(LOG_GUEST_ERROR,
85
default:
29
- "pl011_write: Bad offset %x\n", (int)offset);
86
g_assert_not_reached();
30
+ "pl011_write: Bad offset 0x%x\n", (int)offset);
87
}
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
31
}
91
}
32
}
92
}
33
93
94
/* Similarly for stores. */
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
97
+ int len, int rn, int imm)
98
{
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
100
int len_remain = len % 8;
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
102
103
t0 = tcg_temp_new_i64();
104
for (i = 0; i < len_align; i += 8) {
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
109
}
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
111
clean_addr = new_tmp_a64_local(s);
112
tcg_gen_mov_i64(clean_addr, t0);
113
114
+ if (base != cpu_env) {
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
118
+ }
119
+
120
gen_set_label(loop);
121
122
t0 = tcg_temp_new_i64();
123
tp = tcg_temp_new_ptr();
124
- tcg_gen_add_ptr(tp, cpu_env, i);
125
+ tcg_gen_add_ptr(tp, base, i);
126
tcg_gen_ld_i64(t0, tp, vofs);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
138
}
139
140
/* Predicate register stores can be any multiple of 2. */
141
if (len_remain) {
142
t0 = tcg_temp_new_i64();
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
145
146
switch (len_remain) {
147
case 2:
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
149
if (sve_access_check(s)) {
150
int size = vec_full_reg_size(s);
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
154
}
155
return true;
156
}
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
158
if (sve_access_check(s)) {
159
int size = pred_full_reg_size(s);
160
int off = pred_full_reg_offset(s, a->rd);
161
- do_ldr(s, off, size, a->rn, a->imm * size);
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
34
--
184
--
35
2.20.1
185
2.25.1
36
37
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
We can reuse the SVE functions for LDR and STR, passing in the
4
base of the ZA vector and a zero offset.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme.decode | 7 +++++++
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
13
2 files changed, 31 insertions(+)
14
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme.decode
18
+++ b/target/arm/sme.decode
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
20
&ldst rs=%mova_rs
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
22
&ldst esz=4 rs=%mova_rs
23
+
24
+&ldstr rv rn imm
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
26
+ &ldstr rv=%mova_rs
27
+
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-sme.c
33
+++ b/target/arm/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
tcg_temp_free_i64(addr);
36
return true;
37
}
38
+
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
40
+
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
42
+{
43
+ int svl = streaming_vec_reg_size(s);
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
46
+
47
+ if (!sme_za_enabled_check(s)) {
48
+ return true;
49
+ }
50
+
51
+ /* ZA[n] equates to ZA0H.B[n]. */
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
53
+
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
55
+
56
+ tcg_temp_free_ptr(base);
57
+ return true;
58
+}
59
+
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
62
--
63
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 5 +++
9
target/arm/sme.decode | 11 +++++
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 31 +++++++++++++
12
4 files changed, 137 insertions(+)
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
22
+
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
32
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
35
+
36
+### SME Add Vector to Array
37
+
38
+&adda zad zn pm pn
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
41
+
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sme_helper.c
49
+++ b/target/arm/sme_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
51
DO_ST(q, _le, MO_128)
52
53
#undef DO_ST
54
+
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
56
+ void *vpm, uint32_t desc)
57
+{
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
59
+ uint64_t *pn = vpn, *pm = vpm;
60
+ uint32_t *zda = vzda, *zn = vzn;
61
+
62
+ for (row = 0; row < oprsz; ) {
63
+ uint64_t pa = pn[row >> 4];
64
+ do {
65
+ if (pa & 1) {
66
+ for (col = 0; col < oprsz; ) {
67
+ uint64_t pb = pm[col >> 4];
68
+ do {
69
+ if (pb & 1) {
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
71
+ }
72
+ pb >>= 4;
73
+ } while (++col & 15);
74
+ }
75
+ }
76
+ pa >>= 4;
77
+ } while (++row & 15);
78
+ }
79
+}
80
+
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
83
+{
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
87
+
88
+ for (row = 0; row < oprsz; ++row) {
89
+ if (pn[H1(row)] & 1) {
90
+ for (col = 0; col < oprsz; ++col) {
91
+ if (pm[H1(col)] & 1) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
96
+ }
97
+}
98
+
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
101
+{
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
105
+
106
+ for (row = 0; row < oprsz; ) {
107
+ uint64_t pa = pn[row >> 4];
108
+ do {
109
+ if (pa & 1) {
110
+ uint32_t zn_row = zn[H4(row)];
111
+ for (col = 0; col < oprsz; ) {
112
+ uint64_t pb = pm[col >> 4];
113
+ do {
114
+ if (pb & 1) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
116
+ }
117
+ pb >>= 4;
118
+ } while (++col & 15);
119
+ }
120
+ }
121
+ pa >>= 4;
122
+ } while (++row & 15);
123
+ }
124
+}
125
+
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
127
+ void *vpm, uint32_t desc)
128
+{
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
132
+
133
+ for (row = 0; row < oprsz; ++row) {
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
177
+}
178
+
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
183
--
184
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 5 +++
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
+
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
58
}
59
}
60
+
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
62
+ void *vpm, void *vst, uint32_t desc)
63
+{
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
65
+ uint32_t neg = simd_data(desc) << 31;
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+ float_status fpst;
68
+
69
+ /*
70
+ * Make a copy of float_status because this operation does not
71
+ * update the cumulative fp exception status. It also produces
72
+ * default NaNs.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, &fpst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
168
--
169
2.25.1
diff view generated by jsdifflib
1
Many of the devices on the Musca board live behind TrustZone
1
From: Richard Henderson <richard.henderson@linaro.org>
2
Peripheral Protection Controllers (PPCs); add models of the
3
PPCs, using a similar scheme to the MPS2 board models.
4
This commit wires up the PPCs with "unimplemented device"
5
stubs behind them in the correct places in the address map.
6
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
---
7
---
10
hw/arm/musca.c | 289 +++++++++++++++++++++++++++++++++++++++++++++++++
8
target/arm/helper-sme.h | 2 ++
11
1 file changed, 289 insertions(+)
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
12
13
13
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/arm/musca.c
16
--- a/target/arm/helper-sme.h
16
+++ b/hw/arm/musca.c
17
+++ b/target/arm/helper-sme.h
17
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
18
#include "hw/arm/armsse.h"
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
19
#include "hw/boards.h"
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
20
#include "hw/core/split-irq.h"
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
21
+#include "hw/misc/tz-ppc.h"
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
22
+#include "hw/misc/unimp.h"
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
23
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
24
#define MUSCA_NUMIRQ_MAX 96
25
index XXXXXXX..XXXXXXX 100644
25
+#define MUSCA_PPC_MAX 3
26
--- a/target/arm/sme.decode
26
27
+++ b/target/arm/sme.decode
27
typedef enum MuscaType {
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
28
MUSCA_A,
29
29
@@ -XXX,XX +XXX,XX @@ typedef struct {
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
30
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
31
ARMSSE sse;
32
+
32
SplitIRQ cpu_irq_splitter[MUSCA_NUMIRQ_MAX];
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
33
+ SplitIRQ sec_resp_splitter;
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
34
+ TZPPC ppc[MUSCA_PPC_MAX];
35
index XXXXXXX..XXXXXXX 100644
35
+ MemoryRegion container;
36
--- a/target/arm/sme_helper.c
36
+ UnimplementedDeviceState eflash[2];
37
+++ b/target/arm/sme_helper.c
37
+ UnimplementedDeviceState qspi;
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
38
+ UnimplementedDeviceState mpc[5];
39
}
39
+ UnimplementedDeviceState mhu[2];
40
}
40
+ UnimplementedDeviceState pwm[3];
41
}
41
+ UnimplementedDeviceState i2s;
42
+
42
+ UnimplementedDeviceState uart[2];
43
+ UnimplementedDeviceState i2c[2];
44
+ UnimplementedDeviceState spi;
45
+ UnimplementedDeviceState scc;
46
+ UnimplementedDeviceState timer;
47
+ UnimplementedDeviceState rtc;
48
+ UnimplementedDeviceState pvt;
49
+ UnimplementedDeviceState sdio;
50
+ UnimplementedDeviceState gpio;
51
} MuscaMachineState;
52
53
#define TYPE_MUSCA_MACHINE "musca"
54
@@ -XXX,XX +XXX,XX @@ typedef struct {
55
*/
56
#define SYSCLK_FRQ 40000000
57
58
+/*
43
+/*
59
+ * Most of the devices in the Musca board sit behind Peripheral Protection
44
+ * Alter PAIR as needed for controlling predicates being false,
60
+ * Controllers. These data structures define the layout of which devices
45
+ * and for NEG on an enabled row element.
61
+ * sit behind which PPCs.
62
+ * The devfn for each port is a function which creates, configures
63
+ * and initializes the device, returning the MemoryRegion which
64
+ * needs to be plugged into the downstream end of the PPC port.
65
+ */
46
+ */
66
+typedef MemoryRegion *MakeDevFn(MuscaMachineState *mms, void *opaque,
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
67
+ const char *name, hwaddr size);
68
+
69
+typedef struct PPCPortInfo {
70
+ const char *name;
71
+ MakeDevFn *devfn;
72
+ void *opaque;
73
+ hwaddr addr;
74
+ hwaddr size;
75
+} PPCPortInfo;
76
+
77
+typedef struct PPCInfo {
78
+ const char *name;
79
+ PPCPortInfo ports[TZ_NUM_PORTS];
80
+} PPCInfo;
81
+
82
+static MemoryRegion *make_unimp_dev(MuscaMachineState *mms,
83
+ void *opaque, const char *name, hwaddr size)
84
+{
48
+{
85
+ /*
49
+ /*
86
+ * Initialize, configure and realize a TYPE_UNIMPLEMENTED_DEVICE,
50
+ * The pseudocode uses a conditional negate after the conditional zero.
87
+ * and return a pointer to its MemoryRegion.
51
+ * It is simpler here to unconditionally negate before conditional zero.
88
+ */
52
+ */
89
+ UnimplementedDeviceState *uds = opaque;
53
+ pair ^= neg;
90
+
54
+ if (!(pg & 1)) {
91
+ sysbus_init_child_obj(OBJECT(mms), name, uds,
55
+ pair &= 0xffff0000u;
92
+ sizeof(UnimplementedDeviceState),
56
+ }
93
+ TYPE_UNIMPLEMENTED_DEVICE);
57
+ if (!(pg & 4)) {
94
+ qdev_prop_set_string(DEVICE(uds), "name", name);
58
+ pair &= 0x0000ffffu;
95
+ qdev_prop_set_uint64(DEVICE(uds), "size", size);
59
+ }
96
+ object_property_set_bool(OBJECT(uds), true, "realized", &error_fatal);
60
+ return pair;
97
+ return sysbus_mmio_get_region(SYS_BUS_DEVICE(uds), 0);
98
+}
61
+}
99
+
62
+
100
+static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
101
+ const char *name, hwaddr size)
64
+ void *vpm, uint32_t desc)
102
+{
65
+{
103
+ /*
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
104
+ * Create the container MemoryRegion for all the devices that live
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
105
+ * behind the Musca-A PPC's single port. These devices don't have a PPC
68
+ uint16_t *pn = vpn, *pm = vpm;
106
+ * port each, but we use the PPCPortInfo struct as a convenient way
107
+ * to describe them. Note that addresses here are relative to the base
108
+ * address of the PPC port region: 0x40100000, and devices appear both
109
+ * at the 0x4... NS region and the 0x5... S region.
110
+ */
111
+ int i;
112
+ MemoryRegion *container = &mms->container;
113
+
69
+
114
+ const PPCPortInfo devices[] = {
70
+ for (row = 0; row < oprsz; ) {
115
+ { "uart0", make_unimp_dev, &mms->uart[0], 0x1000, 0x1000 },
71
+ uint16_t prow = pn[H2(row >> 4)];
116
+ { "uart1", make_unimp_dev, &mms->uart[1], 0x2000, 0x1000 },
72
+ do {
117
+ { "spi", make_unimp_dev, &mms->spi, 0x3000, 0x1000 },
73
+ void *vza_row = vza + tile_vslice_offset(row);
118
+ { "i2c0", make_unimp_dev, &mms->i2c[0], 0x4000, 0x1000 },
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
119
+ { "i2c1", make_unimp_dev, &mms->i2c[1], 0x5000, 0x1000 },
120
+ { "i2s", make_unimp_dev, &mms->i2s, 0x6000, 0x1000 },
121
+ { "pwm0", make_unimp_dev, &mms->pwm[0], 0x7000, 0x1000 },
122
+ { "rtc", make_unimp_dev, &mms->rtc, 0x8000, 0x1000 },
123
+ { "qspi", make_unimp_dev, &mms->qspi, 0xa000, 0x1000 },
124
+ { "timer", make_unimp_dev, &mms->timer, 0xb000, 0x1000 },
125
+ { "scc", make_unimp_dev, &mms->scc, 0xc000, 0x1000 },
126
+ { "pwm1", make_unimp_dev, &mms->pwm[1], 0xe000, 0x1000 },
127
+ { "pwm2", make_unimp_dev, &mms->pwm[2], 0xf000, 0x1000 },
128
+ { "gpio", make_unimp_dev, &mms->gpio, 0x10000, 0x1000 },
129
+ { "mpc0", make_unimp_dev, &mms->mpc[0], 0x12000, 0x1000 },
130
+ { "mpc1", make_unimp_dev, &mms->mpc[1], 0x13000, 0x1000 },
131
+ };
132
+
75
+
133
+ memory_region_init(container, OBJECT(mms), "musca-device-container", size);
76
+ n = f16mop_adj_pair(n, prow, neg);
134
+
77
+
135
+ for (i = 0; i < ARRAY_SIZE(devices); i++) {
78
+ for (col = 0; col < oprsz; ) {
136
+ const PPCPortInfo *pinfo = &devices[i];
79
+ uint16_t pcol = pm[H2(col >> 4)];
137
+ MemoryRegion *mr;
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
138
+
84
+
139
+ mr = pinfo->devfn(mms, pinfo->opaque, pinfo->name, pinfo->size);
85
+ m = f16mop_adj_pair(m, pcol, 0);
140
+ memory_region_add_subregion(container, pinfo->addr, mr);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ }
89
+ col += 4;
90
+ pcol >>= 4;
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
114
+ return true;
141
+ }
115
+ }
142
+
116
+
143
+ return &mms->container;
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
144
+}
131
+}
145
+
132
+
146
static void musca_init(MachineState *machine)
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
147
{
135
{
148
MuscaMachineState *mms = MUSCA_MACHINE(machine);
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
149
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
137
150
MachineClass *mc = MACHINE_GET_CLASS(machine);
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
151
MemoryRegion *system_memory = get_system_memory();
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
152
DeviceState *ssedev;
153
+ DeviceState *dev_splitter;
154
+ const PPCInfo *ppcs;
155
+ int num_ppcs;
156
int i;
157
158
assert(mmc->num_irqs <= MUSCA_NUMIRQ_MAX);
159
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
160
"EXP_CPU1_IRQ", i));
161
}
162
163
+ /*
164
+ * The sec_resp_cfg output from the SSE-200 must be split into multiple
165
+ * lines, one for each of the PPCs we create here.
166
+ */
167
+ object_initialize(&mms->sec_resp_splitter, sizeof(mms->sec_resp_splitter),
168
+ TYPE_SPLIT_IRQ);
169
+ object_property_add_child(OBJECT(machine), "sec-resp-splitter",
170
+ OBJECT(&mms->sec_resp_splitter), &error_fatal);
171
+ object_property_set_int(OBJECT(&mms->sec_resp_splitter),
172
+ ARRAY_SIZE(mms->ppc), "num-lines", &error_fatal);
173
+ object_property_set_bool(OBJECT(&mms->sec_resp_splitter), true,
174
+ "realized", &error_fatal);
175
+ dev_splitter = DEVICE(&mms->sec_resp_splitter);
176
+ qdev_connect_gpio_out_named(ssedev, "sec_resp_cfg", 0,
177
+ qdev_get_gpio_in(dev_splitter, 0));
178
+
140
+
179
+ /*
141
+/* TODO: FEAT_EBF16 */
180
+ * Most of the devices in the board are behind Peripheral Protection
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
181
+ * Controllers. The required order for initializing things is:
182
+ * + initialize the PPC
183
+ * + initialize, configure and realize downstream devices
184
+ * + connect downstream device MemoryRegions to the PPC
185
+ * + realize the PPC
186
+ * + map the PPC's MemoryRegions to the places in the address map
187
+ * where the downstream devices should appear
188
+ * + wire up the PPC's control lines to the SSE object
189
+ *
190
+ * The PPC mapping differs for the -A and -B1 variants; the -A version
191
+ * is much simpler, using only a single port of a single PPC and putting
192
+ * all the devices behind that.
193
+ */
194
+ const PPCInfo a_ppcs[] = { {
195
+ .name = "ahb_ppcexp0",
196
+ .ports = {
197
+ { "musca-devices", make_musca_a_devs, 0, 0x40100000, 0x100000 },
198
+ },
199
+ },
200
+ };
201
+
202
+ /*
203
+ * Devices listed with an 0x4.. address appear in both the NS 0x4.. region
204
+ * and the 0x5.. S region. Devices listed with an 0x5.. address appear
205
+ * only in the S region.
206
+ */
207
+ const PPCInfo b1_ppcs[] = { {
208
+ .name = "apb_ppcexp0",
209
+ .ports = {
210
+ { "eflash0", make_unimp_dev, &mms->eflash[0],
211
+ 0x52400000, 0x1000 },
212
+ { "eflash1", make_unimp_dev, &mms->eflash[1],
213
+ 0x52500000, 0x1000 },
214
+ { "qspi", make_unimp_dev, &mms->qspi, 0x42800000, 0x100000 },
215
+ { "mpc0", make_unimp_dev, &mms->mpc[0], 0x52000000, 0x1000 },
216
+ { "mpc1", make_unimp_dev, &mms->mpc[1], 0x52100000, 0x1000 },
217
+ { "mpc2", make_unimp_dev, &mms->mpc[2], 0x52200000, 0x1000 },
218
+ { "mpc3", make_unimp_dev, &mms->mpc[3], 0x52300000, 0x1000 },
219
+ { "mhu0", make_unimp_dev, &mms->mhu[0], 0x42600000, 0x100000 },
220
+ { "mhu1", make_unimp_dev, &mms->mhu[1], 0x42700000, 0x100000 },
221
+ { }, /* port 9: unused */
222
+ { }, /* port 10: unused */
223
+ { }, /* port 11: unused */
224
+ { }, /* port 12: unused */
225
+ { }, /* port 13: unused */
226
+ { "mpc4", make_unimp_dev, &mms->mpc[4], 0x52e00000, 0x1000 },
227
+ },
228
+ }, {
229
+ .name = "apb_ppcexp1",
230
+ .ports = {
231
+ { "pwm0", make_unimp_dev, &mms->pwm[0], 0x40101000, 0x1000 },
232
+ { "pwm1", make_unimp_dev, &mms->pwm[1], 0x40102000, 0x1000 },
233
+ { "pwm2", make_unimp_dev, &mms->pwm[2], 0x40103000, 0x1000 },
234
+ { "i2s", make_unimp_dev, &mms->i2s, 0x40104000, 0x1000 },
235
+ { "uart0", make_unimp_dev, &mms->uart[0], 0x40105000, 0x1000 },
236
+ { "uart1", make_unimp_dev, &mms->uart[1], 0x40106000, 0x1000 },
237
+ { "i2c0", make_unimp_dev, &mms->i2c[0], 0x40108000, 0x1000 },
238
+ { "i2c1", make_unimp_dev, &mms->i2c[1], 0x40109000, 0x1000 },
239
+ { "spi", make_unimp_dev, &mms->spi, 0x4010a000, 0x1000 },
240
+ { "scc", make_unimp_dev, &mms->scc, 0x5010b000, 0x1000 },
241
+ { "timer", make_unimp_dev, &mms->timer, 0x4010c000, 0x1000 },
242
+ { "rtc", make_unimp_dev, &mms->rtc, 0x4010d000, 0x1000 },
243
+ { "pvt", make_unimp_dev, &mms->pvt, 0x4010e000, 0x1000 },
244
+ { "sdio", make_unimp_dev, &mms->sdio, 0x4010f000, 0x1000 },
245
+ },
246
+ }, {
247
+ .name = "ahb_ppcexp0",
248
+ .ports = {
249
+ { }, /* port 0: unused */
250
+ { "gpio", make_unimp_dev, &mms->gpio, 0x41000000, 0x1000 },
251
+ },
252
+ },
253
+ };
254
+
255
+ switch (mmc->type) {
256
+ case MUSCA_A:
257
+ ppcs = a_ppcs;
258
+ num_ppcs = ARRAY_SIZE(a_ppcs);
259
+ break;
260
+ case MUSCA_B1:
261
+ ppcs = b1_ppcs;
262
+ num_ppcs = ARRAY_SIZE(b1_ppcs);
263
+ break;
264
+ default:
265
+ g_assert_not_reached();
266
+ }
267
+ assert(num_ppcs <= MUSCA_PPC_MAX);
268
+
269
+ for (i = 0; i < num_ppcs; i++) {
270
+ const PPCInfo *ppcinfo = &ppcs[i];
271
+ TZPPC *ppc = &mms->ppc[i];
272
+ DeviceState *ppcdev;
273
+ int port;
274
+ char *gpioname;
275
+
276
+ sysbus_init_child_obj(OBJECT(machine), ppcinfo->name, ppc,
277
+ sizeof(TZPPC), TYPE_TZ_PPC);
278
+ ppcdev = DEVICE(ppc);
279
+
280
+ for (port = 0; port < TZ_NUM_PORTS; port++) {
281
+ const PPCPortInfo *pinfo = &ppcinfo->ports[port];
282
+ MemoryRegion *mr;
283
+ char *portname;
284
+
285
+ if (!pinfo->devfn) {
286
+ continue;
287
+ }
288
+
289
+ mr = pinfo->devfn(mms, pinfo->opaque, pinfo->name, pinfo->size);
290
+ portname = g_strdup_printf("port[%d]", port);
291
+ object_property_set_link(OBJECT(ppc), OBJECT(mr),
292
+ portname, &error_fatal);
293
+ g_free(portname);
294
+ }
295
+
296
+ object_property_set_bool(OBJECT(ppc), true, "realized", &error_fatal);
297
+
298
+ for (port = 0; port < TZ_NUM_PORTS; port++) {
299
+ const PPCPortInfo *pinfo = &ppcinfo->ports[port];
300
+
301
+ if (!pinfo->devfn) {
302
+ continue;
303
+ }
304
+ sysbus_mmio_map(SYS_BUS_DEVICE(ppc), port, pinfo->addr);
305
+
306
+ gpioname = g_strdup_printf("%s_nonsec", ppcinfo->name);
307
+ qdev_connect_gpio_out_named(ssedev, gpioname, port,
308
+ qdev_get_gpio_in_named(ppcdev,
309
+ "cfg_nonsec",
310
+ port));
311
+ g_free(gpioname);
312
+ gpioname = g_strdup_printf("%s_ap", ppcinfo->name);
313
+ qdev_connect_gpio_out_named(ssedev, gpioname, port,
314
+ qdev_get_gpio_in_named(ppcdev,
315
+ "cfg_ap", port));
316
+ g_free(gpioname);
317
+ }
318
+
319
+ gpioname = g_strdup_printf("%s_irq_enable", ppcinfo->name);
320
+ qdev_connect_gpio_out_named(ssedev, gpioname, 0,
321
+ qdev_get_gpio_in_named(ppcdev,
322
+ "irq_enable", 0));
323
+ g_free(gpioname);
324
+ gpioname = g_strdup_printf("%s_irq_clear", ppcinfo->name);
325
+ qdev_connect_gpio_out_named(ssedev, gpioname, 0,
326
+ qdev_get_gpio_in_named(ppcdev,
327
+ "irq_clear", 0));
328
+ g_free(gpioname);
329
+ gpioname = g_strdup_printf("%s_irq_status", ppcinfo->name);
330
+ qdev_connect_gpio_out_named(ppcdev, "irq", 0,
331
+ qdev_get_gpio_in_named(ssedev,
332
+ gpioname, 0));
333
+ g_free(gpioname);
334
+
335
+ qdev_connect_gpio_out(dev_splitter, i,
336
+ qdev_get_gpio_in_named(ppcdev,
337
+ "cfg_sec_resp", 0));
338
+ }
339
+
340
armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x2000000);
341
}
342
343
--
143
--
344
2.20.1
144
2.25.1
345
346
diff view generated by jsdifflib
1
Wire up the two PL011 UARTs in the Musca board.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
---
7
---
6
hw/arm/musca.c | 34 +++++++++++++++++++++++++++++-----
8
target/arm/helper-sme.h | 2 ++
7
1 file changed, 29 insertions(+), 5 deletions(-)
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
8
13
9
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
10
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
11
--- a/hw/arm/musca.c
16
--- a/target/arm/helper-sme.h
12
+++ b/hw/arm/musca.c
17
+++ b/target/arm/helper-sme.h
13
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
14
#include "qemu/error-report.h"
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
15
#include "qapi/error.h"
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
16
#include "exec/address-spaces.h"
21
17
+#include "sysemu/sysemu.h"
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
18
#include "hw/arm/arm.h"
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
19
#include "hw/arm/armsse.h"
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
20
#include "hw/boards.h"
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
21
+#include "hw/char/pl011.h"
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
22
#include "hw/core/split-irq.h"
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
23
#include "hw/misc/tz-mpc.h"
28
index XXXXXXX..XXXXXXX 100644
24
#include "hw/misc/tz-ppc.h"
29
--- a/target/arm/sme.decode
25
@@ -XXX,XX +XXX,XX @@ typedef struct {
30
+++ b/target/arm/sme.decode
26
UnimplementedDeviceState mhu[2];
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
27
UnimplementedDeviceState pwm[3];
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
28
UnimplementedDeviceState i2s;
33
29
- UnimplementedDeviceState uart[2];
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
30
+ PL011State uart[2];
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
31
UnimplementedDeviceState i2c[2];
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
32
UnimplementedDeviceState spi;
37
index XXXXXXX..XXXXXXX 100644
33
UnimplementedDeviceState scc;
38
--- a/target/arm/sme_helper.c
34
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_rtc(MuscaMachineState *mms, void *opaque,
39
+++ b/target/arm/sme_helper.c
35
return sysbus_mmio_get_region(SYS_BUS_DEVICE(rtc), 0);
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
41
return pair;
36
}
42
}
37
43
38
+static MemoryRegion *make_uart(MuscaMachineState *mms, void *opaque,
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
39
+ const char *name, hwaddr size)
45
+ float_status *s_std, float_status *s_odd)
40
+{
46
+{
41
+ PL011State *uart = opaque;
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
42
+ int i = uart - &mms->uart[0];
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
43
+ int irqbase = 7 + i * 6;
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
44
+ SysBusDevice *s;
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
45
+
53
+
46
+ sysbus_init_child_obj(OBJECT(mms), name, uart, sizeof(mms->uart[0]),
54
+ /*
47
+ TYPE_PL011);
55
+ * The ARM pseudocode function FPDot performs both multiplies
48
+ qdev_prop_set_chr(DEVICE(uart), "chardev", serial_hd(i));
56
+ * and the add with a single rounding operation. Emulate this
49
+ object_property_set_bool(OBJECT(uart), true, "realized", &error_fatal);
57
+ * by performing the first multiply in round-to-odd, then doing
50
+ s = SYS_BUS_DEVICE(uart);
58
+ * the second multiply as fused multiply-add, and rounding to
51
+ sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqbase + 5)); /* combined */
59
+ * float32 all in one step.
52
+ sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqbase + 0)); /* RX */
60
+ */
53
+ sysbus_connect_irq(s, 2, get_sse_irq_in(mms, irqbase + 1)); /* TX */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
54
+ sysbus_connect_irq(s, 3, get_sse_irq_in(mms, irqbase + 2)); /* RT */
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
55
+ sysbus_connect_irq(s, 4, get_sse_irq_in(mms, irqbase + 3)); /* MS */
63
+
56
+ sysbus_connect_irq(s, 5, get_sse_irq_in(mms, irqbase + 4)); /* E */
64
+ /* This conversion is exact, because we've already rounded. */
57
+ return sysbus_mmio_get_region(SYS_BUS_DEVICE(uart), 0);
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
58
+}
69
+}
59
+
70
+
60
static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
61
const char *name, hwaddr size)
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
62
{
120
{
63
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
64
MemoryRegion *container = &mms->container;
122
index XXXXXXX..XXXXXXX 100644
65
123
--- a/target/arm/translate-sme.c
66
const PPCPortInfo devices[] = {
124
+++ b/target/arm/translate-sme.c
67
- { "uart0", make_unimp_dev, &mms->uart[0], 0x1000, 0x1000 },
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
68
- { "uart1", make_unimp_dev, &mms->uart[1], 0x2000, 0x1000 },
126
return true;
69
+ { "uart0", make_uart, &mms->uart[0], 0x1000, 0x1000 },
127
}
70
+ { "uart1", make_uart, &mms->uart[1], 0x2000, 0x1000 },
128
71
{ "spi", make_unimp_dev, &mms->spi, 0x3000, 0x1000 },
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
72
{ "i2c0", make_unimp_dev, &mms->i2c[0], 0x4000, 0x1000 },
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
73
{ "i2c1", make_unimp_dev, &mms->i2c[1], 0x5000, 0x1000 },
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
74
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
132
75
{ "pwm1", make_unimp_dev, &mms->pwm[1], 0x40102000, 0x1000 },
76
{ "pwm2", make_unimp_dev, &mms->pwm[2], 0x40103000, 0x1000 },
77
{ "i2s", make_unimp_dev, &mms->i2s, 0x40104000, 0x1000 },
78
- { "uart0", make_unimp_dev, &mms->uart[0], 0x40105000, 0x1000 },
79
- { "uart1", make_unimp_dev, &mms->uart[1], 0x40106000, 0x1000 },
80
+ { "uart0", make_uart, &mms->uart[0], 0x40105000, 0x1000 },
81
+ { "uart1", make_uart, &mms->uart[1], 0x40106000, 0x1000 },
82
{ "i2c0", make_unimp_dev, &mms->i2c[0], 0x40108000, 0x1000 },
83
{ "i2c1", make_unimp_dev, &mms->i2c[1], 0x40109000, 0x1000 },
84
{ "spi", make_unimp_dev, &mms->spi, 0x4010a000, 0x1000 },
85
--
133
--
86
2.20.1
134
2.25.1
87
88
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is SMOPA, SUMOPA, USMOPA, UMOPA, for both Int8 and Int16.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/helper-sme.h | 16 ++++++++
11
target/arm/sme.decode | 10 +++++
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
15
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/helper-sme.h
19
+++ b/target/arm/helper-sme.h
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
48
+
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
53
+
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/sme_helper.c
61
+++ b/target/arm/sme_helper.c
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
63
} while (row & 15);
64
}
65
}
66
+
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
68
+
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
72
+{
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
75
+
76
+ for (row = 0; row < oprsz; ++row) {
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
80
+
81
+ for (col = 0; col < oprsz; ++col) {
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
86
+ }
87
+ }
88
+}
89
+
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
92
+{ \
93
+ uint32_t sum0 = 0, sum1 = 0; \
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
95
+ n &= expand_pred_b(p); \
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
110
+}
111
+
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
114
+{ \
115
+ uint64_t sum = 0; \
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
117
+ n &= expand_pred_h(p); \
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
123
+}
124
+
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
129
+
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
134
+
135
+#define DEF_IMOPH(NAME) \
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
137
+ void *vpm, uint32_t desc) \
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
139
+
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
166
--
167
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is an SVE instruction that operates using the SVE vector
4
length but that is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sve.decode | 20 +++++++++++++
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
13
2 files changed, 77 insertions(+)
14
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sve.decode
18
+++ b/target/arm/sve.decode
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
20
21
### SVE2 floating-point bfloat16 dot-product (indexed)
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
23
+
24
+### SVE broadcast predicate element
25
+
26
+&psel esz pd pn pm rv imm
27
+%psel_rv 16:2 !function=plus_12
28
+%psel_imm_b 22:2 19:2
29
+%psel_imm_h 22:2 20:1
30
+%psel_imm_s 22:2
31
+%psel_imm_d 23:1
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
51
+
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
53
+{
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
65
+ }
66
+
67
+ tmp = tcg_temp_new_i64();
68
+ dbit = tcg_temp_new_i64();
69
+ didx = tcg_temp_new_i64();
70
+ ptr = tcg_temp_new_ptr();
71
+
72
+ /* Compute the predicate element. */
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
74
+ if (is_power_of_2(elements)) {
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
78
+ }
79
+
80
+ /* Extract the predicate byte and bit indices. */
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
86
+ }
87
+
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
106
+ return true;
107
+}
108
--
109
2.25.1
diff view generated by jsdifflib
1
The Peripheral Protection Controller's handling of unused ports
1
From: Richard Henderson <richard.henderson@linaro.org>
2
is that if there is nothing connected to the port's downstream
3
then it does not create the sysbus MMIO region for the upstream
4
end of the port. This results in odd behaviour when there is
5
an unused port in the middle of the range: since sysbus MMIO
6
regions are implicitly consecutively allocated, any used ports
7
above the unused ones end up with sysbus MMIO region numbers
8
that don't match the port number.
9
2
10
Avoid this numbering mismatch by creating dummy MMIO regions
3
This is an SVE instruction that operates using the SVE vector
11
for the unused ports. This doesn't change anything for our
4
length but that is present only if SME is implemented.
12
existing boards, which don't have any gaps in the middle of
13
the port ranges they use; but it will be needed for the Musca
14
board.
15
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
---
10
---
19
include/hw/misc/tz-ppc.h | 8 +++++++-
11
target/arm/helper-sve.h | 2 ++
20
hw/misc/tz-ppc.c | 32 ++++++++++++++++++++++++++++++++
12
target/arm/sve.decode | 1 +
21
2 files changed, 39 insertions(+), 1 deletion(-)
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
22
16
23
diff --git a/include/hw/misc/tz-ppc.h b/include/hw/misc/tz-ppc.h
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
24
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
25
--- a/include/hw/misc/tz-ppc.h
19
--- a/target/arm/helper-sve.h
26
+++ b/include/hw/misc/tz-ppc.h
20
+++ b/target/arm/helper-sve.h
27
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
*
22
29
* QEMU interface:
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
* + sysbus MMIO regions 0..15: MemoryRegions defining the upstream end
24
31
- * of each of the 16 ports of the PPC
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
+ * of each of the 16 ports of the PPC. When a port is unused (i.e. no
26
+
33
+ * downstream MemoryRegion is connected to it) at the end of the 0..15
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+ * range then no sysbus MMIO region is created for its upstream. When an
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+ * unused port lies in the middle of the range with other used ports at
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+ * higher port numbers, a dummy MMIO region is created to ensure that
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
37
+ * port N's upstream is always sysbus MMIO region N. Dummy regions should
38
+ * not be mapped, and will assert if any access is made to them.
39
* + Property "port[0..15]": MemoryRegion defining the downstream device(s)
40
* for each of the 16 ports of the PPC
41
* + Named GPIO inputs "cfg_nonsec[0..15]": set to 1 if the port should be
42
diff --git a/hw/misc/tz-ppc.c b/hw/misc/tz-ppc.c
43
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
44
--- a/hw/misc/tz-ppc.c
32
--- a/target/arm/sve.decode
45
+++ b/hw/misc/tz-ppc.c
33
+++ b/target/arm/sve.decode
46
@@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps tz_ppc_ops = {
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
47
.endianness = DEVICE_LITTLE_ENDIAN,
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
48
};
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
49
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
50
+static bool tz_ppc_dummy_accepts(void *opaque, hwaddr addr,
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
51
+ unsigned size, bool is_write,
39
52
+ MemTxAttrs attrs)
40
# SVE vector splice (predicated, destructive)
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/sve_helper.c
45
+++ b/target/arm/sve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
47
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
49
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
53
+{
51
+{
54
+ /*
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
55
+ * Board code should never map the upstream end of an unused port,
53
+ uint64_t *d = vd, *n = vn;
56
+ * so we should never try to make a memory access to it.
54
+ uint8_t *pg = vg;
57
+ */
55
+
58
+ g_assert_not_reached();
56
+ for (i = 0; i < opr_sz; i += 2) {
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
63
+ }
59
+}
64
+}
60
+
65
+
61
+static const MemoryRegionOps tz_ppc_dummy_ops = {
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
62
+ .valid.accepts = tz_ppc_dummy_accepts,
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
63
+};
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
76
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
64
+
78
+
65
static void tz_ppc_reset(DeviceState *dev)
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
66
{
80
gen_helper_sve_splice, a, a->esz)
67
TZPPC *s = TZ_PPC(dev);
68
@@ -XXX,XX +XXX,XX @@ static void tz_ppc_realize(DeviceState *dev, Error **errp)
69
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
70
TZPPC *s = TZ_PPC(dev);
71
int i;
72
+ int max_port = 0;
73
74
/* We can't create the upstream end of the port until realize,
75
* as we don't know the size of the MR used as the downstream until then.
76
*/
77
for (i = 0; i < TZ_NUM_PORTS; i++) {
78
+ if (s->port[i].downstream) {
79
+ max_port = i;
80
+ }
81
+ }
82
+
83
+ for (i = 0; i <= max_port; i++) {
84
TZPPCPort *port = &s->port[i];
85
char *name;
86
uint64_t size;
87
88
if (!port->downstream) {
89
+ /*
90
+ * Create dummy sysbus MMIO region so the sysbus region
91
+ * numbering doesn't get out of sync with the port numbers.
92
+ * The size is entirely arbitrary.
93
+ */
94
+ name = g_strdup_printf("tz-ppc-dummy-port[%d]", i);
95
+ memory_region_init_io(&port->upstream, obj, &tz_ppc_dummy_ops,
96
+ port, name, 0x10000);
97
+ sysbus_init_mmio(sbd, &port->upstream);
98
+ g_free(name);
99
continue;
100
}
101
81
102
--
82
--
103
2.20.1
83
2.25.1
104
105
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is an SVE instruction that operates using the SVE vector
4
length, but it is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper.h | 18 +++++++
12
target/arm/sve.decode | 5 ++
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, ptr, i32)
24
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
66
}
67
+
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
69
+{
70
+ tcg_gen_smax_i32(d, a, n);
71
+ tcg_gen_smin_i32(d, d, m);
72
+}
73
+
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
75
+{
76
+ tcg_gen_smax_i64(d, a, n);
77
+ tcg_gen_smin_i64(d, d, m);
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
201
--
202
2.25.1
diff view generated by jsdifflib
1
From: Aaron Lindsay OS <aaron@os.amperecomputing.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This was introduced by
3
We can handle both exception entry and exception return by
4
commit bf8d09694ccc07487cd73d7562081fdaec3370c8
4
hooking into aarch64_sve_change_el.
5
target/arm: Don't clear supported PMU events when initializing PMCEID1
6
and identified by Coverity (CID 1398645).
7
5
8
Signed-off-by: Aaron Lindsay <aaron@os.amperecomputing.com>
9
Reported-by: Peter Maydell <peter.maydell@linaro.org>
10
Message-id: 20190219144621.450-1-aaron@os.amperecomputing.com
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
target/arm/helper.c | 2 +-
11
target/arm/helper.c | 15 +++++++++++++--
15
1 file changed, 1 insertion(+), 1 deletion(-)
12
1 file changed, 13 insertions(+), 2 deletions(-)
16
13
17
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.c
16
--- a/target/arm/helper.c
20
+++ b/target/arm/helper.c
17
+++ b/target/arm/helper.c
21
@@ -XXX,XX +XXX,XX @@ void pmu_init(ARMCPU *cpu)
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
22
19
return;
23
if (cnt->supported(&cpu->env)) {
20
}
24
supported_event_map[cnt->number] = i;
21
25
- uint64_t event_mask = 1 << (cnt->number & 0x1f);
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
26
+ uint64_t event_mask = 1ULL << (cnt->number & 0x1f);
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
27
if (cnt->number & 0x20) {
24
+
28
cpu->pmceid1 |= event_mask;
25
+ /*
29
} else {
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
27
+ * invoke ResetSVEState when taking an exception from, or
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
29
+ */
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
31
+ arm_reset_sve_state(env);
32
+ return;
33
+ }
34
+
35
/*
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
37
* at ELx, or not available because the EL is in AArch32 state, then
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
39
* we already have the correct register contents when encountering the
40
* vq0->vq0 transition between EL0->EL1.
41
*/
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
44
? sve_vqm1_for_el(env, old_el) : 0);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
47
? sve_vqm1_for_el(env, new_el) : 0);
48
30
--
49
--
31
2.20.1
50
2.25.1
32
33
diff view generated by jsdifflib
1
Coverity points out (CID 1398632, CID 1398650) that we
1
From: Richard Henderson <richard.henderson@linaro.org>
2
leak a couple of allocated strings in the error-exit
3
code path for setting up the MHUs in the ARMSSE.
4
Fix this bug by moving the allocate-and-free of each
5
string to be closer to the use, so we do the free before
6
doing the error-exit check.
7
2
8
Fixes: f8574705f62b38a ("hw/arm/armsse: Add unimplemented-device stubs for MHUs")
3
Note that SME remains effectively disabled for user-only,
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20190215113707.24553-1-peter.maydell@linaro.org
12
---
11
---
13
hw/arm/armsse.c | 10 ++++++----
12
docs/system/arm/emulation.rst | 4 ++++
14
1 file changed, 6 insertions(+), 4 deletions(-)
13
target/arm/cpu64.c | 11 +++++++++++
14
2 files changed, 15 insertions(+)
15
15
16
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/arm/armsse.c
18
--- a/docs/system/arm/emulation.rst
19
+++ b/hw/arm/armsse.c
19
+++ b/docs/system/arm/emulation.rst
20
@@ -XXX,XX +XXX,XX @@ static void armsse_realize(DeviceState *dev, Error **errp)
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
21
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
22
if (info->has_mhus) {
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
23
for (i = 0; i < ARRAY_SIZE(s->mhu); i++) {
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
24
- char *name = g_strdup_printf("MHU%d", i);
24
+- FEAT_SME (Scalable Matrix Extension)
25
- char *port = g_strdup_printf("port[%d]", i + 3);
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
26
+ char *name;
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
27
+ char *port;
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
28
28
- FEAT_SPECRES (Speculation restriction instructions)
29
+ name = g_strdup_printf("MHU%d", i);
29
- FEAT_SSBS (Speculative Store Bypass Safe)
30
qdev_prop_set_string(DEVICE(&s->mhu[i]), "name", name);
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
31
qdev_prop_set_uint64(DEVICE(&s->mhu[i]), "size", 0x1000);
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
32
object_property_set_bool(OBJECT(&s->mhu[i]), true,
32
index XXXXXXX..XXXXXXX 100644
33
"realized", &err);
33
--- a/target/arm/cpu64.c
34
+ g_free(name);
34
+++ b/target/arm/cpu64.c
35
if (err) {
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
36
error_propagate(errp, err);
36
*/
37
return;
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
38
}
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
39
+ port = g_strdup_printf("port[%d]", i + 3);
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
40
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mhu[i]), 0);
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
41
object_property_set_link(OBJECT(&s->apb_ppc0), OBJECT(mr),
41
cpu->isar.id_aa64pfr1 = t;
42
port, &err);
42
43
+ g_free(port);
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
44
if (err) {
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
45
error_propagate(errp, err);
45
cpu->isar.id_aa64dfr0 = t;
46
return;
46
47
}
47
+ t = cpu->isar.id_aa64smfr0;
48
- g_free(name);
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
49
- g_free(port);
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
50
}
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
51
}
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
55
+ cpu->isar.id_aa64smfr0 = t;
56
+
57
/* Replicate the same data to the 32-bit id registers. */
58
aa32_max_features(cpu);
52
59
53
--
60
--
54
2.20.1
61
2.25.1
55
56
diff view generated by jsdifflib
1
The PL011 UART has six interrupt lines:
1
From: Richard Henderson <richard.henderson@linaro.org>
2
* RX (receive data)
3
* TX (transmit data)
4
* RT (receive timeout)
5
* MS (modem status)
6
* E (errors)
7
* combined (logical OR of all the above)
8
2
9
So far we have only emulated the combined interrupt line;
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
add support for the others, so that boards that wire them
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
up to different interrupt controller inputs can do so.
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/target_cpu.h | 5 ++++-
9
1 file changed, 4 insertions(+), 1 deletion(-)
12
10
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
---
16
include/hw/char/pl011.h | 2 +-
17
hw/char/pl011.c | 46 +++++++++++++++++++++++++++++++++++++++--
18
2 files changed, 45 insertions(+), 3 deletions(-)
19
20
diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h
21
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/char/pl011.h
13
--- a/linux-user/aarch64/target_cpu.h
23
+++ b/include/hw/char/pl011.h
14
+++ b/linux-user/aarch64/target_cpu.h
24
@@ -XXX,XX +XXX,XX @@ typedef struct PL011State {
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
25
int read_count;
16
26
int read_trigger;
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
27
CharBackend chr;
28
- qemu_irq irq;
29
+ qemu_irq irq[6];
30
const unsigned char *id;
31
} PL011State;
32
33
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/char/pl011.c
36
+++ b/hw/char/pl011.c
37
@@ -XXX,XX +XXX,XX @@
38
* This code is licensed under the GPL.
39
*/
40
41
+/*
42
+ * QEMU interface:
43
+ * + sysbus MMIO region 0: device registers
44
+ * + sysbus IRQ 0: UARTINTR (combined interrupt line)
45
+ * + sysbus IRQ 1: UARTRXINTR (receive FIFO interrupt line)
46
+ * + sysbus IRQ 2: UARTTXINTR (transmit FIFO interrupt line)
47
+ * + sysbus IRQ 3: UARTRTINTR (receive timeout interrupt line)
48
+ * + sysbus IRQ 4: UARTMSINTR (modem status interrupt line)
49
+ * + sysbus IRQ 5: UARTEINTR (error interrupt line)
50
+ */
51
+
52
#include "qemu/osdep.h"
53
#include "hw/char/pl011.h"
54
#include "hw/sysbus.h"
55
@@ -XXX,XX +XXX,XX @@
56
#define PL011_FLAG_TXFF 0x20
57
#define PL011_FLAG_RXFE 0x10
58
59
+/* Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC */
60
+#define INT_OE (1 << 10)
61
+#define INT_BE (1 << 9)
62
+#define INT_PE (1 << 8)
63
+#define INT_FE (1 << 7)
64
+#define INT_RT (1 << 6)
65
+#define INT_TX (1 << 5)
66
+#define INT_RX (1 << 4)
67
+#define INT_DSR (1 << 3)
68
+#define INT_DCD (1 << 2)
69
+#define INT_CTS (1 << 1)
70
+#define INT_RI (1 << 0)
71
+#define INT_E (INT_OE | INT_BE | INT_PE | INT_FE)
72
+#define INT_MS (INT_RI | INT_DSR | INT_DCD | INT_CTS)
73
+
74
static const unsigned char pl011_id_arm[8] =
75
{ 0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
76
static const unsigned char pl011_id_luminary[8] =
77
{ 0x11, 0x00, 0x18, 0x01, 0x0d, 0xf0, 0x05, 0xb1 };
78
79
+/* Which bits in the interrupt status matter for each outbound IRQ line? */
80
+static const uint32_t irqmask[] = {
81
+ INT_E | INT_MS | INT_RT | INT_TX | INT_RX, /* combined IRQ */
82
+ INT_RX,
83
+ INT_TX,
84
+ INT_RT,
85
+ INT_MS,
86
+ INT_E,
87
+};
88
+
89
static void pl011_update(PL011State *s)
90
{
18
{
91
uint32_t flags;
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
92
+ int i;
20
+ /*
93
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
94
flags = s->int_level & s->int_enabled;
22
* different from AArch32 Linux, which uses TPIDRRO.
95
trace_pl011_irq_state(flags != 0);
23
*/
96
- qemu_set_irq(s->irq, flags != 0);
24
env->cp15.tpidr_el[0] = newtls;
97
+ for (i = 0; i < ARRAY_SIZE(s->irq); i++) {
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
98
+ qemu_set_irq(s->irq[i], (flags & irqmask[i]) != 0);
26
+ env->cp15.tpidr2_el0 = 0;
99
+ }
100
}
27
}
101
28
102
static uint64_t pl011_read(void *opaque, hwaddr offset,
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
103
@@ -XXX,XX +XXX,XX @@ static void pl011_init(Object *obj)
104
{
105
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
106
PL011State *s = PL011(obj);
107
+ int i;
108
109
memory_region_init_io(&s->iomem, OBJECT(s), &pl011_ops, s, "pl011", 0x1000);
110
sysbus_init_mmio(sbd, &s->iomem);
111
- sysbus_init_irq(sbd, &s->irq);
112
+ for (i = 0; i < ARRAY_SIZE(s->irq); i++) {
113
+ sysbus_init_irq(sbd, &s->irq[i]);
114
+ }
115
116
s->read_trigger = 1;
117
s->ifl = 0x12;
118
--
30
--
119
2.20.1
31
2.25.1
120
121
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
9
1 file changed, 9 insertions(+)
10
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/cpu_loop.c
14
+++ b/linux-user/aarch64/cpu_loop.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
16
17
switch (trapnr) {
18
case EXCP_SWI:
19
+ /*
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
22
+ */
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
25
+ arm_rebuild_hflags(env);
26
+ arm_reset_sve_state(env);
27
+ }
28
ret = do_syscall(env,
29
env->xregs[8],
30
env->xregs[0],
31
--
32
2.25.1
diff view generated by jsdifflib
1
The region 0x40010000 .. 0x4001ffff and its secure-only alias
1
From: Richard Henderson <richard.henderson@linaro.org>
2
at 0x50010000... are for per-CPU devices. We implement this by
3
giving each CPU its own container memory region, where the
4
per-CPU devices live. Unfortunately, the alias region which
5
makes devices mapped at 0x4... addresses also appear at 0x5...
6
is only implemented in the overall "all CPUs" container. The
7
effect of this bug is that the CPU_IDENTITY register block appears
8
only at 0x4001f000, but not at the 0x5001f000 alias where it should
9
also appear. Guests (like very recent Arm Trusted Firmware-M)
10
which try to access it at 0x5001f000 will crash.
11
2
12
Fix this by moving the handling for this alias from the "all CPUs"
3
Make sure to zero the currently reserved fields.
13
container to the per-CPU container. (We leave the aliases for
14
0x1... and 0x3... in the overall container, because there are
15
no per-CPU devices there.)
16
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
Message-id: 20190215180500.6906-1-peter.maydell@linaro.org
19
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
20
---
9
---
21
include/hw/arm/armsse.h | 2 +-
10
linux-user/aarch64/signal.c | 9 ++++++++-
22
hw/arm/armsse.c | 26 ++++++++++++++++----------
11
1 file changed, 8 insertions(+), 1 deletion(-)
23
2 files changed, 17 insertions(+), 11 deletions(-)
24
12
25
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
26
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
27
--- a/include/hw/arm/armsse.h
15
--- a/linux-user/aarch64/signal.c
28
+++ b/include/hw/arm/armsse.h
16
+++ b/linux-user/aarch64/signal.c
29
@@ -XXX,XX +XXX,XX @@ typedef struct ARMSSE {
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
30
MemoryRegion cpu_container[SSE_MAX_CPUS];
18
struct target_sve_context {
31
MemoryRegion alias1;
19
struct target_aarch64_ctx head;
32
MemoryRegion alias2;
20
uint16_t vl;
33
- MemoryRegion alias3;
21
- uint16_t reserved[3];
34
+ MemoryRegion alias3[SSE_MAX_CPUS];
22
+ uint16_t flags;
35
MemoryRegion sram[MAX_SRAM_BANKS];
23
+ uint16_t reserved[2];
36
24
/* The actual SVE data immediately follows. It is laid out
37
qemu_irq *exp_irqs[SSE_MAX_CPUS];
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
38
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
26
* the original struct pointer.
39
index XXXXXXX..XXXXXXX 100644
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
40
--- a/hw/arm/armsse.c
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
41
+++ b/hw/arm/armsse.c
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
42
@@ -XXX,XX +XXX,XX @@ static bool irq_is_common[32] = {
30
43
/* 30, 31: reserved */
31
+#define TARGET_SVE_SIG_FLAG_SM 1
44
};
32
+
45
33
struct target_rt_sigframe {
46
-/* Create an alias region of @size bytes starting at @base
34
struct target_siginfo info;
47
+/*
35
struct target_ucontext uc;
48
+ * Create an alias region in @container of @size bytes starting at @base
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
49
* which mirrors the memory starting at @orig.
50
*/
51
-static void make_alias(ARMSSE *s, MemoryRegion *mr, const char *name,
52
- hwaddr base, hwaddr size, hwaddr orig)
53
+static void make_alias(ARMSSE *s, MemoryRegion *mr, MemoryRegion *container,
54
+ const char *name, hwaddr base, hwaddr size, hwaddr orig)
55
{
37
{
56
- memory_region_init_alias(mr, NULL, name, &s->container, orig, size);
38
int i, j;
57
+ memory_region_init_alias(mr, NULL, name, container, orig, size);
39
58
/* The alias is even lower priority than unimplemented_device regions */
40
+ memset(sve, 0, sizeof(*sve));
59
- memory_region_add_subregion_overlap(&s->container, base, mr, -1500);
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
60
+ memory_region_add_subregion_overlap(container, base, mr, -1500);
42
__put_user(size, &sve->head.size);
61
}
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
62
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
63
static void irq_status_forwarder(void *opaque, int n, int level)
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
64
@@ -XXX,XX +XXX,XX @@ static void armsse_realize(DeviceState *dev, Error **errp)
65
}
66
67
/* Set up the big aliases first */
68
- make_alias(s, &s->alias1, "alias 1", 0x10000000, 0x10000000, 0x00000000);
69
- make_alias(s, &s->alias2, "alias 2", 0x30000000, 0x10000000, 0x20000000);
70
+ make_alias(s, &s->alias1, &s->container, "alias 1",
71
+ 0x10000000, 0x10000000, 0x00000000);
72
+ make_alias(s, &s->alias2, &s->container,
73
+ "alias 2", 0x30000000, 0x10000000, 0x20000000);
74
/* The 0x50000000..0x5fffffff region is not a pure alias: it has
75
* a few extra devices that only appear there (generally the
76
* control interfaces for the protection controllers).
77
* We implement this by mapping those devices over the top of this
78
- * alias MR at a higher priority.
79
+ * alias MR at a higher priority. Some of the devices in this range
80
+ * are per-CPU, so we must put this alias in the per-cpu containers.
81
*/
82
- make_alias(s, &s->alias3, "alias 3", 0x50000000, 0x10000000, 0x40000000);
83
-
84
+ for (i = 0; i < info->num_cpus; i++) {
85
+ make_alias(s, &s->alias3[i], &s->cpu_container[i],
86
+ "alias 3", 0x50000000, 0x10000000, 0x40000000);
87
+ }
46
+ }
88
47
89
/* Security controller */
48
/* Note that SVE regs are stored as a byte stream, with each byte element
90
object_property_set_bool(OBJECT(&s->secctl), true, "realized", &err);
49
* at a subsequent address. This corresponds to a little-endian store
91
--
50
--
92
2.20.1
51
2.25.1
93
94
diff view generated by jsdifflib
1
Create a new include file for the pl011's device struct,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
type macros, etc, so that it can be instantiated using
3
the "embedded struct" coding style.
4
2
3
Fold the return value setting into the goto, so each
4
point of failure need not do both.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
include/hw/char/pl011.h | 34 ++++++++++++++++++++++++++++++++++
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
10
hw/char/pl011.c | 31 ++-----------------------------
12
1 file changed, 11 insertions(+), 15 deletions(-)
11
2 files changed, 36 insertions(+), 29 deletions(-)
12
13
13
diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/include/hw/char/pl011.h
16
--- a/linux-user/aarch64/signal.c
16
+++ b/include/hw/char/pl011.h
17
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
18
#ifndef HW_PL011_H
19
struct target_sve_context *sve = NULL;
19
#define HW_PL011_H
20
uint64_t extra_datap = 0;
20
21
bool used_extra = false;
21
+#include "hw/sysbus.h"
22
- bool err = false;
22
+#include "chardev/char-fe.h"
23
int vq = 0, sve_size = 0;
24
25
target_restore_general_frame(env, sf);
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
27
switch (magic) {
28
case 0:
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
84
-
85
- exit:
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
23
+
89
+
24
+#define TYPE_PL011 "pl011"
90
+ err:
25
+#define PL011(obj) OBJECT_CHECK(PL011State, (obj), TYPE_PL011)
91
+ unlock_user(extra, extra_datap, 0);
26
+
92
+ return 1;
27
+/* This shares the same struct (and cast macro) as the base pl011 device */
28
+#define TYPE_PL011_LUMINARY "pl011_luminary"
29
+
30
+typedef struct PL011State {
31
+ SysBusDevice parent_obj;
32
+
33
+ MemoryRegion iomem;
34
+ uint32_t readbuff;
35
+ uint32_t flags;
36
+ uint32_t lcr;
37
+ uint32_t rsr;
38
+ uint32_t cr;
39
+ uint32_t dmacr;
40
+ uint32_t int_enabled;
41
+ uint32_t int_level;
42
+ uint32_t read_fifo[16];
43
+ uint32_t ilpr;
44
+ uint32_t ibrd;
45
+ uint32_t fbrd;
46
+ uint32_t ifl;
47
+ int read_pos;
48
+ int read_count;
49
+ int read_trigger;
50
+ CharBackend chr;
51
+ qemu_irq irq;
52
+ const unsigned char *id;
53
+} PL011State;
54
+
55
static inline DeviceState *pl011_create(hwaddr addr,
56
qemu_irq irq,
57
Chardev *chr)
58
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/hw/char/pl011.c
61
+++ b/hw/char/pl011.c
62
@@ -XXX,XX +XXX,XX @@
63
*/
64
65
#include "qemu/osdep.h"
66
+#include "hw/char/pl011.h"
67
#include "hw/sysbus.h"
68
#include "chardev/char-fe.h"
69
#include "qemu/log.h"
70
#include "trace.h"
71
72
-#define TYPE_PL011 "pl011"
73
-#define PL011(obj) OBJECT_CHECK(PL011State, (obj), TYPE_PL011)
74
-
75
-typedef struct PL011State {
76
- SysBusDevice parent_obj;
77
-
78
- MemoryRegion iomem;
79
- uint32_t readbuff;
80
- uint32_t flags;
81
- uint32_t lcr;
82
- uint32_t rsr;
83
- uint32_t cr;
84
- uint32_t dmacr;
85
- uint32_t int_enabled;
86
- uint32_t int_level;
87
- uint32_t read_fifo[16];
88
- uint32_t ilpr;
89
- uint32_t ibrd;
90
- uint32_t fbrd;
91
- uint32_t ifl;
92
- int read_pos;
93
- int read_count;
94
- int read_trigger;
95
- CharBackend chr;
96
- qemu_irq irq;
97
- const unsigned char *id;
98
-} PL011State;
99
-
100
#define PL011_INT_TX 0x20
101
#define PL011_INT_RX 0x10
102
103
@@ -XXX,XX +XXX,XX @@ static void pl011_luminary_init(Object *obj)
104
}
93
}
105
94
106
static const TypeInfo pl011_luminary_info = {
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
107
- .name = "pl011_luminary",
108
+ .name = TYPE_PL011_LUMINARY,
109
.parent = TYPE_PL011,
110
.instance_init = pl011_luminary_init,
111
};
112
--
96
--
113
2.20.1
97
2.25.1
114
115
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
There are lots of special cases within these insns. Split the
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
major argument decode/loading/saving into no_output (compares),
4
or records that are smaller than the header. We were silently
5
rd_is_dp, and rm_is_dp.
5
allowing these cases to pass, dropping the record.
6
6
7
We still need to special case argument load for compare (rd as
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
input, rm as zero) and vcvt fixed (rd as input+output), but lots
9
of special cases do disappear.
10
11
Now that we have a full switch at the beginning, hoist the ISA
12
checks from the code generation.
13
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-id: 20190215192302.27855-4-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
16
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
11
---
19
target/arm/translate.c | 227 ++++++++++++++++++++---------------------
12
linux-user/aarch64/signal.c | 5 ++++-
20
1 file changed, 111 insertions(+), 116 deletions(-)
13
1 file changed, 4 insertions(+), 1 deletion(-)
21
14
22
diff --git a/target/arm/translate.c b/target/arm/translate.c
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
23
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/translate.c
17
--- a/linux-user/aarch64/signal.c
25
+++ b/target/arm/translate.c
18
+++ b/linux-user/aarch64/signal.c
26
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
27
}
20
break;
28
} else {
21
29
/* data processing */
22
case TARGET_SVE_MAGIC:
30
+ bool rd_is_dp = dp;
23
+ if (sve || size < sizeof(struct target_sve_context)) {
31
+ bool rm_is_dp = dp;
24
+ goto err;
32
+ bool no_output = false;
33
+
34
/* The opcode is in bits 23, 21, 20 and 6. */
35
op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
36
- if (dp) {
37
- if (op == 15) {
38
- /* rn is opcode */
39
- rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
40
- } else {
41
- /* rn is register number */
42
- VFP_DREG_N(rn, insn);
43
- }
44
+ rn = VFP_SREG_N(insn);
45
46
- if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
47
- ((rn & 0x1e) == 0x6))) {
48
- /* Integer or single/half precision destination. */
49
- rd = VFP_SREG_D(insn);
50
- } else {
51
- VFP_DREG_D(rd, insn);
52
- }
53
- if (op == 15 &&
54
- (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
55
- ((rn & 0x1e) == 0x4))) {
56
- /* VCVT from int or half precision is always from S reg
57
- * regardless of dp bit. VCVT with immediate frac_bits
58
- * has same format as SREG_M.
59
+ if (op == 15) {
60
+ /* rn is opcode, encoded as per VFP_SREG_N. */
61
+ switch (rn) {
62
+ case 0x00: /* vmov */
63
+ case 0x01: /* vabs */
64
+ case 0x02: /* vneg */
65
+ case 0x03: /* vsqrt */
66
+ break;
67
+
68
+ case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
69
+ case 0x05: /* vcvtt.f64.f16, vcvtt.f32.f16 */
70
+ /*
71
+ * VCVTB, VCVTT: only present with the halfprec extension
72
+ * UNPREDICTABLE if bit 8 is set prior to ARMv8
73
+ * (we choose to UNDEF)
74
*/
75
- rm = VFP_SREG_M(insn);
76
- } else {
77
- VFP_DREG_M(rm, insn);
78
+ if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
79
+ !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
80
+ return 1;
81
+ }
82
+ rm_is_dp = false;
83
+ break;
84
+ case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
85
+ case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
86
+ if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
87
+ !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
88
+ return 1;
89
+ }
90
+ rd_is_dp = false;
91
+ break;
92
+
93
+ case 0x08: case 0x0a: /* vcmp, vcmpz */
94
+ case 0x09: case 0x0b: /* vcmpe, vcmpez */
95
+ no_output = true;
96
+ break;
97
+
98
+ case 0x0c: /* vrintr */
99
+ case 0x0d: /* vrintz */
100
+ case 0x0e: /* vrintx */
101
+ break;
102
+
103
+ case 0x0f: /* vcvt double<->single */
104
+ rd_is_dp = !dp;
105
+ break;
106
+
107
+ case 0x10: /* vcvt.fxx.u32 */
108
+ case 0x11: /* vcvt.fxx.s32 */
109
+ rm_is_dp = false;
110
+ break;
111
+ case 0x18: /* vcvtr.u32.fxx */
112
+ case 0x19: /* vcvtz.u32.fxx */
113
+ case 0x1a: /* vcvtr.s32.fxx */
114
+ case 0x1b: /* vcvtz.s32.fxx */
115
+ rd_is_dp = false;
116
+ break;
117
+
118
+ case 0x14: /* vcvt fp <-> fixed */
119
+ case 0x15:
120
+ case 0x16:
121
+ case 0x17:
122
+ case 0x1c:
123
+ case 0x1d:
124
+ case 0x1e:
125
+ case 0x1f:
126
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
127
+ return 1;
128
+ }
129
+ /* Immediate frac_bits has same format as SREG_M. */
130
+ rm_is_dp = false;
131
+ break;
132
+
133
+ default:
134
+ return 1;
135
}
136
+ } else if (dp) {
137
+ /* rn is register number */
138
+ VFP_DREG_N(rn, insn);
139
+ }
25
+ }
140
+
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
141
+ if (rd_is_dp) {
27
vq = sve_vq(env);
142
+ VFP_DREG_D(rd, insn);
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
143
+ } else {
29
- if (!sve && size == sve_size) {
144
+ rd = VFP_SREG_D(insn);
30
+ if (size == sve_size) {
145
+ }
31
sve = (struct target_sve_context *)ctx;
146
+ if (rm_is_dp) {
147
+ VFP_DREG_M(rm, insn);
148
} else {
149
- rn = VFP_SREG_N(insn);
150
- if (op == 15 && rn == 15) {
151
- /* Double precision destination. */
152
- VFP_DREG_D(rd, insn);
153
- } else {
154
- rd = VFP_SREG_D(insn);
155
- }
156
- /* NB that we implicitly rely on the encoding for the frac_bits
157
- * in VCVT of fixed to float being the same as that of an SREG_M
158
- */
159
rm = VFP_SREG_M(insn);
160
}
161
162
veclen = s->vec_len;
163
- if (op == 15 && rn > 3)
164
+ if (op == 15 && rn > 3) {
165
veclen = 0;
166
+ }
167
168
/* Shut up compiler warnings. */
169
delta_m = 0;
170
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
171
/* Load the initial operands. */
172
if (op == 15) {
173
switch (rn) {
174
- case 16:
175
- case 17:
176
- /* Integer source */
177
- gen_mov_F0_vreg(0, rm);
178
- break;
179
- case 8:
180
- case 9:
181
- /* Compare */
182
+ case 0x08: case 0x09: /* Compare */
183
gen_mov_F0_vreg(dp, rd);
184
gen_mov_F1_vreg(dp, rm);
185
break;
186
- case 10:
187
- case 11:
188
- /* Compare with zero */
189
+ case 0x0a: case 0x0b: /* Compare with zero */
190
gen_mov_F0_vreg(dp, rd);
191
gen_vfp_F1_ld0(dp);
192
break;
193
- case 20:
194
- case 21:
195
- case 22:
196
- case 23:
197
- case 28:
198
- case 29:
199
- case 30:
200
- case 31:
201
+ case 0x14: /* vcvt fp <-> fixed */
202
+ case 0x15:
203
+ case 0x16:
204
+ case 0x17:
205
+ case 0x1c:
206
+ case 0x1d:
207
+ case 0x1e:
208
+ case 0x1f:
209
/* Source and destination the same. */
210
gen_mov_F0_vreg(dp, rd);
211
break;
212
- case 4:
213
- case 5:
214
- case 6:
215
- case 7:
216
- /* VCVTB, VCVTT: only present with the halfprec extension
217
- * UNPREDICTABLE if bit 8 is set prior to ARMv8
218
- * (we choose to UNDEF)
219
- */
220
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
221
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
222
- return 1;
223
- }
224
- if (!extract32(rn, 1, 1)) {
225
- /* Half precision source. */
226
- gen_mov_F0_vreg(0, rm);
227
- break;
228
- }
229
- /* Otherwise fall through */
230
default:
231
/* One source operand. */
232
- gen_mov_F0_vreg(dp, rm);
233
+ gen_mov_F0_vreg(rm_is_dp, rm);
234
break;
32
break;
235
}
33
}
236
} else {
237
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
238
break;
239
}
240
case 15: /* single<->double conversion */
241
- if (dp)
242
+ if (dp) {
243
gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
244
- else
245
+ } else {
246
gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
247
+ }
248
break;
249
case 16: /* fuito */
250
gen_vfp_uito(dp, 0);
251
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
252
gen_vfp_sito(dp, 0);
253
break;
254
case 20: /* fshto */
255
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
256
- return 1;
257
- }
258
gen_vfp_shto(dp, 16 - rm, 0);
259
break;
260
case 21: /* fslto */
261
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
262
- return 1;
263
- }
264
gen_vfp_slto(dp, 32 - rm, 0);
265
break;
266
case 22: /* fuhto */
267
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
268
- return 1;
269
- }
270
gen_vfp_uhto(dp, 16 - rm, 0);
271
break;
272
case 23: /* fulto */
273
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
274
- return 1;
275
- }
276
gen_vfp_ulto(dp, 32 - rm, 0);
277
break;
278
case 24: /* ftoui */
279
@@ -XXX,XX +XXX,XX @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
280
gen_vfp_tosiz(dp, 0);
281
break;
282
case 28: /* ftosh */
283
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
284
- return 1;
285
- }
286
gen_vfp_tosh(dp, 16 - rm, 0);
287
break;
288
case 29: /* ftosl */
289
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
290
- return 1;
291
- }
292
gen_vfp_tosl(dp, 32 - rm, 0);
293
break;
294
case 30: /* ftouh */
295
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
296
- return 1;
297
- }
298
gen_vfp_touh(dp, 16 - rm, 0);
299
break;
300
case 31: /* ftoul */
301
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
302
- return 1;
303
- }
304
gen_vfp_toul(dp, 32 - rm, 0);
305
break;
306
default: /* undefined */
307
- return 1;
308
+ g_assert_not_reached();
309
}
310
break;
311
default: /* undefined */
312
return 1;
313
}
314
315
- /* Write back the result. */
316
- if (op == 15 && (rn >= 8 && rn <= 11)) {
317
- /* Comparison, do nothing. */
318
- } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
319
- (rn & 0x1e) == 0x6)) {
320
- /* VCVT double to int: always integer result.
321
- * VCVT double to half precision is always a single
322
- * precision result.
323
- */
324
- gen_mov_vreg_F0(0, rd);
325
- } else if (op == 15 && rn == 15) {
326
- /* conversion */
327
- gen_mov_vreg_F0(!dp, rd);
328
- } else {
329
- gen_mov_vreg_F0(dp, rd);
330
+ /* Write back the result, if any. */
331
+ if (!no_output) {
332
+ gen_mov_vreg_F0(rd_is_dp, rd);
333
}
334
335
/* break out of the loop if we have finished */
336
- if (veclen == 0)
337
+ if (veclen == 0) {
338
break;
339
+ }
340
341
if (op == 15 && delta_m == 0) {
342
/* single source one-many */
343
--
34
--
344
2.20.1
35
2.25.1
345
346
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/signal.c | 3 +++
9
1 file changed, 3 insertions(+)
10
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/signal.c
14
+++ b/linux-user/aarch64/signal.c
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
16
__get_user(extra_size,
17
&((struct target_extra_context *)ctx)->size);
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
19
+ if (!extra) {
20
+ return 1;
21
+ }
22
break;
23
24
default:
25
--
26
2.25.1
diff view generated by jsdifflib
1
The Musca board puts its SRAM and flash behind TrustZone
1
From: Richard Henderson <richard.henderson@linaro.org>
2
Memory Protection Controllers (MPCs). Each MPC sits between
3
the CPU and the RAM/flash, and also has a set of memory mapped
4
control registers. Wire up the MPCs, and the memory behind them.
5
For the moment we implement the flash as simple ROM, which
6
cannot be reprogrammed by the guest.
7
2
3
Move the checks out of the parsing loop and into the
4
restore function. This more closely mirrors the code
5
structure in the kernel, and is slightly clearer.
6
7
Reject rather than silently skip incorrect VL and SVE record sizes,
8
bringing our checks into line with those the kernel does.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
14
---
11
hw/arm/musca.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++---
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
12
1 file changed, 147 insertions(+), 8 deletions(-)
16
1 file changed, 35 insertions(+), 16 deletions(-)
13
17
14
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/arm/musca.c
20
--- a/linux-user/aarch64/signal.c
17
+++ b/hw/arm/musca.c
21
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
19
#include "hw/arm/armsse.h"
23
}
20
#include "hw/boards.h"
21
#include "hw/core/split-irq.h"
22
+#include "hw/misc/tz-mpc.h"
23
#include "hw/misc/tz-ppc.h"
24
#include "hw/misc/unimp.h"
25
26
#define MUSCA_NUMIRQ_MAX 96
27
#define MUSCA_PPC_MAX 3
28
+#define MUSCA_MPC_MAX 5
29
+
30
+typedef struct MPCInfo MPCInfo;
31
32
typedef enum MuscaType {
33
MUSCA_A,
34
@@ -XXX,XX +XXX,XX @@ typedef struct {
35
uint32_t init_svtor;
36
int sram_addr_width;
37
int num_irqs;
38
+ const MPCInfo *mpc_info;
39
+ int num_mpcs;
40
} MuscaMachineClass;
41
42
typedef struct {
43
MachineState parent;
44
45
ARMSSE sse;
46
+ /* RAM and flash */
47
+ MemoryRegion ram[MUSCA_MPC_MAX];
48
SplitIRQ cpu_irq_splitter[MUSCA_NUMIRQ_MAX];
49
SplitIRQ sec_resp_splitter;
50
TZPPC ppc[MUSCA_PPC_MAX];
51
MemoryRegion container;
52
UnimplementedDeviceState eflash[2];
53
UnimplementedDeviceState qspi;
54
- UnimplementedDeviceState mpc[5];
55
+ TZMPC mpc[MUSCA_MPC_MAX];
56
UnimplementedDeviceState mhu[2];
57
UnimplementedDeviceState pwm[3];
58
UnimplementedDeviceState i2s;
59
@@ -XXX,XX +XXX,XX @@ typedef struct {
60
UnimplementedDeviceState pvt;
61
UnimplementedDeviceState sdio;
62
UnimplementedDeviceState gpio;
63
+ UnimplementedDeviceState cryptoisland;
64
} MuscaMachineState;
65
66
#define TYPE_MUSCA_MACHINE "musca"
67
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_unimp_dev(MuscaMachineState *mms,
68
return sysbus_mmio_get_region(SYS_BUS_DEVICE(uds), 0);
69
}
24
}
70
25
71
+typedef enum MPCInfoType {
26
-static void target_restore_sve_record(CPUARMState *env,
72
+ MPC_RAM,
27
- struct target_sve_context *sve, int vq)
73
+ MPC_ROM,
28
+static bool target_restore_sve_record(CPUARMState *env,
74
+ MPC_CRYPTOISLAND,
29
+ struct target_sve_context *sve,
75
+} MPCInfoType;
30
+ int size)
76
+
31
{
77
+struct MPCInfo {
32
- int i, j;
78
+ const char *name;
33
+ int i, j, vl, vq;
79
+ hwaddr addr;
34
80
+ hwaddr size;
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
81
+ MPCInfoType type;
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
82
+};
37
+ return false;
83
+
84
+/* Order of the MPCs here must match the order of the bits in SECMPCINTSTATUS */
85
+static const MPCInfo a_mpc_info[] = { {
86
+ .name = "qspi",
87
+ .type = MPC_ROM,
88
+ .addr = 0x00200000,
89
+ .size = 0x00800000,
90
+ }, {
91
+ .name = "sram",
92
+ .type = MPC_RAM,
93
+ .addr = 0x00000000,
94
+ .size = 0x00200000,
95
+ }
96
+};
97
+
98
+static const MPCInfo b1_mpc_info[] = { {
99
+ .name = "qspi",
100
+ .type = MPC_ROM,
101
+ .addr = 0x00000000,
102
+ .size = 0x02000000,
103
+ }, {
104
+ .name = "sram",
105
+ .type = MPC_RAM,
106
+ .addr = 0x0a400000,
107
+ .size = 0x00080000,
108
+ }, {
109
+ .name = "eflash0",
110
+ .type = MPC_ROM,
111
+ .addr = 0x0a000000,
112
+ .size = 0x00200000,
113
+ }, {
114
+ .name = "eflash1",
115
+ .type = MPC_ROM,
116
+ .addr = 0x0a200000,
117
+ .size = 0x00200000,
118
+ }, {
119
+ .name = "cryptoisland",
120
+ .type = MPC_CRYPTOISLAND,
121
+ .addr = 0x0a000000,
122
+ .size = 0x00200000,
123
+ }
124
+};
125
+
126
+static MemoryRegion *make_mpc(MuscaMachineState *mms, void *opaque,
127
+ const char *name, hwaddr size)
128
+{
129
+ /*
130
+ * Create an MPC and the RAM or flash behind it.
131
+ * MPC 0: eFlash 0
132
+ * MPC 1: eFlash 1
133
+ * MPC 2: SRAM
134
+ * MPC 3: QSPI flash
135
+ * MPC 4: CryptoIsland
136
+ * For now we implement the flash regions as ROM (ie not programmable)
137
+ * (with their control interface memory regions being unimplemented
138
+ * stubs behind the PPCs).
139
+ * The whole CryptoIsland region behind its MPC is an unimplemented stub.
140
+ */
141
+ MuscaMachineClass *mmc = MUSCA_MACHINE_GET_CLASS(mms);
142
+ TZMPC *mpc = opaque;
143
+ int i = mpc - &mms->mpc[0];
144
+ MemoryRegion *downstream;
145
+ MemoryRegion *upstream;
146
+ UnimplementedDeviceState *uds;
147
+ char *mpcname;
148
+ const MPCInfo *mpcinfo = mmc->mpc_info;
149
+
150
+ mpcname = g_strdup_printf("%s-mpc", mpcinfo[i].name);
151
+
152
+ switch (mpcinfo[i].type) {
153
+ case MPC_ROM:
154
+ downstream = &mms->ram[i];
155
+ memory_region_init_rom(downstream, NULL, mpcinfo[i].name,
156
+ mpcinfo[i].size, &error_fatal);
157
+ break;
158
+ case MPC_RAM:
159
+ downstream = &mms->ram[i];
160
+ memory_region_init_ram(downstream, NULL, mpcinfo[i].name,
161
+ mpcinfo[i].size, &error_fatal);
162
+ break;
163
+ case MPC_CRYPTOISLAND:
164
+ /* We don't implement the CryptoIsland yet */
165
+ uds = &mms->cryptoisland;
166
+ sysbus_init_child_obj(OBJECT(mms), name, uds,
167
+ sizeof(UnimplementedDeviceState),
168
+ TYPE_UNIMPLEMENTED_DEVICE);
169
+ qdev_prop_set_string(DEVICE(uds), "name", mpcinfo[i].name);
170
+ qdev_prop_set_uint64(DEVICE(uds), "size", mpcinfo[i].size);
171
+ object_property_set_bool(OBJECT(uds), true, "realized", &error_fatal);
172
+ downstream = sysbus_mmio_get_region(SYS_BUS_DEVICE(uds), 0);
173
+ break;
174
+ default:
175
+ g_assert_not_reached();
176
+ }
38
+ }
177
+
39
+
178
+ sysbus_init_child_obj(OBJECT(mms), mpcname, mpc, sizeof(mms->mpc[0]),
40
+ __get_user(vl, &sve->vl);
179
+ TYPE_TZ_MPC);
41
+ vq = sve_vq(env);
180
+ object_property_set_link(OBJECT(mpc), OBJECT(downstream),
181
+ "downstream", &error_fatal);
182
+ object_property_set_bool(OBJECT(mpc), true, "realized", &error_fatal);
183
+ /* Map the upstream end of the MPC into system memory */
184
+ upstream = sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 1);
185
+ memory_region_add_subregion(get_system_memory(), mpcinfo[i].addr, upstream);
186
+ /* and connect its interrupt to the SSE-200 */
187
+ qdev_connect_gpio_out_named(DEVICE(mpc), "irq", 0,
188
+ qdev_get_gpio_in_named(DEVICE(&mms->sse),
189
+ "mpcexp_status", i));
190
+
42
+
191
+ g_free(mpcname);
43
+ /* Reject mismatched VL. */
192
+ /* Return the register interface MR for our caller to map behind the PPC */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
193
+ return sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 0);
45
+ return false;
194
+}
46
+ }
195
+
47
+
196
static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
197
const char *name, hwaddr size)
49
+ if (size <= sizeof(*sve)) {
198
{
50
+ return true;
199
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
51
+ }
200
{ "pwm1", make_unimp_dev, &mms->pwm[1], 0xe000, 0x1000 },
52
+
201
{ "pwm2", make_unimp_dev, &mms->pwm[2], 0xf000, 0x1000 },
53
+ /* Reject non-empty but incomplete record. */
202
{ "gpio", make_unimp_dev, &mms->gpio, 0x10000, 0x1000 },
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
203
- { "mpc0", make_unimp_dev, &mms->mpc[0], 0x12000, 0x1000 },
55
+ return false;
204
- { "mpc1", make_unimp_dev, &mms->mpc[1], 0x13000, 0x1000 },
56
+ }
205
+ { "mpc0", make_mpc, &mms->mpc[0], 0x12000, 0x1000 },
57
+
206
+ { "mpc1", make_mpc, &mms->mpc[1], 0x13000, 0x1000 },
58
+ /*
207
};
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
208
60
* at a subsequent address. This corresponds to a little-endian load
209
memory_region_init(container, OBJECT(mms), "musca-device-container", size);
61
* of our 64-bit hunks.
210
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
62
*/
211
int i;
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
212
64
}
213
assert(mmc->num_irqs <= MUSCA_NUMIRQ_MAX);
65
}
214
+ assert(mmc->num_mpcs <= MUSCA_MPC_MAX);
66
}
215
67
+ return true;
216
if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) {
217
error_report("This board can only be used with CPU %s",
218
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
219
{ "eflash1", make_unimp_dev, &mms->eflash[1],
220
0x52500000, 0x1000 },
221
{ "qspi", make_unimp_dev, &mms->qspi, 0x42800000, 0x100000 },
222
- { "mpc0", make_unimp_dev, &mms->mpc[0], 0x52000000, 0x1000 },
223
- { "mpc1", make_unimp_dev, &mms->mpc[1], 0x52100000, 0x1000 },
224
- { "mpc2", make_unimp_dev, &mms->mpc[2], 0x52200000, 0x1000 },
225
- { "mpc3", make_unimp_dev, &mms->mpc[3], 0x52300000, 0x1000 },
226
+ { "mpc0", make_mpc, &mms->mpc[0], 0x52000000, 0x1000 },
227
+ { "mpc1", make_mpc, &mms->mpc[1], 0x52100000, 0x1000 },
228
+ { "mpc2", make_mpc, &mms->mpc[2], 0x52200000, 0x1000 },
229
+ { "mpc3", make_mpc, &mms->mpc[3], 0x52300000, 0x1000 },
230
{ "mhu0", make_unimp_dev, &mms->mhu[0], 0x42600000, 0x100000 },
231
{ "mhu1", make_unimp_dev, &mms->mhu[1], 0x42700000, 0x100000 },
232
{ }, /* port 9: unused */
233
@@ -XXX,XX +XXX,XX @@ static void musca_init(MachineState *machine)
234
{ }, /* port 11: unused */
235
{ }, /* port 12: unused */
236
{ }, /* port 13: unused */
237
- { "mpc4", make_unimp_dev, &mms->mpc[4], 0x52e00000, 0x1000 },
238
+ { "mpc4", make_mpc, &mms->mpc[4], 0x52e00000, 0x1000 },
239
},
240
}, {
241
.name = "apb_ppcexp1",
242
@@ -XXX,XX +XXX,XX @@ static void musca_a_class_init(ObjectClass *oc, void *data)
243
mmc->init_svtor = 0x10200000;
244
mmc->sram_addr_width = 15;
245
mmc->num_irqs = 64;
246
+ mmc->mpc_info = a_mpc_info;
247
+ mmc->num_mpcs = ARRAY_SIZE(a_mpc_info);
248
}
68
}
249
69
250
static void musca_b1_class_init(ObjectClass *oc, void *data)
70
static int target_restore_sigframe(CPUARMState *env,
251
@@ -XXX,XX +XXX,XX @@ static void musca_b1_class_init(ObjectClass *oc, void *data)
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
252
mmc->init_svtor = 0x10000000;
72
struct target_sve_context *sve = NULL;
253
mmc->sram_addr_width = 17;
73
uint64_t extra_datap = 0;
254
mmc->num_irqs = 96;
74
bool used_extra = false;
255
+ mmc->mpc_info = b1_mpc_info;
75
- int vq = 0, sve_size = 0;
256
+ mmc->num_mpcs = ARRAY_SIZE(b1_mpc_info);
76
+ int sve_size = 0;
257
}
77
258
78
target_restore_general_frame(env, sf);
259
static const TypeInfo musca_info = {
79
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
81
if (sve || size < sizeof(struct target_sve_context)) {
82
goto err;
83
}
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
85
- vq = sve_vq(env);
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
87
- if (size == sve_size) {
88
- sve = (struct target_sve_context *)ctx;
89
- break;
90
- }
91
- }
92
- goto err;
93
+ sve = (struct target_sve_context *)ctx;
94
+ sve_size = size;
95
+ break;
96
97
case TARGET_EXTRA_MAGIC:
98
if (extra || size != sizeof(struct target_extra_context)) {
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
100
}
101
102
/* SVE data, if present, overwrites FPSIMD data. */
103
- if (sve) {
104
- target_restore_sve_record(env, sve, vq);
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
106
+ goto err;
107
}
108
unlock_user(extra, extra_datap, 0);
109
return 0;
260
--
110
--
261
2.20.1
111
2.25.1
262
263
diff view generated by jsdifflib
1
Convert the debug printing in the PL031 device to use trace events,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
and augment it to cover the interesting parts of device operation.
3
2
3
Set the SM bit in the SVE record on signal delivery, and create the ZA record.
4
Restore SM and ZA state according to the records present on return.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
hw/timer/pl031.c | 55 +++++++++++++++++++++++--------------------
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
9
hw/timer/trace-events | 6 +++++
12
1 file changed, 154 insertions(+), 13 deletions(-)
10
2 files changed, 36 insertions(+), 25 deletions(-)
11
13
12
diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/timer/pl031.c
16
--- a/linux-user/aarch64/signal.c
15
+++ b/hw/timer/pl031.c
17
+++ b/linux-user/aarch64/signal.c
16
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
17
#include "sysemu/sysemu.h"
19
18
#include "qemu/cutils.h"
20
#define TARGET_SVE_SIG_FLAG_SM 1
19
#include "qemu/log.h"
21
20
-
22
+#define TARGET_ZA_MAGIC 0x54366345
21
-//#define DEBUG_PL031
23
+
22
-
24
+struct target_za_context {
23
-#ifdef DEBUG_PL031
25
+ struct target_aarch64_ctx head;
24
-#define DPRINTF(fmt, ...) \
26
+ uint16_t vl;
25
-do { printf("pl031: " fmt , ## __VA_ARGS__); } while (0)
27
+ uint16_t reserved[3];
26
-#else
28
+ /* The actual ZA data immediately follows. */
27
-#define DPRINTF(fmt, ...) do {} while(0)
29
+};
28
-#endif
30
+
29
+#include "trace.h"
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
30
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
31
#define RTC_DR 0x00 /* Data read register */
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
32
#define RTC_MR 0x04 /* Match register */
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
33
@@ -XXX,XX +XXX,XX @@ static const unsigned char pl031_id[] = {
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
34
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
35
static void pl031_update(PL031State *s)
37
+
36
{
38
struct target_rt_sigframe {
37
- qemu_set_irq(s->irq, s->is & s->im);
39
struct target_siginfo info;
38
+ uint32_t flags = s->is & s->im;
40
struct target_ucontext uc;
39
+
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
40
+ trace_pl031_irq_state(flags);
41
+ qemu_set_irq(s->irq, flags);
42
}
42
}
43
43
44
static void pl031_interrupt(void * opaque)
44
static void target_setup_sve_record(struct target_sve_context *sve,
45
@@ -XXX,XX +XXX,XX @@ static void pl031_interrupt(void * opaque)
45
- CPUARMState *env, int vq, int size)
46
PL031State *s = (PL031State *)opaque;
46
+ CPUARMState *env, int size)
47
47
{
48
s->is = 1;
48
- int i, j;
49
- DPRINTF("Alarm raised\n");
49
+ int i, j, vq = sve_vq(env);
50
+ trace_pl031_alarm_raised();
50
51
pl031_update(s);
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
52
}
55
}
53
56
54
@@ -XXX,XX +XXX,XX @@ static void pl031_set_alarm(PL031State *s)
57
+static void target_setup_za_record(struct target_za_context *za,
55
/* The timer wraps around. This subtraction also wraps in the same way,
58
+ CPUARMState *env, int size)
56
and gives correct results when alarm < now_ticks. */
59
+{
57
ticks = s->mr - pl031_get_count(s);
60
+ int vq = sme_vq(env);
58
- DPRINTF("Alarm set in %ud ticks\n", ticks);
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
59
+ trace_pl031_set_alarm(ticks);
62
+ int i, j;
60
if (ticks == 0) {
63
+
61
timer_del(s->timer);
64
+ memset(za, 0, sizeof(*za));
62
pl031_interrupt(s);
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
63
@@ -XXX,XX +XXX,XX @@ static uint64_t pl031_read(void *opaque, hwaddr offset,
66
+ __put_user(size, &za->head.size);
64
unsigned size)
67
+ __put_user(vl, &za->vl);
65
{
68
+
66
PL031State *s = (PL031State *)opaque;
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
67
-
70
+ return;
68
- if (offset >= 0xfe0 && offset < 0x1000)
71
+ }
69
- return pl031_id[(offset - 0xfe0) >> 2];
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
70
+ uint64_t r;
73
+
71
74
+ /*
72
switch (offset) {
75
+ * Note that ZA vectors are stored as a byte stream,
73
case RTC_DR:
76
+ * with each byte element at a subsequent address.
74
- return pl031_get_count(s);
77
+ */
75
+ r = pl031_get_count(s);
78
+ for (i = 0; i < vl; ++i) {
76
+ break;
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
77
case RTC_MR:
80
+ for (j = 0; j < vq * 2; ++j) {
78
- return s->mr;
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
79
+ r = s->mr;
82
+ }
80
+ break;
83
+ }
81
case RTC_IMSC:
84
+}
82
- return s->im;
85
+
83
+ r = s->im;
86
static void target_restore_general_frame(CPUARMState *env,
84
+ break;
87
struct target_rt_sigframe *sf)
85
case RTC_RIS:
88
{
86
- return s->is;
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
87
+ r = s->is;
90
88
+ break;
91
static bool target_restore_sve_record(CPUARMState *env,
89
case RTC_LR:
92
struct target_sve_context *sve,
90
- return s->lr;
93
- int size)
91
+ r = s->lr;
94
+ int size, int *svcr)
92
+ break;
95
{
93
case RTC_CR:
96
- int i, j, vl, vq;
94
/* RTC is permanently enabled. */
97
+ int i, j, vl, vq, flags;
95
- return 1;
98
+ bool sm;
96
+ r = 1;
99
97
+ break;
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
98
case RTC_MIS:
101
+ __get_user(vl, &sve->vl);
99
- return s->is & s->im;
102
+ __get_user(flags, &sve->flags);
100
+ r = s->is & s->im;
103
+
101
+ break;
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
102
+ case 0xfe0 ... 0xfff:
105
+
103
+ r = pl031_id[(offset - 0xfe0) >> 2];
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
104
+ break;
107
+ if (sm
105
case RTC_ICR:
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
106
qemu_log_mask(LOG_GUEST_ERROR,
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
107
"pl031: read of write-only register at offset 0x%x\n",
110
return false;
108
(int)offset);
111
}
109
+ r = 0;
112
110
break;
113
- __get_user(vl, &sve->vl);
111
default:
114
- vq = sve_vq(env);
112
qemu_log_mask(LOG_GUEST_ERROR,
115
+ /*
113
"pl031_read: Bad offset 0x%x\n", (int)offset);
116
+ * Note that we cannot use sve_vq() because that depends on the
114
+ r = 0;
117
+ * current setting of PSTATE.SM, not the state to be restored.
115
break;
118
+ */
116
}
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
117
120
118
- return 0;
121
/* Reject mismatched VL. */
119
+ trace_pl031_read(offset, r);
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
120
+ return r;
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
133
return true;
121
}
134
}
122
135
123
static void pl031_write(void * opaque, hwaddr offset,
136
+static bool target_restore_za_record(CPUARMState *env,
124
@@ -XXX,XX +XXX,XX @@ static void pl031_write(void * opaque, hwaddr offset,
137
+ struct target_za_context *za,
125
{
138
+ int size, int *svcr)
126
PL031State *s = (PL031State *)opaque;
139
+{
127
140
+ int i, j, vl, vq;
128
+ trace_pl031_write(offset, value);
141
+
129
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
130
switch (offset) {
143
+ return false;
131
case RTC_LR:
144
+ }
132
@@ -XXX,XX +XXX,XX @@ static void pl031_write(void * opaque, hwaddr offset,
145
+
133
break;
146
+ __get_user(vl, &za->vl);
134
case RTC_IMSC:
147
+ vq = sme_vq(env);
135
s->im = value & 1;
148
+
136
- DPRINTF("Interrupt mask %d\n", s->im);
149
+ /* Reject mismatched VL. */
137
pl031_update(s);
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
138
break;
151
+ return false;
139
case RTC_ICR:
152
+ }
140
@@ -XXX,XX +XXX,XX @@ static void pl031_write(void * opaque, hwaddr offset,
153
+
141
cleared when bit 0 of the written value is set. However the
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
142
arm926e documentation (DDI0287B) states that the interrupt is
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
143
cleared when any value is written. */
156
+ return true;
144
- DPRINTF("Interrupt cleared");
157
+ }
145
s->is = 0;
158
+
146
pl031_update(s);
159
+ /* Reject non-empty but incomplete record. */
147
break;
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
148
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
161
+ return false;
149
index XXXXXXX..XXXXXXX 100644
162
+ }
150
--- a/hw/timer/trace-events
163
+
151
+++ b/hw/timer/trace-events
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
152
@@ -XXX,XX +XXX,XX @@ xlnx_zynqmp_rtc_gettime(int year, int month, int day, int hour, int min, int sec
165
+
153
nrf51_timer_read(uint64_t addr, uint32_t value, unsigned size) "read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
166
+ for (i = 0; i < vl; ++i) {
154
nrf51_timer_write(uint64_t addr, uint32_t value, unsigned size) "write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
155
168
+ for (j = 0; j < vq * 2; ++j) {
156
+# hw/timer/pl031.c
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
157
+pl031_irq_state(int level) "irq state %d"
170
+ }
158
+pl031_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
171
+ }
159
+pl031_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
172
+ return true;
160
+pl031_alarm_raised(void) "alarm raised"
173
+}
161
+pl031_set_alarm(uint32_t ticks) "alarm set for %u ticks"
174
+
175
static int target_restore_sigframe(CPUARMState *env,
176
struct target_rt_sigframe *sf)
177
{
178
struct target_aarch64_ctx *ctx, *extra = NULL;
179
struct target_fpsimd_context *fpsimd = NULL;
180
struct target_sve_context *sve = NULL;
181
+ struct target_za_context *za = NULL;
182
uint64_t extra_datap = 0;
183
bool used_extra = false;
184
int sve_size = 0;
185
+ int za_size = 0;
186
+ int svcr = 0;
187
188
target_restore_general_frame(env, sf);
189
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
191
sve_size = size;
192
break;
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
162
--
288
--
163
2.20.1
289
2.25.1
164
165
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Add "sve" to the sve prctl functions, to distinguish
4
them from the coming "sme" prctls with similar names.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 8 ++++----
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@
20
#ifndef AARCH64_TARGET_PRCTL_H
21
#define AARCH64_TARGET_PRCTL_H
22
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
{
26
ARMCPU *cpu = env_archcpu(env);
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
29
}
30
return -TARGET_EINVAL;
31
}
32
-#define do_prctl_get_vl do_prctl_get_vl
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/linux-user/syscall.c
52
+++ b/linux-user/syscall.c
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
54
#ifndef do_prctl_set_fp_mode
55
#define do_prctl_set_fp_mode do_prctl_inval1
56
#endif
57
-#ifndef do_prctl_get_vl
58
-#define do_prctl_get_vl do_prctl_inval0
59
+#ifndef do_prctl_sve_get_vl
60
+#define do_prctl_sve_get_vl do_prctl_inval0
61
#endif
62
-#ifndef do_prctl_set_vl
63
-#define do_prctl_set_vl do_prctl_inval1
64
+#ifndef do_prctl_sve_set_vl
65
+#define do_prctl_sve_set_vl do_prctl_inval1
66
#endif
67
#ifndef do_prctl_reset_keys
68
#define do_prctl_reset_keys do_prctl_inval1
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
case PR_SET_FP_MODE:
71
return do_prctl_set_fp_mode(env, arg2);
72
case PR_SVE_GET_VL:
73
- return do_prctl_get_vl(env);
74
+ return do_prctl_sve_get_vl(env);
75
case PR_SVE_SET_VL:
76
- return do_prctl_set_vl(env, arg2);
77
+ return do_prctl_sve_set_vl(env, arg2);
78
case PR_PAC_RESET_KEYS:
79
if (arg3 || arg4 || arg5) {
80
return -TARGET_EINVAL;
81
--
82
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
These prctls set the Streaming SVE vector length, which may
4
be completely different from the Normal SVE vector length.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
12
linux-user/syscall.c | 16 +++++++++
13
2 files changed, 70 insertions(+)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
{
21
ARMCPU *cpu = env_archcpu(env);
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
return sve_vq(env) * 16;
25
}
26
return -TARGET_EINVAL;
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
107
+#endif
108
109
#include "target_prctl.h"
110
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
112
#ifndef do_prctl_set_unalign
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
135
--
136
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
There's no reason to set CPACR_EL1.ZEN if SVE is disabled.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/cpu.c | 7 +++----
11
1 file changed, 3 insertions(+), 4 deletions(-)
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
18
/* and to the FP/Neon instructions */
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
20
CPACR_EL1, FPEN, 3);
21
- /* and to the SVE instructions */
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
- CPACR_EL1, ZEN, 3);
24
- /* with reasonable vector length */
25
+ /* and to the SVE instructions, with default vector length */
26
if (cpu_isar_feature(aa64_sve, cpu)) {
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
+ CPACR_EL1, ZEN, 3);
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
30
}
31
/*
32
--
33
2.25.1
diff view generated by jsdifflib
1
In commit 4b635cf7a95e501211 we added a QOM property to the ARMSSE
1
From: Richard Henderson <richard.henderson@linaro.org>
2
object, but forgot to add it to the documentation comment in the
3
header. Correct the omission.
4
2
5
Fixes: 4b635cf7a95e501211 ("hw/arm/armsse: Make SRAM bank size configurable")
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
---
9
include/hw/arm/armsse.h | 2 ++
10
target/arm/cpu.c | 11 +++++++++++
10
1 file changed, 2 insertions(+)
11
1 file changed, 11 insertions(+)
11
12
12
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/include/hw/arm/armsse.h
15
--- a/target/arm/cpu.c
15
+++ b/include/hw/arm/armsse.h
16
+++ b/target/arm/cpu.c
16
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
17
* being the same for both, to avoid having to have separate Property
18
CPACR_EL1, ZEN, 3);
18
* lists for different variants. This restriction can be relaxed later
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
19
* if necessary.)
20
}
20
+ * + QOM property "SRAM_ADDR_WIDTH" sets the number of bits used for the
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
21
+ * address of each SRAM bank (and thus the total amount of internal SRAM)
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
22
* + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0,
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
23
* which are wired to its NVIC lines 32 .. n+32
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
24
* + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for
25
+ CPACR_EL1, SMEN, 3);
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
29
+ SMCR, FA64, 1);
30
+ }
31
+ }
32
/*
33
* Enable 48-bit address space (TODO: take reserved_va into account).
34
* Enable TBI0 but not TBI1.
25
--
35
--
26
2.20.1
36
2.25.1
27
28
diff view generated by jsdifflib
1
The Musca boards have DAPLink firmware that sets the initial
1
From: Richard Henderson <richard.henderson@linaro.org>
2
secure VTOR value (the location of the vector table) differently
3
depending on the boot mode (from flash, from RAM, etc). Export
4
the init-svtor as a QOM property of the ARMSSE object so that
5
the board can change it.
6
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
---
7
---
11
include/hw/arm/armsse.h | 3 +++
8
linux-user/elfload.c | 20 ++++++++++++++++++++
12
hw/arm/armsse.c | 8 ++++----
9
1 file changed, 20 insertions(+)
13
2 files changed, 7 insertions(+), 4 deletions(-)
14
10
15
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/include/hw/arm/armsse.h
13
--- a/linux-user/elfload.c
18
+++ b/include/hw/arm/armsse.h
14
+++ b/linux-user/elfload.c
19
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ enum {
20
* if necessary.)
16
ARM_HWCAP2_A64_RNG = 1 << 16,
21
* + QOM property "SRAM_ADDR_WIDTH" sets the number of bits used for the
17
ARM_HWCAP2_A64_BTI = 1 << 17,
22
* address of each SRAM bank (and thus the total amount of internal SRAM)
18
ARM_HWCAP2_A64_MTE = 1 << 18,
23
+ * + QOM property "init-svtor" sets the initial value of the CPU SVTOR register
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
24
+ * (where it expects to load the PC and SP from the vector table on reset)
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
25
* + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0,
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
26
* which are wired to its NVIC lines 32 .. n+32
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
27
* + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
28
@@ -XXX,XX +XXX,XX @@ typedef struct ARMSSE {
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
29
uint32_t exp_numirq;
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
30
uint32_t mainclk_frq;
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
31
uint32_t sram_addr_width;
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
32
+ uint32_t init_svtor;
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
33
} ARMSSE;
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
34
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
35
typedef struct ARMSSEInfo ARMSSEInfo;
36
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/hw/arm/armsse.c
39
+++ b/hw/arm/armsse.c
40
@@ -XXX,XX +XXX,XX @@ static void armsse_realize(DeviceState *dev, Error **errp)
41
* the INITSVTOR* registers before powering up the CPUs in any case,
42
* so the hardware's default value doesn't matter. QEMU doesn't emulate
43
* the control processor, so instead we behave in the way that the
44
- * firmware does. All boards currently known about have firmware that
45
- * sets the INITSVTOR0 and INITSVTOR1 registers to 0x10000000, like the
46
- * IoTKit default. We can make this more configurable if necessary.
47
+ * firmware does. The initial value is configurable by the board code
48
+ * to match whatever its firmware does.
49
*/
50
- qdev_prop_set_uint32(cpudev, "init-svtor", 0x10000000);
51
+ qdev_prop_set_uint32(cpudev, "init-svtor", s->init_svtor);
52
/*
53
* Start all CPUs except CPU0 powered down. In real hardware it is
54
* a configurable property of the SSE-200 which CPUs start powered up
55
@@ -XXX,XX +XXX,XX @@ static Property armsse_properties[] = {
56
DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
57
DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
58
DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
59
+ DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
60
DEFINE_PROP_END_OF_LIST()
61
};
31
};
62
32
33
#define ELF_HWCAP get_elf_hwcap()
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
39
+ ARM_HWCAP2_A64_SME_F32F32 |
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
63
--
49
--
64
2.20.1
50
2.25.1
65
66
diff view generated by jsdifflib