1
arm queue: big stuff here is my MVE codegen optimisation,
1
I don't have anything else queued up at the moment, so this is just
2
and Alex's Apple Silicon hvf support.
2
Richard's SME patches.
3
3
4
-- PMM
4
-- PMM
5
5
6
The following changes since commit 7adb961995a3744f51396502b33ad04a56a317c3:
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
7
7
8
Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20210916' into staging (2021-09-19 18:53:29 +0100)
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
9
9
10
are available in the Git repository at:
10
are available in the Git repository at:
11
11
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210920
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
13
13
14
for you to fetch changes up to 1dc5a60bfe406bc1122d68cbdefda38d23134b27:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
15
15
16
target/arm: Optimize MVE 1op-immediate insns (2021-09-20 14:18:01 +0100)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
17
17
18
----------------------------------------------------------------
18
----------------------------------------------------------------
19
target-arm queue:
19
target-arm:
20
* Optimize codegen for MVE when predication not active
20
* Implement SME emulation, for both system and linux-user
21
* hvf: Add Apple Silicon support
22
* hw/intc: Set GIC maintenance interrupt level to only 0 or 1
23
* Fix mishandling of MVE FPSCR.LTPSIZE reset for usermode emulator
24
* elf2dmp: Fix coverity nits
25
21
26
----------------------------------------------------------------
22
----------------------------------------------------------------
27
Alexander Graf (7):
23
Richard Henderson (45):
28
arm: Move PMC register definitions to internals.h
24
target/arm: Handle SME in aarch64_cpu_dump_state
29
hvf: Add execute to dirty log permission bitmap
25
target/arm: Add infrastructure for disas_sme
30
hvf: Introduce hvf_arch_init() callback
26
target/arm: Trap non-streaming usage when Streaming SVE is active
31
hvf: Add Apple Silicon support
27
target/arm: Mark ADR as non-streaming
32
hvf: arm: Implement PSCI handling
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
33
arm: Add Hypervisor.framework build target
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
34
hvf: arm: Add rudimentary PMC support
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
35
69
36
Peter Collingbourne (1):
70
docs/system/arm/emulation.rst | 4 +
37
arm/hvf: Add a WFI handler
71
linux-user/aarch64/target_cpu.h | 5 +-
38
72
linux-user/aarch64/target_prctl.h | 62 +-
39
Peter Maydell (18):
73
target/arm/cpu.h | 7 +
40
elf2dmp: Check curl_easy_setopt() return value
74
target/arm/helper-sme.h | 126 ++++
41
elf2dmp: Fail cleanly if PDB file specifies zero block_size
75
target/arm/helper-sve.h | 4 +
42
target/arm: Don't skip M-profile reset entirely in user mode
76
target/arm/helper.h | 18 +
43
target/arm: Always clear exclusive monitor on reset
77
target/arm/translate-a64.h | 45 ++
44
target/arm: Consolidate ifdef blocks in reset
78
target/arm/translate.h | 16 +
45
hvf: arm: Implement -cpu host
79
target/arm/sme-fa64.decode | 60 ++
46
target/arm: Avoid goto_tb if we're trying to exit to the main loop
80
target/arm/sme.decode | 88 +++
47
target/arm: Enforce that FPDSCR.LTPSIZE is 4 on inbound migration
81
target/arm/sve.decode | 41 +-
48
target/arm: Add TB flag for "MVE insns not predicated"
82
linux-user/aarch64/cpu_loop.c | 9 +
49
target/arm: Optimize MVE logic ops
83
linux-user/aarch64/signal.c | 243 ++++++--
50
target/arm: Optimize MVE arithmetic ops
84
linux-user/elfload.c | 20 +
51
target/arm: Optimize MVE VNEG, VABS
85
linux-user/syscall.c | 28 +-
52
target/arm: Optimize MVE VDUP
86
target/arm/cpu.c | 35 +-
53
target/arm: Optimize MVE VMVN
87
target/arm/cpu64.c | 11 +
54
target/arm: Optimize MVE VSHL, VSHR immediate forms
88
target/arm/helper.c | 56 +-
55
target/arm: Optimize MVE VSHLL and VMOVL
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
56
target/arm: Optimize MVE VSLI and VSRI
90
target/arm/sve_helper.c | 28 +
57
target/arm: Optimize MVE 1op-immediate insns
91
target/arm/translate-a64.c | 103 +++-
58
92
target/arm/translate-sme.c | 373 ++++++++++++
59
Shashi Mallela (1):
93
target/arm/translate-sve.c | 393 ++++++++++---
60
hw/intc: Set GIC maintenance interrupt level to only 0 or 1
94
target/arm/translate-vfp.c | 12 +
61
95
target/arm/translate.c | 2 +
62
meson.build | 8 +
96
target/arm/vec_helper.c | 24 +
63
include/sysemu/hvf_int.h | 12 +-
97
target/arm/meson.build | 3 +
64
target/arm/cpu.h | 6 +-
98
28 files changed, 2821 insertions(+), 135 deletions(-)
65
target/arm/hvf_arm.h | 18 +
99
create mode 100644 target/arm/sme-fa64.decode
66
target/arm/internals.h | 44 ++
100
create mode 100644 target/arm/sme.decode
67
target/arm/kvm_arm.h | 2 -
101
create mode 100644 target/arm/translate-sme.c
68
target/arm/translate.h | 2 +
69
accel/hvf/hvf-accel-ops.c | 21 +-
70
contrib/elf2dmp/download.c | 22 +-
71
contrib/elf2dmp/pdb.c | 4 +
72
hw/intc/arm_gicv3_cpuif.c | 5 +-
73
target/arm/cpu.c | 56 +-
74
target/arm/helper.c | 77 ++-
75
target/arm/hvf/hvf.c | 1278 +++++++++++++++++++++++++++++++++++++++++
76
target/arm/machine.c | 13 +
77
target/arm/translate-m-nocp.c | 8 +-
78
target/arm/translate-mve.c | 310 +++++++---
79
target/arm/translate-vfp.c | 33 +-
80
target/arm/translate.c | 42 +-
81
target/i386/hvf/hvf.c | 10 +
82
MAINTAINERS | 5 +
83
target/arm/hvf/meson.build | 3 +
84
target/arm/hvf/trace-events | 11 +
85
target/arm/meson.build | 2 +
86
24 files changed, 1824 insertions(+), 168 deletions(-)
87
create mode 100644 target/arm/hvf_arm.h
88
create mode 100644 target/arm/hvf/hvf.c
89
create mode 100644 target/arm/hvf/meson.build
90
create mode 100644 target/arm/hvf/trace-events
91
diff view generated by jsdifflib
1
Move an ifndef CONFIG_USER_ONLY code block up in arm_cpu_reset() so
1
From: Richard Henderson <richard.henderson@linaro.org>
2
it can be merged with another earlier one.
3
2
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210914120725.24992-4-peter.maydell@linaro.org
7
---
9
---
8
target/arm/cpu.c | 22 ++++++++++------------
10
target/arm/cpu.c | 17 ++++++++++++++++-
9
1 file changed, 10 insertions(+), 12 deletions(-)
11
1 file changed, 16 insertions(+), 1 deletion(-)
10
12
11
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/cpu.c
15
--- a/target/arm/cpu.c
14
+++ b/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
15
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
16
env->uncached_cpsr = ARM_CPU_MODE_SVC;
18
int i;
19
int el = arm_current_el(env);
20
const char *ns_status;
21
+ bool sve;
22
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
24
for (i = 0; i < 32; i++) {
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
26
el,
27
psr & PSTATE_SP ? 'h' : 't');
28
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
31
+ env->svcr,
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
+ }
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
17
}
37
}
18
env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
19
+
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
20
+ /* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
21
+ * executing as AArch32 then check if highvecs are enabled and
41
22
+ * adjust the PC accordingly.
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
23
+ */
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
24
+ if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
44
+ sve = sme_exception_el(env, el) == 0;
25
+ env->regs[15] = 0xFFFF0000;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
26
+ }
49
+ }
27
+
50
+
28
+ env->vfp.xregs[ARM_VFP_FPEXC] = 0;
51
+ if (sve) {
29
#endif
52
int j, zcr_len = sve_vqm1_for_el(env, el);
30
53
31
if (arm_feature(env, ARM_FEATURE_M)) {
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
32
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
33
#endif
34
}
35
36
-#ifndef CONFIG_USER_ONLY
37
- /* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently
38
- * executing as AArch32 then check if highvecs are enabled and
39
- * adjust the PC accordingly.
40
- */
41
- if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
42
- env->regs[15] = 0xFFFF0000;
43
- }
44
-
45
- env->vfp.xregs[ARM_VFP_FPEXC] = 0;
46
-#endif
47
-
48
/* M profile requires that reset clears the exclusive monitor;
49
* A profile does not, but clearing it makes more sense than having it
50
* set with an exclusive access on address zero.
51
--
55
--
52
2.20.1
56
2.25.1
53
54
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Now that we have all logic in place that we need to handle Hypervisor.framework
3
This includes the build rules for the decoder, and the
4
on Apple Silicon systems, let's add CONFIG_HVF for aarch64 as well so that we
4
new file for translation, but excludes any instructions.
5
can build it.
6
5
7
Signed-off-by: Alexander Graf <agraf@csgraf.de>
8
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
9
Tested-by: Roman Bolshakov <r.bolshakov@yadro.com> (x86 only)
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20210916155404.86958-9-agraf@csgraf.de
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
10
---
15
meson.build | 7 +++++++
11
target/arm/translate-a64.h | 1 +
16
target/arm/hvf/meson.build | 3 +++
12
target/arm/sme.decode | 20 ++++++++++++++++++++
17
target/arm/meson.build | 2 ++
13
target/arm/translate-a64.c | 7 ++++++-
18
3 files changed, 12 insertions(+)
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
19
create mode 100644 target/arm/hvf/meson.build
15
target/arm/meson.build | 2 ++
16
5 files changed, 64 insertions(+), 1 deletion(-)
17
create mode 100644 target/arm/sme.decode
18
create mode 100644 target/arm/translate-sme.c
20
19
21
diff --git a/meson.build b/meson.build
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
22
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
23
--- a/meson.build
22
--- a/target/arm/translate-a64.h
24
+++ b/meson.build
23
+++ b/target/arm/translate-a64.h
25
@@ -XXX,XX +XXX,XX @@ else
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
26
endif
25
}
27
26
28
accelerator_targets = { 'CONFIG_KVM': kvm_targets }
27
bool disas_sve(DisasContext *, uint32_t);
29
+
28
+bool disas_sme(DisasContext *, uint32_t);
30
+if cpu in ['aarch64']
29
31
+ accelerator_targets += {
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
32
+ 'CONFIG_HVF': ['aarch64-softmmu']
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
33
+ }
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
34
+endif
35
+
36
if cpu in ['x86', 'x86_64', 'arm', 'aarch64']
37
# i386 emulator provides xenpv machine type for multiple architectures
38
accelerator_targets += {
39
diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build
40
new file mode 100644
33
new file mode 100644
41
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
42
--- /dev/null
35
--- /dev/null
43
+++ b/target/arm/hvf/meson.build
36
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
45
+arm_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
38
+# AArch64 SME instruction descriptions
46
+ 'hvf.c',
39
+#
47
+))
40
+# Copyright (c) 2022 Linaro, Ltd
41
+#
42
+# This library is free software; you can redistribute it and/or
43
+# modify it under the terms of the GNU Lesser General Public
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
47
+# This library is distributed in the hope that it will be useful,
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
54
+
55
+#
56
+# This file is processed by scripts/decodetree.py
57
+#
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
63
}
64
65
switch (extract32(insn, 25, 4)) {
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
67
+ case 0x0:
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
69
+ unallocated_encoding(s);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/translate-sme.c
81
@@ -XXX,XX +XXX,XX @@
82
+/*
83
+ * AArch64 SME translation
84
+ *
85
+ * Copyright (c) 2022 Linaro, Ltd
86
+ *
87
+ * This library is free software; you can redistribute it and/or
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
91
+ *
92
+ * This library is distributed in the hope that it will be useful,
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
99
+ */
100
+
101
+#include "qemu/osdep.h"
102
+#include "cpu.h"
103
+#include "tcg/tcg-op.h"
104
+#include "tcg/tcg-op-gvec.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
110
+
111
+
112
+/*
113
+ * Include the generated decoder.
114
+ */
115
+
116
+#include "decode-sme.c.inc"
48
diff --git a/target/arm/meson.build b/target/arm/meson.build
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
49
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/meson.build
119
--- a/target/arm/meson.build
51
+++ b/target/arm/meson.build
120
+++ b/target/arm/meson.build
52
@@ -XXX,XX +XXX,XX @@ arm_softmmu_ss.add(files(
121
@@ -XXX,XX +XXX,XX @@
53
'psci.c',
122
gen = [
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
129
'sme_helper.c',
130
'translate-a64.c',
131
'translate-sve.c',
132
+ 'translate-sme.c',
54
))
133
))
55
134
56
+subdir('hvf')
135
arm_softmmu_ss = ss.source_set()
57
+
58
target_arch += {'arm': arm_ss}
59
target_softmmu_arch += {'arm': arm_softmmu_ss}
60
--
136
--
61
2.20.1
137
2.25.1
62
63
diff view generated by jsdifflib
1
Our current codegen for MVE always calls out to helper functions,
1
From: Richard Henderson <richard.henderson@linaro.org>
2
because some byte lanes might be predicated. The common case is that
2
3
in fact there is no predication active and all lanes should be
3
This new behaviour is in the ARM pseudocode function
4
updated together, so we can produce better code by detecting that and
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
using the TCG generic vector infrastructure.
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
6
the trap would be delivered is in AArch64 mode.
7
Add a TB flag that is set when we can guarantee that there is no
7
8
active MVE predication, and a bool in the DisasContext. Subsequent
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
patches will use this flag to generate improved code for some
9
detection ought to be trivially true, but the pseudocode still contains
10
instructions.
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
11
support for EL[12] when v9 features are present.
12
In most cases when the predication state changes we simply end the TB
12
13
after that instruction. For the code called from vfp_access_check()
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
that handles lazy state preservation and creating a new FP context,
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
we can usually avoid having to try to end the TB because luckily the
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
new value of the flag following the register changes in those
16
17
sequences doesn't depend on any runtime decisions. We do have to end
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
the TB if the guest has enabled lazy FP state preservation but not
18
of instructions illegal in streaming mode.
19
automatic state preservation, but this is an odd corner case that is
19
20
not going to be common in real-world code.
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
21
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
Message-id: 20210913095440.13462-4-peter.maydell@linaro.org
25
---
24
---
26
target/arm/cpu.h | 4 +++-
25
target/arm/cpu.h | 7 +++
27
target/arm/translate.h | 2 ++
26
target/arm/translate.h | 4 ++
28
target/arm/helper.c | 33 +++++++++++++++++++++++++++++++++
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
29
target/arm/translate-m-nocp.c | 8 +++++++-
28
target/arm/helper.c | 41 +++++++++++++++++
30
target/arm/translate-mve.c | 13 ++++++++++++-
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
31
target/arm/translate-vfp.c | 33 +++++++++++++++++++++++++++------
30
target/arm/translate-vfp.c | 12 +++++
32
target/arm/translate.c | 8 ++++++++
31
target/arm/translate.c | 2 +
33
7 files changed, 92 insertions(+), 9 deletions(-)
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
34
35
35
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/cpu.h
38
--- a/target/arm/cpu.h
38
+++ b/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
39
@@ -XXX,XX +XXX,XX @@ typedef ARMCPU ArchCPU;
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
40
* | TBFLAG_AM32 | +-----+----------+
41
* the same thing as the current security state of the processor!
41
* | | |TBFLAG_M32|
42
* +-------------+----------------+----------+
43
- * 31 23 5 4 0
44
+ * 31 23 6 5 0
45
*
46
* Unless otherwise noted, these bits are cached in env->hflags.
47
*/
42
*/
48
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_M32, LSPACT, 2, 1) /* Not cached. */
43
FIELD(TBFLAG_A32, NS, 10, 1)
49
FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */
44
+/*
50
/* Set if FPCCR.S does not match current security state */
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
51
FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. */
46
+ * This requires an SME trap from AArch32 mode when using NEON.
52
+/* Set if MVE insns are definitely not predicated by VPR or LTPSIZE */
47
+ */
53
+FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
54
49
55
/*
50
/*
56
* Bit usage when in AArch64 state
51
* Bit usage when in AArch32 state, for M-profile only.
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
55
FIELD(TBFLAG_A64, SVL, 24, 4)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
58
59
/*
60
* Helpers for using the above.
57
diff --git a/target/arm/translate.h b/target/arm/translate.h
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
58
index XXXXXXX..XXXXXXX 100644
62
index XXXXXXX..XXXXXXX 100644
59
--- a/target/arm/translate.h
63
--- a/target/arm/translate.h
60
+++ b/target/arm/translate.h
64
+++ b/target/arm/translate.h
61
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
62
bool align_mem;
66
bool pstate_sm;
63
/* True if PSTATE.IL is set */
67
/* True if PSTATE.ZA is set. */
64
bool pstate_il;
68
bool pstate_za;
65
+ /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
66
+ bool mve_no_pred;
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
67
/*
75
/*
68
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
69
* < 0, set by the current instruction.
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/sme-fa64.decode
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
70
diff --git a/target/arm/helper.c b/target/arm/helper.c
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
71
index XXXXXXX..XXXXXXX 100644
173
index XXXXXXX..XXXXXXX 100644
72
--- a/target/arm/helper.c
174
--- a/target/arm/helper.c
73
+++ b/target/arm/helper.c
175
+++ b/target/arm/helper.c
74
@@ -XXX,XX +XXX,XX @@ static inline void assert_hflags_rebuild_correctly(CPUARMState *env)
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
75
#endif
177
return 0;
76
}
178
}
77
179
78
+static bool mve_no_pred(CPUARMState *env)
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
79
+{
181
+static bool sme_fa64(CPUARMState *env, int el)
182
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
184
+ return false;
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
212
80
+ /*
213
+ /*
81
+ * Return true if there is definitely no predication of MVE
214
+ * The SME exception we are testing for is raised via
82
+ * instructions by VPR or LTPSIZE. (Returning false even if there
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
83
+ * isn't any predication is OK; generated code will just be
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
84
+ * a little worse.)
85
+ * If the CPU does not implement MVE then this TB flag is always 0.
86
+ *
87
+ * NOTE: if you change this logic, the "recalculate s->mve_no_pred"
88
+ * logic in gen_update_fp_context() needs to be updated to match.
89
+ *
90
+ * We do not include the effect of the ECI bits here -- they are
91
+ * tracked in other TB flags. This simplifies the logic for
92
+ * "when did we emit code that changes the MVE_NO_PRED TB flag
93
+ * and thus need to end the TB?".
94
+ */
217
+ */
95
+ if (cpu_isar_feature(aa32_mve, env_archcpu(env))) {
218
+ if (el == 0
96
+ return false;
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
97
+ }
220
+ && (!arm_is_el2_enabled(env)
98
+ if (env->v7m.vpr) {
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
99
+ return false;
222
+ && arm_el_is_aa64(env, 1)
100
+ }
223
+ && !sme_fa64(env, el)) {
101
+ if (env->v7m.ltpsize < 4) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
102
+ return false;
225
+ }
103
+ }
226
+
104
+ return true;
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
105
+}
228
}
106
+
229
107
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
108
target_ulong *cs_base, uint32_t *pflags)
231
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
235
}
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
237
}
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/arm/translate-a64.c
241
+++ b/target/arm/translate-a64.c
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
243
* unallocated-encoding checks (otherwise the syndrome information
244
* for the resulting exception will be incorrect).
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
109
{
248
{
110
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
249
if (s->fp_excp_el) {
111
if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
250
assert(!s->fp_access_checked);
112
DP_TBFLAG_M32(flags, LSPACT, 1);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
113
}
114
+
115
+ if (mve_no_pred(env)) {
116
+ DP_TBFLAG_M32(flags, MVE_NO_PRED, 1);
117
+ }
118
} else {
119
/*
120
* Note that XSCALE_CPAR shares bits with VECSTRIDE.
121
diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/arm/translate-m-nocp.c
124
+++ b/target/arm/translate-m-nocp.c
125
@@ -XXX,XX +XXX,XX @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
126
127
clear_eci_state(s);
128
129
- /* End the TB, because we have updated FP control bits */
130
+ /*
131
+ * End the TB, because we have updated FP control bits,
132
+ * and possibly VPR or LTPSIZE.
133
+ */
134
s->base.is_jmp = DISAS_UPDATE_EXIT;
135
return true;
252
return true;
136
}
253
}
137
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
254
138
store_cpu_field(control, v7m.control[M_REG_S]);
255
+static bool fp_access_check(DisasContext *s)
139
tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
256
+{
140
gen_helper_vfp_set_fpscr(cpu_env, tmp);
257
+ if (!fp_access_check_only(s)) {
141
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
258
+ return false;
142
tcg_temp_free_i32(tmp);
259
+ }
143
tcg_temp_free_i32(sfpa);
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
144
break;
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
145
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
262
+ syn_smetrap(SME_ET_Streaming, false));
263
+ return false;
264
+ }
265
+ return true;
266
+}
267
+
268
/* Check that SVE access is enabled. If it is, return true.
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
277
return;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
279
return;
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
282
}
283
284
+/*
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
146
}
313
}
147
tmp = loadfn(s, opaque, true);
314
}
148
store_cpu_field(tmp, v7m.vpr);
315
149
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
316
+ s->is_nonstreaming = false;
150
break;
317
+ if (s->sme_trap_nonstreaming) {
151
case ARM_VFP_P0:
318
+ disas_sme_fa64(s, insn);
152
{
319
+ }
153
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
320
+
154
tcg_gen_deposit_i32(vpr, vpr, tmp,
321
switch (extract32(insn, 25, 4)) {
155
R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
322
case 0x0:
156
store_cpu_field(vpr, v7m.vpr);
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
157
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
158
tcg_temp_free_i32(tmp);
159
break;
160
}
161
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
162
index XXXXXXX..XXXXXXX 100644
163
--- a/target/arm/translate-mve.c
164
+++ b/target/arm/translate-mve.c
165
@@ -XXX,XX +XXX,XX @@ DO_LOGIC(VORR, gen_helper_mve_vorr)
166
DO_LOGIC(VORN, gen_helper_mve_vorn)
167
DO_LOGIC(VEOR, gen_helper_mve_veor)
168
169
-DO_LOGIC(VPSEL, gen_helper_mve_vpsel)
170
+static bool trans_VPSEL(DisasContext *s, arg_2op *a)
171
+{
172
+ /* This insn updates predication bits */
173
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
174
+ return do_2op(s, a, gen_helper_mve_vpsel);
175
+}
176
177
#define DO_2OP(INSN, FN) \
178
static bool trans_##INSN(DisasContext *s, arg_2op *a) \
179
@@ -XXX,XX +XXX,XX @@ static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
180
}
181
182
gen_helper_mve_vpnot(cpu_env);
183
+ /* This insn updates predication bits */
184
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
185
mve_update_eci(s);
186
return true;
187
}
188
@@ -XXX,XX +XXX,XX @@ static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
189
/* VPT */
190
gen_vpst(s, a->mask);
191
}
192
+ /* This insn updates predication bits */
193
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
194
mve_update_eci(s);
195
return true;
196
}
197
@@ -XXX,XX +XXX,XX @@ static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
198
/* VPT */
199
gen_vpst(s, a->mask);
200
}
201
+ /* This insn updates predication bits */
202
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
203
mve_update_eci(s);
204
return true;
205
}
206
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
207
index XXXXXXX..XXXXXXX 100644
325
index XXXXXXX..XXXXXXX 100644
208
--- a/target/arm/translate-vfp.c
326
--- a/target/arm/translate-vfp.c
209
+++ b/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
210
@@ -XXX,XX +XXX,XX @@ static inline long vfp_f16_offset(unsigned reg, bool top)
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
211
* Generate code for M-profile lazy FP state preservation if needed;
329
return false;
212
* this corresponds to the pseudocode PreserveFPState() function.
330
}
213
*/
331
214
-static void gen_preserve_fp_state(DisasContext *s)
332
+ /*
215
+static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
216
{
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
217
if (s->v7m_lspact) {
335
+ * appear to be any insns which touch VFP which are allowed.
218
/*
336
+ */
219
@@ -XXX,XX +XXX,XX @@ static void gen_preserve_fp_state(DisasContext *s)
337
+ if (s->sme_trap_nonstreaming) {
220
* any further FP insns in this TB.
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
221
*/
339
+ syn_smetrap(SME_ET_Streaming,
222
s->v7m_lspact = false;
340
+ s->base.pc_next - s->pc_curr == 2));
223
+ /*
341
+ return false;
224
+ * The helper might have zeroed VPR, so we do not know the
342
+ }
225
+ * correct value for the MVE_NO_PRED TB flag any more.
343
+
226
+ * If we're about to create a new fp context then that
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
227
+ * will precisely determine the MVE_NO_PRED value (see
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
228
+ * gen_update_fp_context()). Otherwise, we must:
346
unallocated_encoding(s);
229
+ * - set s->mve_no_pred to false, so this instruction
230
+ * is generated to use helper functions
231
+ * - end the TB now, without chaining to the next TB
232
+ */
233
+ if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
234
+ s->mve_no_pred = false;
235
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
236
+ }
237
}
238
}
239
240
@@ -XXX,XX +XXX,XX @@ static void gen_update_fp_context(DisasContext *s)
241
TCGv_i32 z32 = tcg_const_i32(0);
242
store_cpu_field(z32, v7m.vpr);
243
}
244
-
245
/*
246
- * We don't need to arrange to end the TB, because the only
247
- * parts of FPSCR which we cache in the TB flags are the VECLEN
248
- * and VECSTRIDE, and those don't exist for M-profile.
249
+ * We just updated the FPSCR and VPR. Some of this state is cached
250
+ * in the MVE_NO_PRED TB flag. We want to avoid having to end the
251
+ * TB here, which means we need the new value of the MVE_NO_PRED
252
+ * flag to be exactly known here and the same for all executions.
253
+ * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
254
+ * always set to 0, so the new MVE_NO_PRED flag is always 1
255
+ * if and only if we have MVE.
256
+ *
257
+ * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
258
+ * but those do not exist for M-profile, so are not relevant here.)
259
*/
260
+ s->mve_no_pred = dc_isar_feature(aa32_mve, s);
261
262
if (s->v8m_secure) {
263
bits |= R_V7M_CONTROL_SFPA_MASK;
264
@@ -XXX,XX +XXX,XX @@ bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
265
/* Handle M-profile lazy FP state mechanics */
266
267
/* Trigger lazy-state preservation if necessary */
268
- gen_preserve_fp_state(s);
269
+ gen_preserve_fp_state(s, skip_context_update);
270
271
if (!skip_context_update) {
272
/* Update ownership of FP context and create new FP context if needed */
273
diff --git a/target/arm/translate.c b/target/arm/translate.c
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
274
index XXXXXXX..XXXXXXX 100644
348
index XXXXXXX..XXXXXXX 100644
275
--- a/target/arm/translate.c
349
--- a/target/arm/translate.c
276
+++ b/target/arm/translate.c
350
+++ b/target/arm/translate.c
277
@@ -XXX,XX +XXX,XX @@ static bool trans_DLS(DisasContext *s, arg_DLS *a)
278
/* DLSTP: set FPSCR.LTPSIZE */
279
tmp = tcg_const_i32(a->size);
280
store_cpu_field(tmp, v7m.ltpsize);
281
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
282
}
283
return true;
284
}
285
@@ -XXX,XX +XXX,XX @@ static bool trans_WLS(DisasContext *s, arg_WLS *a)
286
assert(ok);
287
tmp = tcg_const_i32(a->size);
288
store_cpu_field(tmp, v7m.ltpsize);
289
+ /*
290
+ * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
291
+ * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
292
+ */
293
}
294
gen_jmp_tb(s, s->base.pc_next, 1);
295
296
@@ -XXX,XX +XXX,XX @@ static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
297
gen_helper_mve_vctp(cpu_env, masklen);
298
tcg_temp_free_i32(masklen);
299
tcg_temp_free_i32(rn_shifted);
300
+ /* This insn updates predication bits */
301
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
302
mve_update_eci(s);
303
return true;
304
}
305
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
306
dc->v7m_new_fp_ctxt_needed =
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
307
EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
308
dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
354
}
309
+ dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
355
+ dc->sme_trap_nonstreaming =
310
} else {
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
311
dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
357
}
312
dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
358
dc->cp_regs = cpu->cp_regs;
359
dc->features = env->features;
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
361
index XXXXXXX..XXXXXXX 100644
362
--- a/target/arm/meson.build
363
+++ b/target/arm/meson.build
364
@@ -XXX,XX +XXX,XX @@
365
gen = [
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
313
--
372
--
314
2.20.1
373
2.25.1
315
316
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark ADR as a non-streaming instruction, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Removing entries from sme-fa64.decode is an easy way to see
7
what remains to be done.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 7 +++++++
15
target/arm/sme-fa64.decode | 1 -
16
target/arm/translate-sve.c | 8 ++++----
17
3 files changed, 11 insertions(+), 5 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
26
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
29
+ { \
30
+ s->is_nonstreaming = true; \
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
+ }
33
+
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/sme-fa64.decode
38
+++ b/target/arm/sme-fa64.decode
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
42
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
53
}
54
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
63
64
/*
65
*** SVE Integer Misc - Unpredicated Group
66
--
67
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
49
}
50
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
58
--
59
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
34
NULL, gen_helper_sve_fexpa_h,
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
92
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
13
2 files changed, 15 insertions(+), 11 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
34
NULL, gen_helper_sve2_pmull_d,
35
};
36
- if (a->esz == 0
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
38
- : !dc_isar_feature(aa64_sve, s)) {
39
+
40
+ if (a->esz == 0) {
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
42
+ return false;
43
+ }
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
46
return false;
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
51
*/
52
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
59
+ 0, FPST_FPCR)
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
62
+ 0, FPST_FPCR)
63
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
68
gen_helper_gvec_bfdot_idx, a)
69
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
71
- gen_helper_gvec_bfmmla, a, 0)
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
73
+ gen_helper_gvec_bfmmla, a, 0)
74
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
76
{
77
--
78
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 15 +++++++++++----
13
2 files changed, 11 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
36
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
43
44
/*
45
*** SVE Floating Point Accumulating Reduction Group
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
48
return false;
49
}
50
+ s->is_nonstreaming = true;
51
if (!sve_access_check(s)) {
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
55
DO_FP3(FADD_zzz, fadd)
56
DO_FP3(FSUB_zzz, fsub)
57
DO_FP3(FMUL_zzz, fmul)
58
-DO_FP3(FTSMUL, ftsmul)
59
DO_FP3(FRECPS, recps)
60
DO_FP3(FRSQRTS, rsqrts)
61
62
#undef DO_FP3
63
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
65
+ NULL, gen_helper_gvec_ftsmul_h,
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
67
+};
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
69
+ ftsmul_fns[a->esz], a, 0)
70
+
71
/*
72
*** SVE Floating Point Arithmetic - Predicated Group
73
*/
74
--
75
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
50
--
51
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
92
--
93
2.25.1
diff view generated by jsdifflib
1
Optimize the MVE VDUP insns by using TCG vector ops when possible.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210913095440.13462-8-peter.maydell@linaro.org
6
---
10
---
7
target/arm/translate-mve.c | 12 ++++++++----
11
target/arm/sme-fa64.decode | 9 ---------
8
1 file changed, 8 insertions(+), 4 deletions(-)
12
target/arm/translate-sve.c | 6 ++++++
13
2 files changed, 6 insertions(+), 9 deletions(-)
9
14
10
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate-mve.c
17
--- a/target/arm/sme-fa64.decode
13
+++ b/target/arm/translate-mve.c
18
+++ b/target/arm/sme-fa64.decode
14
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
if (!dc_isar_feature(aa64_sve, s)) {
45
return false;
46
}
47
+ s->is_nonstreaming = true;
48
if (!sve_access_check(s)) {
15
return true;
49
return true;
16
}
50
}
17
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
18
- qd = mve_qreg_ptr(a->qd);
52
if (!dc_isar_feature(aa64_sve, s)) {
19
rt = load_reg(s, a->rt);
53
return false;
20
- tcg_gen_dup_i32(a->size, rt, rt);
54
}
21
- gen_helper_mve_vdup(cpu_env, qd, rt);
55
+ s->is_nonstreaming = true;
22
- tcg_temp_free_ptr(qd);
56
if (!sve_access_check(s)) {
23
+ if (mve_no_predication(s)) {
57
return true;
24
+ tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
58
}
25
+ } else {
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
26
+ qd = mve_qreg_ptr(a->qd);
60
if (!dc_isar_feature(aa64_sve2, s)) {
27
+ tcg_gen_dup_i32(a->size, rt, rt);
61
return false;
28
+ gen_helper_mve_vdup(cpu_env, qd, rt);
62
}
29
+ tcg_temp_free_ptr(qd);
63
+ s->is_nonstreaming = true;
30
+ }
64
if (!sve_access_check(s)) {
31
tcg_temp_free_i32(rt);
65
return true;
32
mve_update_eci(s);
66
}
33
return true;
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
if (!dc_isar_feature(aa64_sve, s)) {
69
return false;
70
}
71
+ s->is_nonstreaming = true;
72
if (!sve_access_check(s)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
if (!dc_isar_feature(aa64_sve, s)) {
77
return false;
78
}
79
+ s->is_nonstreaming = true;
80
if (!sve_access_check(s)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
90
}
34
--
91
--
35
2.20.1
92
2.25.1
36
37
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap if full
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/sme-fa64.decode | 3 ---
13
target/arm/sve.decode | 10 +++++-----
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
16
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/sme-fa64.decode
20
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
72
}
73
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
75
+{
76
+ if (!dc_isar_feature(aa64_sve, s)) {
77
+ return false;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
83
+}
84
+
85
/*
86
* Move Prefix
87
*
88
--
89
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 2 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
32
if (!dc_isar_feature(aa64_sve, s)) {
33
return false;
34
}
35
+ s->is_nonstreaming = true;
36
if (sve_access_check(s)) {
37
TCGv_i64 addr = new_tmp_a64(s);
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
48
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
-
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-sve.c
29
+++ b/target/arm/translate-sve.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
if (a->rm == 31) {
32
return false;
33
}
34
+ s->is_nonstreaming = true;
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
46
--
47
2.25.1
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We can expose cycle counters on the PMU easily. To be as compatible as
3
These functions will be used to verify that the cpu
4
possible, let's do so, but make sure we don't expose any other architectural
4
is in the correct state for a given instruction.
5
counters that we can not model yet.
6
5
7
This allows OSs to work that require PMU support.
8
9
Signed-off-by: Alexander Graf <agraf@csgraf.de>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Message-id: 20210916155404.86958-10-agraf@csgraf.de
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
target/arm/hvf/hvf.c | 179 +++++++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
15
1 file changed, 179 insertions(+)
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
13
2 files changed, 55 insertions(+)
16
14
17
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/hvf/hvf.c
17
--- a/target/arm/translate-a64.h
20
+++ b/target/arm/hvf/hvf.c
18
+++ b/target/arm/translate-a64.h
21
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
22
#define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4)
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
23
#define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4)
21
unsigned int imms, unsigned int immr);
24
#define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1)
22
bool sve_access_check(DisasContext *s);
25
+#define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0)
23
+bool sme_enabled_check(DisasContext *s);
26
+#define SYSREG_PMUSERENR_EL0 SYSREG(3, 3, 9, 14, 0)
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
27
+#define SYSREG_PMCNTENSET_EL0 SYSREG(3, 3, 9, 12, 1)
25
+
28
+#define SYSREG_PMCNTENCLR_EL0 SYSREG(3, 3, 9, 12, 2)
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
29
+#define SYSREG_PMINTENCLR_EL1 SYSREG(3, 0, 9, 14, 2)
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
30
+#define SYSREG_PMOVSCLR_EL0 SYSREG(3, 3, 9, 12, 3)
31
+#define SYSREG_PMSWINC_EL0 SYSREG(3, 3, 9, 12, 4)
32
+#define SYSREG_PMSELR_EL0 SYSREG(3, 3, 9, 12, 5)
33
+#define SYSREG_PMCEID0_EL0 SYSREG(3, 3, 9, 12, 6)
34
+#define SYSREG_PMCEID1_EL0 SYSREG(3, 3, 9, 12, 7)
35
+#define SYSREG_PMCCNTR_EL0 SYSREG(3, 3, 9, 13, 0)
36
+#define SYSREG_PMCCFILTR_EL0 SYSREG(3, 3, 14, 15, 7)
37
38
#define WFX_IS_WFE (1 << 0)
39
40
@@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
41
val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
42
gt_cntfrq_period_ns(arm_cpu);
43
break;
44
+ case SYSREG_PMCR_EL0:
45
+ val = env->cp15.c9_pmcr;
46
+ break;
47
+ case SYSREG_PMCCNTR_EL0:
48
+ pmu_op_start(env);
49
+ val = env->cp15.c15_ccnt;
50
+ pmu_op_finish(env);
51
+ break;
52
+ case SYSREG_PMCNTENCLR_EL0:
53
+ val = env->cp15.c9_pmcnten;
54
+ break;
55
+ case SYSREG_PMOVSCLR_EL0:
56
+ val = env->cp15.c9_pmovsr;
57
+ break;
58
+ case SYSREG_PMSELR_EL0:
59
+ val = env->cp15.c9_pmselr;
60
+ break;
61
+ case SYSREG_PMINTENCLR_EL1:
62
+ val = env->cp15.c9_pminten;
63
+ break;
64
+ case SYSREG_PMCCFILTR_EL0:
65
+ val = env->cp15.pmccfiltr_el0;
66
+ break;
67
+ case SYSREG_PMCNTENSET_EL0:
68
+ val = env->cp15.c9_pmcnten;
69
+ break;
70
+ case SYSREG_PMUSERENR_EL0:
71
+ val = env->cp15.c9_pmuserenr;
72
+ break;
73
+ case SYSREG_PMCEID0_EL0:
74
+ case SYSREG_PMCEID1_EL0:
75
+ /* We can't really count anything yet, declare all events invalid */
76
+ val = 0;
77
+ break;
78
case SYSREG_OSLSR_EL1:
79
val = env->cp15.oslsr_el1;
80
break;
81
@@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
82
return 0;
83
}
84
85
+static void pmu_update_irq(CPUARMState *env)
86
+{
28
+{
87
+ ARMCPU *cpu = env_archcpu(env);
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
88
+ qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) &&
89
+ (env->cp15.c9_pminten & env->cp15.c9_pmovsr));
90
+}
30
+}
91
+
31
+
92
+static bool pmu_event_supported(uint16_t number)
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
33
+static inline bool sme_za_enabled_check(DisasContext *s)
93
+{
34
+{
94
+ return false;
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
95
+}
36
+}
96
+
37
+
97
+/* Returns true if the counter (pass 31 for PMCCNTR) should count events using
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
98
+ * the current EL, security state, and register configuration.
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
99
+ */
100
+static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
101
+{
40
+{
102
+ uint64_t filter;
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
103
+ bool enabled, filtered = true;
104
+ int el = arm_current_el(env);
105
+
106
+ enabled = (env->cp15.c9_pmcr & PMCRE) &&
107
+ (env->cp15.c9_pmcnten & (1 << counter));
108
+
109
+ if (counter == 31) {
110
+ filter = env->cp15.pmccfiltr_el0;
111
+ } else {
112
+ filter = env->cp15.c14_pmevtyper[counter];
113
+ }
114
+
115
+ if (el == 0) {
116
+ filtered = filter & PMXEVTYPER_U;
117
+ } else if (el == 1) {
118
+ filtered = filter & PMXEVTYPER_P;
119
+ }
120
+
121
+ if (counter != 31) {
122
+ /*
123
+ * If not checking PMCCNTR, ensure the counter is setup to an event we
124
+ * support
125
+ */
126
+ uint16_t event = filter & PMXEVTYPER_EVTCOUNT;
127
+ if (!pmu_event_supported(event)) {
128
+ return false;
129
+ }
130
+ }
131
+
132
+ return enabled && !filtered;
133
+}
42
+}
134
+
43
+
135
+static void pmswinc_write(CPUARMState *env, uint64_t value)
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
52
return true;
53
}
54
55
+/* This function corresponds to CheckSMEEnabled. */
56
+bool sme_enabled_check(DisasContext *s)
136
+{
57
+{
137
+ unsigned int i;
58
+ /*
138
+ for (i = 0; i < pmu_num_counters(env); i++) {
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
139
+ /* Increment a counter's count iff: */
60
+ * to be zero when fp_excp_el has priority. This is because we need
140
+ if ((value & (1 << i)) && /* counter's bit is set */
61
+ * sme_excp_el by itself for cpregs access checks.
141
+ /* counter is enabled and not filtered */
62
+ */
142
+ pmu_counter_enabled(env, i) &&
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
143
+ /* counter is SW_INCR */
64
+ s->fp_access_checked = true;
144
+ (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) {
65
+ return sme_access_check(s);
145
+ /*
146
+ * Detect if this write causes an overflow since we can't predict
147
+ * PMSWINC overflows like we can for other events
148
+ */
149
+ uint32_t new_pmswinc = env->cp15.c14_pmevcntr[i] + 1;
150
+
151
+ if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & INT32_MIN) {
152
+ env->cp15.c9_pmovsr |= (1 << i);
153
+ pmu_update_irq(env);
154
+ }
155
+
156
+ env->cp15.c14_pmevcntr[i] = new_pmswinc;
157
+ }
158
+ }
66
+ }
67
+ return fp_access_check_only(s);
159
+}
68
+}
160
+
69
+
161
static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
162
{
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
163
ARMCPU *arm_cpu = ARM_CPU(cpu);
72
+{
164
@@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
73
+ if (!sme_enabled_check(s)) {
165
val);
74
+ return false;
166
75
+ }
167
switch (reg) {
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
168
+ case SYSREG_PMCCNTR_EL0:
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
169
+ pmu_op_start(env);
78
+ syn_smetrap(SME_ET_NotStreaming, false));
170
+ env->cp15.c15_ccnt = val;
79
+ return false;
171
+ pmu_op_finish(env);
80
+ }
172
+ break;
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
173
+ case SYSREG_PMCR_EL0:
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
174
+ pmu_op_start(env);
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
85
+ }
86
+ return true;
87
+}
175
+
88
+
176
+ if (val & PMCRC) {
89
/*
177
+ /* The counter has been reset */
90
* This utility function is for doing register extension with an
178
+ env->cp15.c15_ccnt = 0;
91
* optional shift. You will likely want to pass a temporary for the
179
+ }
180
+
181
+ if (val & PMCRP) {
182
+ unsigned int i;
183
+ for (i = 0; i < pmu_num_counters(env); i++) {
184
+ env->cp15.c14_pmevcntr[i] = 0;
185
+ }
186
+ }
187
+
188
+ env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK;
189
+ env->cp15.c9_pmcr |= (val & PMCR_WRITEABLE_MASK);
190
+
191
+ pmu_op_finish(env);
192
+ break;
193
+ case SYSREG_PMUSERENR_EL0:
194
+ env->cp15.c9_pmuserenr = val & 0xf;
195
+ break;
196
+ case SYSREG_PMCNTENSET_EL0:
197
+ env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env));
198
+ break;
199
+ case SYSREG_PMCNTENCLR_EL0:
200
+ env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env));
201
+ break;
202
+ case SYSREG_PMINTENCLR_EL1:
203
+ pmu_op_start(env);
204
+ env->cp15.c9_pminten |= val;
205
+ pmu_op_finish(env);
206
+ break;
207
+ case SYSREG_PMOVSCLR_EL0:
208
+ pmu_op_start(env);
209
+ env->cp15.c9_pmovsr &= ~val;
210
+ pmu_op_finish(env);
211
+ break;
212
+ case SYSREG_PMSWINC_EL0:
213
+ pmu_op_start(env);
214
+ pmswinc_write(env, val);
215
+ pmu_op_finish(env);
216
+ break;
217
+ case SYSREG_PMSELR_EL0:
218
+ env->cp15.c9_pmselr = val & 0x1f;
219
+ break;
220
+ case SYSREG_PMCCFILTR_EL0:
221
+ pmu_op_start(env);
222
+ env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0;
223
+ pmu_op_finish(env);
224
+ break;
225
case SYSREG_OSLAR_EL1:
226
env->cp15.oslsr_el1 = val & 1;
227
break;
228
--
92
--
229
2.20.1
93
2.25.1
230
231
diff view generated by jsdifflib
1
Optimize the MVE VNEG and VABS insns by using TCG
1
From: Richard Henderson <richard.henderson@linaro.org>
2
vector ops when possible.
3
2
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
SVE mode, and for SME present but SVE absent.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210913095440.13462-7-peter.maydell@linaro.org
8
---
10
---
9
target/arm/translate-mve.c | 32 ++++++++++++++++++++++----------
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
10
1 file changed, 22 insertions(+), 10 deletions(-)
12
1 file changed, 16 insertions(+), 6 deletions(-)
11
13
12
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/translate-mve.c
16
--- a/target/arm/translate-a64.c
15
+++ b/target/arm/translate-mve.c
17
+++ b/target/arm/translate-a64.c
16
@@ -XXX,XX +XXX,XX @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
17
return true;
19
return true;
18
}
20
}
19
21
20
-static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
22
-/* Check that SVE access is enabled. If it is, return true.
21
+static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
23
+/*
22
+ GVecGen2Fn vecfn)
24
+ * Check that SVE access is enabled. If it is, return true.
25
* If not, emit code to generate an appropriate exception and return false.
26
+ * This function corresponds to CheckSVEEnabled().
27
*/
28
bool sve_access_check(DisasContext *s)
23
{
29
{
24
TCGv_ptr qd, qm;
30
- if (s->sve_excp_el) {
25
31
- assert(!s->sve_access_checked);
26
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
32
- s->sve_access_checked = true;
27
return true;
33
-
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
35
+ assert(dc_isar_feature(aa64_sme, s));
36
+ if (!sme_sm_enabled_check(s)) {
37
+ goto fail_exit;
38
+ }
39
+ } else if (s->sve_excp_el) {
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
41
syn_sve_access_trap(), s->sve_excp_el);
42
- return false;
43
+ goto fail_exit;
28
}
44
}
29
45
s->sve_access_checked = true;
30
- qd = mve_qreg_ptr(a->qd);
46
return fp_access_check(s);
31
- qm = mve_qreg_ptr(a->qm);
47
+
32
- fn(cpu_env, qd, qm);
48
+ fail_exit:
33
- tcg_temp_free_ptr(qd);
49
+ /* Assert that we only raise one exception per instruction. */
34
- tcg_temp_free_ptr(qm);
50
+ assert(!s->sve_access_checked);
35
+ if (vecfn && mve_no_predication(s)) {
51
+ s->sve_access_checked = true;
36
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
52
+ return false;
37
+ } else {
38
+ qd = mve_qreg_ptr(a->qd);
39
+ qm = mve_qreg_ptr(a->qm);
40
+ fn(cpu_env, qd, qm);
41
+ tcg_temp_free_ptr(qd);
42
+ tcg_temp_free_ptr(qm);
43
+ }
44
mve_update_eci(s);
45
return true;
46
}
53
}
47
54
48
-#define DO_1OP(INSN, FN) \
55
/*
49
+static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
50
+{
51
+ return do_1op_vec(s, a, fn, NULL);
52
+}
53
+
54
+#define DO_1OP_VEC(INSN, FN, VECFN) \
55
static bool trans_##INSN(DisasContext *s, arg_1op *a) \
56
{ \
57
static MVEGenOneOpFn * const fns[] = { \
58
@@ -XXX,XX +XXX,XX @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
59
gen_helper_mve_##FN##w, \
60
NULL, \
61
}; \
62
- return do_1op(s, a, fns[a->size]); \
63
+ return do_1op_vec(s, a, fns[a->size], VECFN); \
64
}
65
66
+#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
67
+
68
DO_1OP(VCLZ, vclz)
69
DO_1OP(VCLS, vcls)
70
-DO_1OP(VABS, vabs)
71
-DO_1OP(VNEG, vneg)
72
+DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
73
+DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
74
DO_1OP(VQABS, vqabs)
75
DO_1OP(VQNEG, vqneg)
76
DO_1OP(VMAXA, vmaxa)
77
--
56
--
78
2.20.1
57
2.25.1
79
80
diff view generated by jsdifflib
1
Optimize the MVE 1op-immediate insns (VORR, VBIC, VMOV) to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
use TCG vector ops when possible.
3
2
3
These SME instructions are nominally within the SVE decode space,
4
so we add them to sve.decode and translate-sve.c.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210913095440.13462-13-peter.maydell@linaro.org
7
---
10
---
8
target/arm/translate-mve.c | 26 +++++++++++++++++++++-----
11
target/arm/translate-a64.h | 12 ++++++++++++
9
1 file changed, 21 insertions(+), 5 deletions(-)
12
target/arm/sve.decode | 5 ++++-
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
14
3 files changed, 54 insertions(+), 1 deletion(-)
10
15
11
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
12
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-mve.c
18
--- a/target/arm/translate-a64.h
14
+++ b/target/arm/translate-mve.c
19
+++ b/target/arm/translate-a64.h
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
21
return s->vl;
22
}
23
24
+/* Return the byte size of the vector register, SVL / 8. */
25
+static inline int streaming_vec_reg_size(DisasContext *s)
26
+{
27
+ return s->svl;
28
+}
29
+
30
/*
31
* Return the offset info CPUARMState of the predicate vector register Pn.
32
* Note for this purpose, FFR is P16.
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
34
return s->vl >> 3;
35
}
36
37
+/* Return the byte size of the predicate register, SVL / 64. */
38
+static inline int streaming_pred_reg_size(DisasContext *s)
39
+{
40
+ return s->svl >> 3;
41
+}
42
+
43
/*
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
51
# SVE index generation (register start, register increment)
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
53
54
-### SVE Stack Allocation Group
55
+### SVE / Streaming SVE Stack Allocation Group
56
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
16
return true;
74
return true;
17
}
75
}
18
76
19
-static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
20
+static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
78
+{
21
+ GVecGen2iFn *vecfn)
79
+ if (!dc_isar_feature(aa64_sme, s)) {
80
+ return false;
81
+ }
82
+ if (sme_enabled_check(s)) {
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
86
+ }
87
+ return true;
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
22
{
91
{
23
TCGv_ptr qd;
92
if (!dc_isar_feature(aa64_sve, s)) {
24
uint64_t imm;
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
25
@@ -XXX,XX +XXX,XX @@ static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
26
27
imm = asimd_imm_const(a->imm, a->cmode, a->op);
28
29
- qd = mve_qreg_ptr(a->qd);
30
- fn(cpu_env, qd, tcg_constant_i64(imm));
31
- tcg_temp_free_ptr(qd);
32
+ if (vecfn && mve_no_predication(s)) {
33
+ vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
34
+ imm, 16, 16);
35
+ } else {
36
+ qd = mve_qreg_ptr(a->qd);
37
+ fn(cpu_env, qd, tcg_constant_i64(imm));
38
+ tcg_temp_free_ptr(qd);
39
+ }
40
mve_update_eci(s);
41
return true;
94
return true;
42
}
95
}
43
96
44
+static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
45
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
46
+{
98
+{
47
+ tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
99
+ if (!dc_isar_feature(aa64_sme, s)) {
100
+ return false;
101
+ }
102
+ if (sme_enabled_check(s)) {
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
106
+ }
107
+ return true;
48
+}
108
+}
49
+
109
+
50
static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
51
{
111
{
52
/* Handle decode of cmode/op here between VORR/VBIC/VMOV */
112
if (!dc_isar_feature(aa64_sve, s)) {
53
MVEGenOneOpImmFn *fn;
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
54
+ GVecGen2iFn *vecfn;
114
return true;
55
56
if ((a->cmode & 1) && a->cmode < 12) {
57
if (a->op) {
58
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
59
* so the VBIC becomes a logical AND operation.
60
*/
61
fn = gen_helper_mve_vandi;
62
+ vecfn = tcg_gen_gvec_andi;
63
} else {
64
fn = gen_helper_mve_vorri;
65
+ vecfn = tcg_gen_gvec_ori;
66
}
67
} else {
68
/* There is one unallocated cmode/op combination in this space */
69
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
70
}
71
/* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
72
fn = gen_helper_mve_vmovi;
73
+ vecfn = gen_gvec_vmovi;
74
}
75
- return do_1imm(s, a, fn);
76
+ return do_1imm(s, a, fn, vecfn);
77
}
115
}
78
116
79
static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
+{
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
127
+}
128
+
129
/*
130
*** SVE Compute Vector Address Group
131
*/
80
--
132
--
81
2.20.1
133
2.25.1
82
83
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 4 ++++
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
12
4 files changed, 44 insertions(+)
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@
19
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
22
+
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@
29
#
30
# This file is processed by scripts/decodetree.py
31
#
32
+
33
+### SME Misc
34
+
35
+ZERO 11000000 00 001 00000000000 imm:8
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
43
}
44
+
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
46
+{
47
+ uint32_t i;
48
+
49
+ /*
50
+ * Special case clearing the entire ZA space.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * parts of the ZA storage outside of SVL.
53
+ */
54
+ if (imm == 0xff) {
55
+ memset(env->zarray, 0, sizeof(env->zarray));
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sme.c
72
+++ b/target/arm/translate-sme.c
73
@@ -XXX,XX +XXX,XX @@
74
*/
75
76
#include "decode-sme.c.inc"
77
+
78
+
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
80
+{
81
+ if (!dc_isar_feature(aa64_sme, s)) {
82
+ return false;
83
+ }
84
+ if (sme_za_enabled_check(s)) {
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
87
+ }
88
+ return true;
89
+}
90
--
91
2.25.1
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We will need to install a migration helper for the ARM hvf backend.
3
We can reuse the SVE functions for implementing moves to/from
4
Let's introduce an arch callback for the overall hvf init chain to
4
horizontal tile slices, but we need new ones for moves to/from
5
do so.
5
vertical tile slices.
6
6
7
Signed-off-by: Alexander Graf <agraf@csgraf.de>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Message-id: 20210916155404.86958-4-agraf@csgraf.de
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
include/sysemu/hvf_int.h | 1 +
12
target/arm/helper-sme.h | 12 +++
13
accel/hvf/hvf-accel-ops.c | 3 ++-
13
target/arm/helper-sve.h | 2 +
14
target/i386/hvf/hvf.c | 5 +++++
14
target/arm/translate-a64.h | 8 ++
15
3 files changed, 8 insertions(+), 1 deletion(-)
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
16
21
17
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
18
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
19
--- a/include/sysemu/hvf_int.h
24
--- a/target/arm/helper-sme.h
20
+++ b/include/sysemu/hvf_int.h
25
+++ b/target/arm/helper-sme.h
21
@@ -XXX,XX +XXX,XX @@ struct hvf_vcpu_state {
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
22
};
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
23
28
24
void assert_hvf_ok(hv_return_t ret);
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
25
+int hvf_arch_init(void);
30
+
26
int hvf_arch_init_vcpu(CPUState *cpu);
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
27
void hvf_arch_vcpu_destroy(CPUState *cpu);
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
int hvf_vcpu_exec(CPUState *);
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
index XXXXXXX..XXXXXXX 100644
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
--- a/accel/hvf/hvf-accel-ops.c
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
+++ b/accel/hvf/hvf-accel-ops.c
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
@@ -XXX,XX +XXX,XX @@ static int hvf_accel_init(MachineState *ms)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
hvf_state = s;
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
memory_listener_register(&hvf_memory_listener, &address_space_memory);
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
- return 0;
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
38
+
43
index XXXXXXX..XXXXXXX 100644
39
+ return hvf_arch_init();
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
40
}
61
}
41
62
42
static void hvf_accel_class_init(ObjectClass *oc, void *data)
63
+/* Return a newly allocated pointer to the predicate register. */
43
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
44
index XXXXXXX..XXXXXXX 100644
65
+{
45
--- a/target/i386/hvf/hvf.c
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
46
+++ b/target/i386/hvf/hvf.c
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
47
@@ -XXX,XX +XXX,XX @@ static inline bool apic_bus_freq_is_known(CPUX86State *env)
68
+ return ret;
48
return env->apic_bus_freq != 0;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
49
}
80
}
50
81
51
+int hvf_arch_init(void)
82
+static inline int plus_12(DisasContext *s, int x)
52
+{
83
+{
53
+ return 0;
84
+ return x + 12;
54
+}
85
+}
55
+
86
+
56
int hvf_arch_init_vcpu(CPUState *cpu)
87
static inline int times_2(DisasContext *s, int x)
57
{
88
{
58
X86CPU *x86cpu = X86_CPU(cpu);
89
return x * 2;
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
213
+ * the address arithmetic.
214
+ */
215
+ for (i = 0; i < oprsz; i++) {
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
286
}
287
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
59
--
445
--
60
2.20.1
446
2.25.1
61
62
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Hvf's permission bitmap during and after dirty logging does not include
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
4
the HV_MEMORY_EXEC permission. At least on Apple Silicon, this leads to
4
because those functions accept only a Zreg register number.
5
instruction faults once dirty logging was enabled.
5
For SME, we want to pass a pointer into ZA storage.
6
6
7
Add the bit to make it work properly.
8
9
Signed-off-by: Alexander Graf <agraf@csgraf.de>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Message-id: 20210916155404.86958-3-agraf@csgraf.de
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
11
---
14
accel/hvf/hvf-accel-ops.c | 4 ++--
12
target/arm/helper-sme.h | 82 +++++
15
1 file changed, 2 insertions(+), 2 deletions(-)
13
target/arm/sme.decode | 9 +
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
16
17
17
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
18
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/hvf/hvf-accel-ops.c
20
--- a/target/arm/helper-sme.h
20
+++ b/accel/hvf/hvf-accel-ops.c
21
+++ b/target/arm/helper-sme.h
21
@@ -XXX,XX +XXX,XX @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
if (on) {
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
slot->flags |= HVF_SLOT_LOG;
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
- HV_MEMORY_READ);
26
+
26
+ HV_MEMORY_READ | HV_MEMORY_EXEC);
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
27
/* stop tracking region*/
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
28
} else {
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
29
slot->flags &= ~HVF_SLOT_LOG;
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
31
+
31
- HV_MEMORY_READ | HV_MEMORY_WRITE);
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
32
+ HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
}
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
129
@@ -XXX,XX +XXX,XX @@
130
131
#include "qemu/osdep.h"
132
#include "cpu.h"
133
+#include "internals.h"
134
#include "tcg/tcg-gvec-desc.h"
135
#include "exec/helper-proto.h"
136
+#include "exec/cpu_ldst.h"
137
+#include "exec/exec-all.h"
138
#include "qemu/int128.h"
139
#include "vec_internal.h"
140
+#include "sve_ldst_internal.h"
141
142
/* ResetSVEState */
143
void arm_reset_sve_state(CPUARMState *env)
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
34
}
145
}
35
146
147
#undef DO_MOVA_Z
148
+
149
+/*
150
+ * Clear elements in a tile slice comprising len bytes.
151
+ */
152
+
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
154
+
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
156
+{
157
+ memset(ptr + off, 0, len);
158
+}
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
197
+ */
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 1, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
36
--
817
--
37
2.20.1
818
2.25.1
38
39
diff view generated by jsdifflib
1
Optimize MVE arithmetic ops when we have a TCG
1
From: Richard Henderson <richard.henderson@linaro.org>
2
vector operation we can use.
3
2
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
We will reuse this for SME save and restore array insns.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210913095440.13462-6-peter.maydell@linaro.org
8
---
10
---
9
target/arm/translate-mve.c | 20 +++++++++++---------
11
target/arm/translate-a64.h | 3 +++
10
1 file changed, 11 insertions(+), 9 deletions(-)
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
13
2 files changed, 39 insertions(+), 12 deletions(-)
11
14
12
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/translate-mve.c
17
--- a/target/arm/translate-a64.h
15
+++ b/target/arm/translate-mve.c
18
+++ b/target/arm/translate-a64.h
16
@@ -XXX,XX +XXX,XX @@ static bool trans_VPSEL(DisasContext *s, arg_2op *a)
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
17
return do_2op(s, a, gen_helper_mve_vpsel);
20
uint32_t rm_ofs, int64_t shift,
21
uint32_t opr_sz, uint32_t max_sz);
22
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
25
+
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
32
* The load should begin at the address Rn + IMM.
33
*/
34
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
37
+ int len, int rn, int imm)
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
81
}
82
83
/*
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
85
default:
86
g_assert_not_reached();
87
}
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
91
}
18
}
92
}
19
93
20
-#define DO_2OP(INSN, FN) \
94
/* Similarly for stores. */
21
+#define DO_2OP_VEC(INSN, FN, VECFN) \
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
22
static bool trans_##INSN(DisasContext *s, arg_2op *a) \
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
23
{ \
97
+ int len, int rn, int imm)
24
static MVEGenTwoOpFn * const fns[] = { \
98
{
25
@@ -XXX,XX +XXX,XX @@ static bool trans_VPSEL(DisasContext *s, arg_2op *a)
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
26
gen_helper_mve_##FN##w, \
100
int len_remain = len % 8;
27
NULL, \
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
28
}; \
102
29
- return do_2op(s, a, fns[a->size]); \
103
t0 = tcg_temp_new_i64();
30
+ return do_2op_vec(s, a, fns[a->size], VECFN); \
104
for (i = 0; i < len_align; i += 8) {
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
109
}
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
111
clean_addr = new_tmp_a64_local(s);
112
tcg_gen_mov_i64(clean_addr, t0);
113
114
+ if (base != cpu_env) {
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
118
+ }
119
+
120
gen_set_label(loop);
121
122
t0 = tcg_temp_new_i64();
123
tp = tcg_temp_new_ptr();
124
- tcg_gen_add_ptr(tp, cpu_env, i);
125
+ tcg_gen_add_ptr(tp, base, i);
126
tcg_gen_ld_i64(t0, tp, vofs);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
31
}
138
}
32
139
33
-DO_2OP(VADD, vadd)
140
/* Predicate register stores can be any multiple of 2. */
34
-DO_2OP(VSUB, vsub)
141
if (len_remain) {
35
-DO_2OP(VMUL, vmul)
142
t0 = tcg_temp_new_i64();
36
+#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
37
+
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
38
+DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
145
39
+DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
146
switch (len_remain) {
40
+DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
147
case 2:
41
DO_2OP(VMULH_S, vmulhs)
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
42
DO_2OP(VMULH_U, vmulhu)
149
if (sve_access_check(s)) {
43
DO_2OP(VRMULH_S, vrmulhs)
150
int size = vec_full_reg_size(s);
44
DO_2OP(VRMULH_U, vrmulhu)
151
int off = vec_full_reg_offset(s, a->rd);
45
-DO_2OP(VMAX_S, vmaxs)
152
- do_ldr(s, off, size, a->rn, a->imm * size);
46
-DO_2OP(VMAX_U, vmaxu)
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
47
-DO_2OP(VMIN_S, vmins)
154
}
48
-DO_2OP(VMIN_U, vminu)
155
return true;
49
+DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
156
}
50
+DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
51
+DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
158
if (sve_access_check(s)) {
52
+DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
159
int size = pred_full_reg_size(s);
53
DO_2OP(VABD_S, vabds)
160
int off = pred_full_reg_offset(s, a->rd);
54
DO_2OP(VABD_U, vabdu)
161
- do_ldr(s, off, size, a->rn, a->imm * size);
55
DO_2OP(VHADD_S, vhadds)
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
56
--
184
--
57
2.20.1
185
2.25.1
58
59
diff view generated by jsdifflib
1
Coverity points out that if the PDB file we're trying to read
1
From: Richard Henderson <richard.henderson@linaro.org>
2
has a header specifying a block_size of zero then we will
3
end up trying to divide by zero in pdb_ds_read_file().
4
Check for this and fail cleanly instead.
5
2
6
Fixes: Coverity CID 1458869
3
We can reuse the SVE functions for LDR and STR, passing in the
4
base of the ZA vector and a zero offset.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Tested-by: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
11
Message-id: 20210910170656.366592-3-philmd@redhat.com
12
Message-Id: <20210901143910.17112-3-peter.maydell@linaro.org>
13
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
14
---
10
---
15
contrib/elf2dmp/pdb.c | 4 ++++
11
target/arm/sme.decode | 7 +++++++
16
1 file changed, 4 insertions(+)
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
13
2 files changed, 31 insertions(+)
17
14
18
diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/contrib/elf2dmp/pdb.c
17
--- a/target/arm/sme.decode
21
+++ b/contrib/elf2dmp/pdb.c
18
+++ b/target/arm/sme.decode
22
@@ -XXX,XX +XXX,XX @@ out_symbols:
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
23
20
&ldst rs=%mova_rs
24
static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr)
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
25
{
22
&ldst esz=4 rs=%mova_rs
26
+ if (hdr->block_size == 0) {
23
+
27
+ return 1;
24
+&ldstr rv rn imm
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
26
+ &ldstr rv=%mova_rs
27
+
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-sme.c
33
+++ b/target/arm/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
tcg_temp_free_i64(addr);
36
return true;
37
}
38
+
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
40
+
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
42
+{
43
+ int svl = streaming_vec_reg_size(s);
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
46
+
47
+ if (!sme_za_enabled_check(s)) {
48
+ return true;
28
+ }
49
+ }
29
+
50
+
30
memset(r->file_used, 0, sizeof(r->file_used));
51
+ /* ZA[n] equates to ZA0H.B[n]. */
31
r->ds.header = hdr;
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
32
r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr +
53
+
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
55
+
56
+ tcg_temp_free_ptr(base);
57
+ return true;
58
+}
59
+
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
33
--
62
--
34
2.20.1
63
2.25.1
35
36
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
With Apple Silicon available to the masses, it's a good time to add support
4
for driving its virtualization extensions from QEMU.
5
6
This patch adds all necessary architecture specific code to get basic VMs
7
working, including save/restore.
8
9
Known limitations:
10
11
- WFI handling is missing (follows in later patch)
12
- No watchpoint/breakpoint support
13
14
Signed-off-by: Alexander Graf <agraf@csgraf.de>
15
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
16
Reviewed-by: Sergio Lopez <slp@redhat.com>
17
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
18
Message-id: 20210916155404.86958-5-agraf@csgraf.de
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
---
7
---
21
meson.build | 1 +
8
target/arm/helper-sme.h | 5 +++
22
include/sysemu/hvf_int.h | 10 +-
9
target/arm/sme.decode | 11 +++++
23
accel/hvf/hvf-accel-ops.c | 9 +
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
24
target/arm/hvf/hvf.c | 794 ++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 31 +++++++++++++
25
target/i386/hvf/hvf.c | 5 +
12
4 files changed, 137 insertions(+)
26
MAINTAINERS | 5 +
27
target/arm/hvf/trace-events | 10 +
28
7 files changed, 833 insertions(+), 1 deletion(-)
29
create mode 100644 target/arm/hvf/hvf.c
30
create mode 100644 target/arm/hvf/trace-events
31
13
32
diff --git a/meson.build b/meson.build
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
33
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
34
--- a/meson.build
16
--- a/target/arm/helper-sme.h
35
+++ b/meson.build
17
+++ b/target/arm/helper-sme.h
36
@@ -XXX,XX +XXX,XX @@ if have_system or have_user
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
37
'accel/tcg',
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
'hw/core',
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
'target/arm',
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+ 'target/arm/hvf',
22
+
41
'target/hppa',
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
42
'target/i386',
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
43
'target/i386/kvm',
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
44
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
45
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
46
--- a/include/sysemu/hvf_int.h
29
--- a/target/arm/sme.decode
47
+++ b/include/sysemu/hvf_int.h
30
+++ b/target/arm/sme.decode
48
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
49
#ifndef HVF_INT_H
32
50
#define HVF_INT_H
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
51
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
52
+#ifdef __aarch64__
35
+
53
+#include <Hypervisor/Hypervisor.h>
36
+### SME Add Vector to Array
54
+#else
37
+
55
#include <Hypervisor/hv.h>
38
+&adda zad zn pm pn
56
+#endif
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
57
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
58
/* hvf_slot flags */
41
+
59
#define HVF_SLOT_LOG (1 << 0)
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
60
@@ -XXX,XX +XXX,XX @@ struct HVFState {
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
61
int num_slots;
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
62
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
63
hvf_vcpu_caps *hvf_caps;
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
64
+ uint64_t vtimer_offset;
65
};
66
extern HVFState *hvf_state;
67
68
struct hvf_vcpu_state {
69
- int fd;
70
+ uint64_t fd;
71
+ void *exit;
72
+ bool vtimer_masked;
73
};
74
75
void assert_hvf_ok(hv_return_t ret);
76
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *);
77
hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
78
int hvf_put_registers(CPUState *);
79
int hvf_get_registers(CPUState *);
80
+void hvf_kick_vcpu_thread(CPUState *cpu);
81
82
#endif
83
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
84
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
85
--- a/accel/hvf/hvf-accel-ops.c
48
--- a/target/arm/sme_helper.c
86
+++ b/accel/hvf/hvf-accel-ops.c
49
+++ b/target/arm/sme_helper.c
87
@@ -XXX,XX +XXX,XX @@
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
88
51
DO_ST(q, _le, MO_128)
89
HVFState *hvf_state;
52
90
53
#undef DO_ST
91
+#ifdef __aarch64__
92
+#define HV_VM_DEFAULT NULL
93
+#endif
94
+
54
+
95
/* Memory slots */
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
96
56
+ void *vpm, uint32_t desc)
97
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
57
+{
98
@@ -XXX,XX +XXX,XX @@ static int hvf_init_vcpu(CPUState *cpu)
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
99
pthread_sigmask(SIG_BLOCK, NULL, &set);
59
+ uint64_t *pn = vpn, *pm = vpm;
100
sigdelset(&set, SIG_IPI);
60
+ uint32_t *zda = vzda, *zn = vzn;
101
102
+#ifdef __aarch64__
103
+ r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
104
+#else
105
r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT);
106
+#endif
107
cpu->vcpu_dirty = 1;
108
assert_hvf_ok(r);
109
110
@@ -XXX,XX +XXX,XX @@ static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
111
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
112
113
ops->create_vcpu_thread = hvf_start_vcpu_thread;
114
+ ops->kick_vcpu_thread = hvf_kick_vcpu_thread;
115
116
ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset;
117
ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
118
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
119
new file mode 100644
120
index XXXXXXX..XXXXXXX
121
--- /dev/null
122
+++ b/target/arm/hvf/hvf.c
123
@@ -XXX,XX +XXX,XX @@
124
+/*
125
+ * QEMU Hypervisor.framework support for Apple Silicon
126
+
61
+
127
+ * Copyright 2020 Alexander Graf <agraf@csgraf.de>
62
+ for (row = 0; row < oprsz; ) {
128
+ *
63
+ uint64_t pa = pn[row >> 4];
129
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
64
+ do {
130
+ * See the COPYING file in the top-level directory.
65
+ if (pa & 1) {
131
+ *
66
+ for (col = 0; col < oprsz; ) {
132
+ */
67
+ uint64_t pb = pm[col >> 4];
133
+
68
+ do {
134
+#include "qemu/osdep.h"
69
+ if (pb & 1) {
135
+#include "qemu-common.h"
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
136
+#include "qemu/error-report.h"
71
+ }
137
+
72
+ pb >>= 4;
138
+#include "sysemu/runstate.h"
73
+ } while (++col & 15);
139
+#include "sysemu/hvf.h"
74
+ }
140
+#include "sysemu/hvf_int.h"
75
+ }
141
+#include "sysemu/hw_accel.h"
76
+ pa >>= 4;
142
+
77
+ } while (++row & 15);
143
+#include <mach/mach_time.h>
144
+
145
+#include "exec/address-spaces.h"
146
+#include "hw/irq.h"
147
+#include "qemu/main-loop.h"
148
+#include "sysemu/cpus.h"
149
+#include "target/arm/cpu.h"
150
+#include "target/arm/internals.h"
151
+#include "trace/trace-target_arm_hvf.h"
152
+#include "migration/vmstate.h"
153
+
154
+#define HVF_SYSREG(crn, crm, op0, op1, op2) \
155
+ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
156
+#define PL1_WRITE_MASK 0x4
157
+
158
+#define SYSREG(op0, op1, crn, crm, op2) \
159
+ ((op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (crm << 1))
160
+#define SYSREG_MASK SYSREG(0x3, 0x7, 0xf, 0xf, 0x7)
161
+#define SYSREG_OSLAR_EL1 SYSREG(2, 0, 1, 0, 4)
162
+#define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4)
163
+#define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4)
164
+#define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1)
165
+
166
+#define WFX_IS_WFE (1 << 0)
167
+
168
+#define TMR_CTL_ENABLE (1 << 0)
169
+#define TMR_CTL_IMASK (1 << 1)
170
+#define TMR_CTL_ISTATUS (1 << 2)
171
+
172
+typedef struct HVFVTimer {
173
+ /* Vtimer value during migration and paused state */
174
+ uint64_t vtimer_val;
175
+} HVFVTimer;
176
+
177
+static HVFVTimer vtimer;
178
+
179
+struct hvf_reg_match {
180
+ int reg;
181
+ uint64_t offset;
182
+};
183
+
184
+static const struct hvf_reg_match hvf_reg_match[] = {
185
+ { HV_REG_X0, offsetof(CPUARMState, xregs[0]) },
186
+ { HV_REG_X1, offsetof(CPUARMState, xregs[1]) },
187
+ { HV_REG_X2, offsetof(CPUARMState, xregs[2]) },
188
+ { HV_REG_X3, offsetof(CPUARMState, xregs[3]) },
189
+ { HV_REG_X4, offsetof(CPUARMState, xregs[4]) },
190
+ { HV_REG_X5, offsetof(CPUARMState, xregs[5]) },
191
+ { HV_REG_X6, offsetof(CPUARMState, xregs[6]) },
192
+ { HV_REG_X7, offsetof(CPUARMState, xregs[7]) },
193
+ { HV_REG_X8, offsetof(CPUARMState, xregs[8]) },
194
+ { HV_REG_X9, offsetof(CPUARMState, xregs[9]) },
195
+ { HV_REG_X10, offsetof(CPUARMState, xregs[10]) },
196
+ { HV_REG_X11, offsetof(CPUARMState, xregs[11]) },
197
+ { HV_REG_X12, offsetof(CPUARMState, xregs[12]) },
198
+ { HV_REG_X13, offsetof(CPUARMState, xregs[13]) },
199
+ { HV_REG_X14, offsetof(CPUARMState, xregs[14]) },
200
+ { HV_REG_X15, offsetof(CPUARMState, xregs[15]) },
201
+ { HV_REG_X16, offsetof(CPUARMState, xregs[16]) },
202
+ { HV_REG_X17, offsetof(CPUARMState, xregs[17]) },
203
+ { HV_REG_X18, offsetof(CPUARMState, xregs[18]) },
204
+ { HV_REG_X19, offsetof(CPUARMState, xregs[19]) },
205
+ { HV_REG_X20, offsetof(CPUARMState, xregs[20]) },
206
+ { HV_REG_X21, offsetof(CPUARMState, xregs[21]) },
207
+ { HV_REG_X22, offsetof(CPUARMState, xregs[22]) },
208
+ { HV_REG_X23, offsetof(CPUARMState, xregs[23]) },
209
+ { HV_REG_X24, offsetof(CPUARMState, xregs[24]) },
210
+ { HV_REG_X25, offsetof(CPUARMState, xregs[25]) },
211
+ { HV_REG_X26, offsetof(CPUARMState, xregs[26]) },
212
+ { HV_REG_X27, offsetof(CPUARMState, xregs[27]) },
213
+ { HV_REG_X28, offsetof(CPUARMState, xregs[28]) },
214
+ { HV_REG_X29, offsetof(CPUARMState, xregs[29]) },
215
+ { HV_REG_X30, offsetof(CPUARMState, xregs[30]) },
216
+ { HV_REG_PC, offsetof(CPUARMState, pc) },
217
+};
218
+
219
+static const struct hvf_reg_match hvf_fpreg_match[] = {
220
+ { HV_SIMD_FP_REG_Q0, offsetof(CPUARMState, vfp.zregs[0]) },
221
+ { HV_SIMD_FP_REG_Q1, offsetof(CPUARMState, vfp.zregs[1]) },
222
+ { HV_SIMD_FP_REG_Q2, offsetof(CPUARMState, vfp.zregs[2]) },
223
+ { HV_SIMD_FP_REG_Q3, offsetof(CPUARMState, vfp.zregs[3]) },
224
+ { HV_SIMD_FP_REG_Q4, offsetof(CPUARMState, vfp.zregs[4]) },
225
+ { HV_SIMD_FP_REG_Q5, offsetof(CPUARMState, vfp.zregs[5]) },
226
+ { HV_SIMD_FP_REG_Q6, offsetof(CPUARMState, vfp.zregs[6]) },
227
+ { HV_SIMD_FP_REG_Q7, offsetof(CPUARMState, vfp.zregs[7]) },
228
+ { HV_SIMD_FP_REG_Q8, offsetof(CPUARMState, vfp.zregs[8]) },
229
+ { HV_SIMD_FP_REG_Q9, offsetof(CPUARMState, vfp.zregs[9]) },
230
+ { HV_SIMD_FP_REG_Q10, offsetof(CPUARMState, vfp.zregs[10]) },
231
+ { HV_SIMD_FP_REG_Q11, offsetof(CPUARMState, vfp.zregs[11]) },
232
+ { HV_SIMD_FP_REG_Q12, offsetof(CPUARMState, vfp.zregs[12]) },
233
+ { HV_SIMD_FP_REG_Q13, offsetof(CPUARMState, vfp.zregs[13]) },
234
+ { HV_SIMD_FP_REG_Q14, offsetof(CPUARMState, vfp.zregs[14]) },
235
+ { HV_SIMD_FP_REG_Q15, offsetof(CPUARMState, vfp.zregs[15]) },
236
+ { HV_SIMD_FP_REG_Q16, offsetof(CPUARMState, vfp.zregs[16]) },
237
+ { HV_SIMD_FP_REG_Q17, offsetof(CPUARMState, vfp.zregs[17]) },
238
+ { HV_SIMD_FP_REG_Q18, offsetof(CPUARMState, vfp.zregs[18]) },
239
+ { HV_SIMD_FP_REG_Q19, offsetof(CPUARMState, vfp.zregs[19]) },
240
+ { HV_SIMD_FP_REG_Q20, offsetof(CPUARMState, vfp.zregs[20]) },
241
+ { HV_SIMD_FP_REG_Q21, offsetof(CPUARMState, vfp.zregs[21]) },
242
+ { HV_SIMD_FP_REG_Q22, offsetof(CPUARMState, vfp.zregs[22]) },
243
+ { HV_SIMD_FP_REG_Q23, offsetof(CPUARMState, vfp.zregs[23]) },
244
+ { HV_SIMD_FP_REG_Q24, offsetof(CPUARMState, vfp.zregs[24]) },
245
+ { HV_SIMD_FP_REG_Q25, offsetof(CPUARMState, vfp.zregs[25]) },
246
+ { HV_SIMD_FP_REG_Q26, offsetof(CPUARMState, vfp.zregs[26]) },
247
+ { HV_SIMD_FP_REG_Q27, offsetof(CPUARMState, vfp.zregs[27]) },
248
+ { HV_SIMD_FP_REG_Q28, offsetof(CPUARMState, vfp.zregs[28]) },
249
+ { HV_SIMD_FP_REG_Q29, offsetof(CPUARMState, vfp.zregs[29]) },
250
+ { HV_SIMD_FP_REG_Q30, offsetof(CPUARMState, vfp.zregs[30]) },
251
+ { HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) },
252
+};
253
+
254
+struct hvf_sreg_match {
255
+ int reg;
256
+ uint32_t key;
257
+ uint32_t cp_idx;
258
+};
259
+
260
+static struct hvf_sreg_match hvf_sreg_match[] = {
261
+ { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) },
262
+ { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) },
263
+ { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) },
264
+ { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) },
265
+
266
+ { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) },
267
+ { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) },
268
+ { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) },
269
+ { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) },
270
+
271
+ { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) },
272
+ { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) },
273
+ { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) },
274
+ { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) },
275
+
276
+ { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) },
277
+ { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) },
278
+ { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) },
279
+ { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) },
280
+
281
+ { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) },
282
+ { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) },
283
+ { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) },
284
+ { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) },
285
+
286
+ { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) },
287
+ { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) },
288
+ { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) },
289
+ { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) },
290
+
291
+ { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) },
292
+ { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) },
293
+ { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) },
294
+ { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) },
295
+
296
+ { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) },
297
+ { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) },
298
+ { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) },
299
+ { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) },
300
+
301
+ { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) },
302
+ { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) },
303
+ { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) },
304
+ { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) },
305
+
306
+ { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) },
307
+ { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) },
308
+ { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) },
309
+ { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) },
310
+
311
+ { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) },
312
+ { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) },
313
+ { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) },
314
+ { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) },
315
+
316
+ { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) },
317
+ { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) },
318
+ { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) },
319
+ { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) },
320
+
321
+ { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) },
322
+ { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) },
323
+ { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) },
324
+ { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) },
325
+
326
+ { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) },
327
+ { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) },
328
+ { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) },
329
+ { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) },
330
+
331
+ { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) },
332
+ { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) },
333
+ { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) },
334
+ { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) },
335
+
336
+ { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) },
337
+ { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) },
338
+ { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) },
339
+ { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) },
340
+
341
+#ifdef SYNC_NO_RAW_REGS
342
+ /*
343
+ * The registers below are manually synced on init because they are
344
+ * marked as NO_RAW. We still list them to make number space sync easier.
345
+ */
346
+ { HV_SYS_REG_MDCCINT_EL1, HVF_SYSREG(0, 2, 2, 0, 0) },
347
+ { HV_SYS_REG_MIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 0) },
348
+ { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) },
349
+ { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) },
350
+#endif
351
+ { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) },
352
+ { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) },
353
+ { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) },
354
+ { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) },
355
+ { HV_SYS_REG_ID_AA64ISAR1_EL1, HVF_SYSREG(0, 6, 3, 0, 1) },
356
+#ifdef SYNC_NO_MMFR0
357
+ /* We keep the hardware MMFR0 around. HW limits are there anyway */
358
+ { HV_SYS_REG_ID_AA64MMFR0_EL1, HVF_SYSREG(0, 7, 3, 0, 0) },
359
+#endif
360
+ { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) },
361
+ { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) },
362
+
363
+ { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) },
364
+ { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) },
365
+ { HV_SYS_REG_CPACR_EL1, HVF_SYSREG(1, 0, 3, 0, 2) },
366
+ { HV_SYS_REG_TTBR0_EL1, HVF_SYSREG(2, 0, 3, 0, 0) },
367
+ { HV_SYS_REG_TTBR1_EL1, HVF_SYSREG(2, 0, 3, 0, 1) },
368
+ { HV_SYS_REG_TCR_EL1, HVF_SYSREG(2, 0, 3, 0, 2) },
369
+
370
+ { HV_SYS_REG_APIAKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 0) },
371
+ { HV_SYS_REG_APIAKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 1) },
372
+ { HV_SYS_REG_APIBKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 2) },
373
+ { HV_SYS_REG_APIBKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 3) },
374
+ { HV_SYS_REG_APDAKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 0) },
375
+ { HV_SYS_REG_APDAKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 1) },
376
+ { HV_SYS_REG_APDBKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 2) },
377
+ { HV_SYS_REG_APDBKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 3) },
378
+ { HV_SYS_REG_APGAKEYLO_EL1, HVF_SYSREG(2, 3, 3, 0, 0) },
379
+ { HV_SYS_REG_APGAKEYHI_EL1, HVF_SYSREG(2, 3, 3, 0, 1) },
380
+
381
+ { HV_SYS_REG_SPSR_EL1, HVF_SYSREG(4, 0, 3, 0, 0) },
382
+ { HV_SYS_REG_ELR_EL1, HVF_SYSREG(4, 0, 3, 0, 1) },
383
+ { HV_SYS_REG_SP_EL0, HVF_SYSREG(4, 1, 3, 0, 0) },
384
+ { HV_SYS_REG_AFSR0_EL1, HVF_SYSREG(5, 1, 3, 0, 0) },
385
+ { HV_SYS_REG_AFSR1_EL1, HVF_SYSREG(5, 1, 3, 0, 1) },
386
+ { HV_SYS_REG_ESR_EL1, HVF_SYSREG(5, 2, 3, 0, 0) },
387
+ { HV_SYS_REG_FAR_EL1, HVF_SYSREG(6, 0, 3, 0, 0) },
388
+ { HV_SYS_REG_PAR_EL1, HVF_SYSREG(7, 4, 3, 0, 0) },
389
+ { HV_SYS_REG_MAIR_EL1, HVF_SYSREG(10, 2, 3, 0, 0) },
390
+ { HV_SYS_REG_AMAIR_EL1, HVF_SYSREG(10, 3, 3, 0, 0) },
391
+ { HV_SYS_REG_VBAR_EL1, HVF_SYSREG(12, 0, 3, 0, 0) },
392
+ { HV_SYS_REG_CONTEXTIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 1) },
393
+ { HV_SYS_REG_TPIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 4) },
394
+ { HV_SYS_REG_CNTKCTL_EL1, HVF_SYSREG(14, 1, 3, 0, 0) },
395
+ { HV_SYS_REG_CSSELR_EL1, HVF_SYSREG(0, 0, 3, 2, 0) },
396
+ { HV_SYS_REG_TPIDR_EL0, HVF_SYSREG(13, 0, 3, 3, 2) },
397
+ { HV_SYS_REG_TPIDRRO_EL0, HVF_SYSREG(13, 0, 3, 3, 3) },
398
+ { HV_SYS_REG_CNTV_CTL_EL0, HVF_SYSREG(14, 3, 3, 3, 1) },
399
+ { HV_SYS_REG_CNTV_CVAL_EL0, HVF_SYSREG(14, 3, 3, 3, 2) },
400
+ { HV_SYS_REG_SP_EL1, HVF_SYSREG(4, 1, 3, 4, 0) },
401
+};
402
+
403
+int hvf_get_registers(CPUState *cpu)
404
+{
405
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
406
+ CPUARMState *env = &arm_cpu->env;
407
+ hv_return_t ret;
408
+ uint64_t val;
409
+ hv_simd_fp_uchar16_t fpval;
410
+ int i;
411
+
412
+ for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
413
+ ret = hv_vcpu_get_reg(cpu->hvf->fd, hvf_reg_match[i].reg, &val);
414
+ *(uint64_t *)((void *)env + hvf_reg_match[i].offset) = val;
415
+ assert_hvf_ok(ret);
416
+ }
417
+
418
+ for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) {
419
+ ret = hv_vcpu_get_simd_fp_reg(cpu->hvf->fd, hvf_fpreg_match[i].reg,
420
+ &fpval);
421
+ memcpy((void *)env + hvf_fpreg_match[i].offset, &fpval, sizeof(fpval));
422
+ assert_hvf_ok(ret);
423
+ }
424
+
425
+ val = 0;
426
+ ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_FPCR, &val);
427
+ assert_hvf_ok(ret);
428
+ vfp_set_fpcr(env, val);
429
+
430
+ val = 0;
431
+ ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_FPSR, &val);
432
+ assert_hvf_ok(ret);
433
+ vfp_set_fpsr(env, val);
434
+
435
+ ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_CPSR, &val);
436
+ assert_hvf_ok(ret);
437
+ pstate_write(env, val);
438
+
439
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
440
+ if (hvf_sreg_match[i].cp_idx == -1) {
441
+ continue;
442
+ }
443
+
444
+ ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, &val);
445
+ assert_hvf_ok(ret);
446
+
447
+ arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val;
448
+ }
449
+ assert(write_list_to_cpustate(arm_cpu));
450
+
451
+ aarch64_restore_sp(env, arm_current_el(env));
452
+
453
+ return 0;
454
+}
455
+
456
+int hvf_put_registers(CPUState *cpu)
457
+{
458
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
459
+ CPUARMState *env = &arm_cpu->env;
460
+ hv_return_t ret;
461
+ uint64_t val;
462
+ hv_simd_fp_uchar16_t fpval;
463
+ int i;
464
+
465
+ for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
466
+ val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset);
467
+ ret = hv_vcpu_set_reg(cpu->hvf->fd, hvf_reg_match[i].reg, val);
468
+ assert_hvf_ok(ret);
469
+ }
470
+
471
+ for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) {
472
+ memcpy(&fpval, (void *)env + hvf_fpreg_match[i].offset, sizeof(fpval));
473
+ ret = hv_vcpu_set_simd_fp_reg(cpu->hvf->fd, hvf_fpreg_match[i].reg,
474
+ fpval);
475
+ assert_hvf_ok(ret);
476
+ }
477
+
478
+ ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_FPCR, vfp_get_fpcr(env));
479
+ assert_hvf_ok(ret);
480
+
481
+ ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_FPSR, vfp_get_fpsr(env));
482
+ assert_hvf_ok(ret);
483
+
484
+ ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_CPSR, pstate_read(env));
485
+ assert_hvf_ok(ret);
486
+
487
+ aarch64_save_sp(env, arm_current_el(env));
488
+
489
+ assert(write_cpustate_to_list(arm_cpu, false));
490
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
491
+ if (hvf_sreg_match[i].cp_idx == -1) {
492
+ continue;
493
+ }
494
+
495
+ val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx];
496
+ ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, val);
497
+ assert_hvf_ok(ret);
498
+ }
499
+
500
+ ret = hv_vcpu_set_vtimer_offset(cpu->hvf->fd, hvf_state->vtimer_offset);
501
+ assert_hvf_ok(ret);
502
+
503
+ return 0;
504
+}
505
+
506
+static void flush_cpu_state(CPUState *cpu)
507
+{
508
+ if (cpu->vcpu_dirty) {
509
+ hvf_put_registers(cpu);
510
+ cpu->vcpu_dirty = false;
511
+ }
78
+ }
512
+}
79
+}
513
+
80
+
514
+static void hvf_set_reg(CPUState *cpu, int rt, uint64_t val)
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
515
+{
83
+{
516
+ hv_return_t r;
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
517
+
87
+
518
+ flush_cpu_state(cpu);
88
+ for (row = 0; row < oprsz; ++row) {
519
+
89
+ if (pn[H1(row)] & 1) {
520
+ if (rt < 31) {
90
+ for (col = 0; col < oprsz; ++col) {
521
+ r = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_X0 + rt, val);
91
+ if (pm[H1(col)] & 1) {
522
+ assert_hvf_ok(r);
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
523
+ }
96
+ }
524
+}
97
+}
525
+
98
+
526
+static uint64_t hvf_get_reg(CPUState *cpu, int rt)
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
527
+{
101
+{
528
+ uint64_t val = 0;
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
529
+ hv_return_t r;
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
530
+
105
+
531
+ flush_cpu_state(cpu);
106
+ for (row = 0; row < oprsz; ) {
532
+
107
+ uint64_t pa = pn[row >> 4];
533
+ if (rt < 31) {
108
+ do {
534
+ r = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_X0 + rt, &val);
109
+ if (pa & 1) {
535
+ assert_hvf_ok(r);
110
+ uint32_t zn_row = zn[H4(row)];
536
+ }
111
+ for (col = 0; col < oprsz; ) {
537
+
112
+ uint64_t pb = pm[col >> 4];
538
+ return val;
113
+ do {
539
+}
114
+ if (pb & 1) {
540
+
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
541
+void hvf_arch_vcpu_destroy(CPUState *cpu)
116
+ }
542
+{
117
+ pb >>= 4;
543
+}
118
+ } while (++col & 15);
544
+
119
+ }
545
+int hvf_arch_init_vcpu(CPUState *cpu)
120
+ }
546
+{
121
+ pa >>= 4;
547
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
122
+ } while (++row & 15);
548
+ CPUARMState *env = &arm_cpu->env;
549
+ uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_match);
550
+ uint32_t sregs_cnt = 0;
551
+ uint64_t pfr;
552
+ hv_return_t ret;
553
+ int i;
554
+
555
+ env->aarch64 = 1;
556
+ asm volatile("mrs %0, cntfrq_el0" : "=r"(arm_cpu->gt_cntfrq_hz));
557
+
558
+ /* Allocate enough space for our sysreg sync */
559
+ arm_cpu->cpreg_indexes = g_renew(uint64_t, arm_cpu->cpreg_indexes,
560
+ sregs_match_len);
561
+ arm_cpu->cpreg_values = g_renew(uint64_t, arm_cpu->cpreg_values,
562
+ sregs_match_len);
563
+ arm_cpu->cpreg_vmstate_indexes = g_renew(uint64_t,
564
+ arm_cpu->cpreg_vmstate_indexes,
565
+ sregs_match_len);
566
+ arm_cpu->cpreg_vmstate_values = g_renew(uint64_t,
567
+ arm_cpu->cpreg_vmstate_values,
568
+ sregs_match_len);
569
+
570
+ memset(arm_cpu->cpreg_values, 0, sregs_match_len * sizeof(uint64_t));
571
+
572
+ /* Populate cp list for all known sysregs */
573
+ for (i = 0; i < sregs_match_len; i++) {
574
+ const ARMCPRegInfo *ri;
575
+ uint32_t key = hvf_sreg_match[i].key;
576
+
577
+ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key);
578
+ if (ri) {
579
+ assert(!(ri->type & ARM_CP_NO_RAW));
580
+ hvf_sreg_match[i].cp_idx = sregs_cnt;
581
+ arm_cpu->cpreg_indexes[sregs_cnt++] = cpreg_to_kvm_id(key);
582
+ } else {
583
+ hvf_sreg_match[i].cp_idx = -1;
584
+ }
585
+ }
586
+ arm_cpu->cpreg_array_len = sregs_cnt;
587
+ arm_cpu->cpreg_vmstate_array_len = sregs_cnt;
588
+
589
+ assert(write_cpustate_to_list(arm_cpu, false));
590
+
591
+ /* Set CP_NO_RAW system registers on init */
592
+ ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_MIDR_EL1,
593
+ arm_cpu->midr);
594
+ assert_hvf_ok(ret);
595
+
596
+ ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_MPIDR_EL1,
597
+ arm_cpu->mp_affinity);
598
+ assert_hvf_ok(ret);
599
+
600
+ ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64PFR0_EL1, &pfr);
601
+ assert_hvf_ok(ret);
602
+ pfr |= env->gicv3state ? (1 << 24) : 0;
603
+ ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64PFR0_EL1, pfr);
604
+ assert_hvf_ok(ret);
605
+
606
+ /* We're limited to underlying hardware caps, override internal versions */
607
+ ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64MMFR0_EL1,
608
+ &arm_cpu->isar.id_aa64mmfr0);
609
+ assert_hvf_ok(ret);
610
+
611
+ return 0;
612
+}
613
+
614
+void hvf_kick_vcpu_thread(CPUState *cpu)
615
+{
616
+ hv_vcpus_exit(&cpu->hvf->fd, 1);
617
+}
618
+
619
+static void hvf_raise_exception(CPUState *cpu, uint32_t excp,
620
+ uint32_t syndrome)
621
+{
622
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
623
+ CPUARMState *env = &arm_cpu->env;
624
+
625
+ cpu->exception_index = excp;
626
+ env->exception.target_el = 1;
627
+ env->exception.syndrome = syndrome;
628
+
629
+ arm_cpu_do_interrupt(cpu);
630
+}
631
+
632
+static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
633
+{
634
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
635
+ CPUARMState *env = &arm_cpu->env;
636
+ uint64_t val = 0;
637
+
638
+ switch (reg) {
639
+ case SYSREG_CNTPCT_EL0:
640
+ val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
641
+ gt_cntfrq_period_ns(arm_cpu);
642
+ break;
643
+ case SYSREG_OSLSR_EL1:
644
+ val = env->cp15.oslsr_el1;
645
+ break;
646
+ case SYSREG_OSDLR_EL1:
647
+ /* Dummy register */
648
+ break;
649
+ default:
650
+ cpu_synchronize_state(cpu);
651
+ trace_hvf_unhandled_sysreg_read(env->pc, reg,
652
+ (reg >> 20) & 0x3,
653
+ (reg >> 14) & 0x7,
654
+ (reg >> 10) & 0xf,
655
+ (reg >> 1) & 0xf,
656
+ (reg >> 17) & 0x7);
657
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
658
+ return 1;
659
+ }
660
+
661
+ trace_hvf_sysreg_read(reg,
662
+ (reg >> 20) & 0x3,
663
+ (reg >> 14) & 0x7,
664
+ (reg >> 10) & 0xf,
665
+ (reg >> 1) & 0xf,
666
+ (reg >> 17) & 0x7,
667
+ val);
668
+ hvf_set_reg(cpu, rt, val);
669
+
670
+ return 0;
671
+}
672
+
673
+static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
674
+{
675
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
676
+ CPUARMState *env = &arm_cpu->env;
677
+
678
+ trace_hvf_sysreg_write(reg,
679
+ (reg >> 20) & 0x3,
680
+ (reg >> 14) & 0x7,
681
+ (reg >> 10) & 0xf,
682
+ (reg >> 1) & 0xf,
683
+ (reg >> 17) & 0x7,
684
+ val);
685
+
686
+ switch (reg) {
687
+ case SYSREG_OSLAR_EL1:
688
+ env->cp15.oslsr_el1 = val & 1;
689
+ break;
690
+ case SYSREG_OSDLR_EL1:
691
+ /* Dummy register */
692
+ break;
693
+ default:
694
+ cpu_synchronize_state(cpu);
695
+ trace_hvf_unhandled_sysreg_write(env->pc, reg,
696
+ (reg >> 20) & 0x3,
697
+ (reg >> 14) & 0x7,
698
+ (reg >> 10) & 0xf,
699
+ (reg >> 1) & 0xf,
700
+ (reg >> 17) & 0x7);
701
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
702
+ return 1;
703
+ }
704
+
705
+ return 0;
706
+}
707
+
708
+static int hvf_inject_interrupts(CPUState *cpu)
709
+{
710
+ if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) {
711
+ trace_hvf_inject_fiq();
712
+ hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_FIQ,
713
+ true);
714
+ }
715
+
716
+ if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
717
+ trace_hvf_inject_irq();
718
+ hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_IRQ,
719
+ true);
720
+ }
721
+
722
+ return 0;
723
+}
724
+
725
+static uint64_t hvf_vtimer_val_raw(void)
726
+{
727
+ /*
728
+ * mach_absolute_time() returns the vtimer value without the VM
729
+ * offset that we define. Add our own offset on top.
730
+ */
731
+ return mach_absolute_time() - hvf_state->vtimer_offset;
732
+}
733
+
734
+static void hvf_sync_vtimer(CPUState *cpu)
735
+{
736
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
737
+ hv_return_t r;
738
+ uint64_t ctl;
739
+ bool irq_state;
740
+
741
+ if (!cpu->hvf->vtimer_masked) {
742
+ /* We will get notified on vtimer changes by hvf, nothing to do */
743
+ return;
744
+ }
745
+
746
+ r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
747
+ assert_hvf_ok(r);
748
+
749
+ irq_state = (ctl & (TMR_CTL_ENABLE | TMR_CTL_IMASK | TMR_CTL_ISTATUS)) ==
750
+ (TMR_CTL_ENABLE | TMR_CTL_ISTATUS);
751
+ qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], irq_state);
752
+
753
+ if (!irq_state) {
754
+ /* Timer no longer asserting, we can unmask it */
755
+ hv_vcpu_set_vtimer_mask(cpu->hvf->fd, false);
756
+ cpu->hvf->vtimer_masked = false;
757
+ }
123
+ }
758
+}
124
+}
759
+
125
+
760
+int hvf_vcpu_exec(CPUState *cpu)
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
127
+ void *vpm, uint32_t desc)
761
+{
128
+{
762
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
763
+ CPUARMState *env = &arm_cpu->env;
130
+ uint8_t *pn = vpn, *pm = vpm;
764
+ hv_vcpu_exit_t *hvf_exit = cpu->hvf->exit;
131
+ uint64_t *zda = vzda, *zn = vzn;
765
+ hv_return_t r;
766
+ bool advance_pc = false;
767
+
132
+
768
+ if (hvf_inject_interrupts(cpu)) {
133
+ for (row = 0; row < oprsz; ++row) {
769
+ return EXCP_INTERRUPT;
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
770
+ }
162
+ }
771
+
163
+
772
+ if (cpu->halted) {
164
+ /* Sum XZR+zad to find ZAd. */
773
+ return EXCP_HLT;
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
774
+ }
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
775
+
169
+
776
+ flush_cpu_state(cpu);
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
777
+
171
+
778
+ qemu_mutex_unlock_iothread();
172
+ tcg_temp_free_ptr(za);
779
+ assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
173
+ tcg_temp_free_ptr(zn);
780
+
174
+ tcg_temp_free_ptr(pn);
781
+ /* handle VMEXIT */
175
+ tcg_temp_free_ptr(pm);
782
+ uint64_t exit_reason = hvf_exit->reason;
176
+ return true;
783
+ uint64_t syndrome = hvf_exit->exception.syndrome;
784
+ uint32_t ec = syn_get_ec(syndrome);
785
+
786
+ qemu_mutex_lock_iothread();
787
+ switch (exit_reason) {
788
+ case HV_EXIT_REASON_EXCEPTION:
789
+ /* This is the main one, handle below. */
790
+ break;
791
+ case HV_EXIT_REASON_VTIMER_ACTIVATED:
792
+ qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
793
+ cpu->hvf->vtimer_masked = true;
794
+ return 0;
795
+ case HV_EXIT_REASON_CANCELED:
796
+ /* we got kicked, no exit to process */
797
+ return 0;
798
+ default:
799
+ assert(0);
800
+ }
801
+
802
+ hvf_sync_vtimer(cpu);
803
+
804
+ switch (ec) {
805
+ case EC_DATAABORT: {
806
+ bool isv = syndrome & ARM_EL_ISV;
807
+ bool iswrite = (syndrome >> 6) & 1;
808
+ bool s1ptw = (syndrome >> 7) & 1;
809
+ uint32_t sas = (syndrome >> 22) & 3;
810
+ uint32_t len = 1 << sas;
811
+ uint32_t srt = (syndrome >> 16) & 0x1f;
812
+ uint64_t val = 0;
813
+
814
+ trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address,
815
+ hvf_exit->exception.physical_address, isv,
816
+ iswrite, s1ptw, len, srt);
817
+
818
+ assert(isv);
819
+
820
+ if (iswrite) {
821
+ val = hvf_get_reg(cpu, srt);
822
+ address_space_write(&address_space_memory,
823
+ hvf_exit->exception.physical_address,
824
+ MEMTXATTRS_UNSPECIFIED, &val, len);
825
+ } else {
826
+ address_space_read(&address_space_memory,
827
+ hvf_exit->exception.physical_address,
828
+ MEMTXATTRS_UNSPECIFIED, &val, len);
829
+ hvf_set_reg(cpu, srt, val);
830
+ }
831
+
832
+ advance_pc = true;
833
+ break;
834
+ }
835
+ case EC_SYSTEMREGISTERTRAP: {
836
+ bool isread = (syndrome >> 0) & 1;
837
+ uint32_t rt = (syndrome >> 5) & 0x1f;
838
+ uint32_t reg = syndrome & SYSREG_MASK;
839
+ uint64_t val;
840
+ int ret = 0;
841
+
842
+ if (isread) {
843
+ ret = hvf_sysreg_read(cpu, reg, rt);
844
+ } else {
845
+ val = hvf_get_reg(cpu, rt);
846
+ ret = hvf_sysreg_write(cpu, reg, val);
847
+ }
848
+
849
+ advance_pc = !ret;
850
+ break;
851
+ }
852
+ case EC_WFX_TRAP:
853
+ advance_pc = true;
854
+ break;
855
+ case EC_AA64_HVC:
856
+ cpu_synchronize_state(cpu);
857
+ trace_hvf_unknown_hvc(env->xregs[0]);
858
+ /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
859
+ env->xregs[0] = -1;
860
+ break;
861
+ case EC_AA64_SMC:
862
+ cpu_synchronize_state(cpu);
863
+ trace_hvf_unknown_smc(env->xregs[0]);
864
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
865
+ break;
866
+ default:
867
+ cpu_synchronize_state(cpu);
868
+ trace_hvf_exit(syndrome, ec, env->pc);
869
+ error_report("0x%llx: unhandled exception ec=0x%x", env->pc, ec);
870
+ }
871
+
872
+ if (advance_pc) {
873
+ uint64_t pc;
874
+
875
+ flush_cpu_state(cpu);
876
+
877
+ r = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_PC, &pc);
878
+ assert_hvf_ok(r);
879
+ pc += 4;
880
+ r = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_PC, pc);
881
+ assert_hvf_ok(r);
882
+ }
883
+
884
+ return 0;
885
+}
177
+}
886
+
178
+
887
+static const VMStateDescription vmstate_hvf_vtimer = {
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
888
+ .name = "hvf-vtimer",
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
889
+ .version_id = 1,
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
890
+ .minimum_version_id = 1,
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
891
+ .fields = (VMStateField[]) {
892
+ VMSTATE_UINT64(vtimer_val, HVFVTimer),
893
+ VMSTATE_END_OF_LIST()
894
+ },
895
+};
896
+
897
+static void hvf_vm_state_change(void *opaque, bool running, RunState state)
898
+{
899
+ HVFVTimer *s = opaque;
900
+
901
+ if (running) {
902
+ /* Update vtimer offset on all CPUs */
903
+ hvf_state->vtimer_offset = mach_absolute_time() - s->vtimer_val;
904
+ cpu_synchronize_all_states();
905
+ } else {
906
+ /* Remember vtimer value on every pause */
907
+ s->vtimer_val = hvf_vtimer_val_raw();
908
+ }
909
+}
910
+
911
+int hvf_arch_init(void)
912
+{
913
+ hvf_state->vtimer_offset = mach_absolute_time();
914
+ vmstate_register(NULL, 0, &vmstate_hvf_vtimer, &vtimer);
915
+ qemu_add_vm_change_state_handler(hvf_vm_state_change, &vtimer);
916
+ return 0;
917
+}
918
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
919
index XXXXXXX..XXXXXXX 100644
920
--- a/target/i386/hvf/hvf.c
921
+++ b/target/i386/hvf/hvf.c
922
@@ -XXX,XX +XXX,XX @@ static inline bool apic_bus_freq_is_known(CPUX86State *env)
923
return env->apic_bus_freq != 0;
924
}
925
926
+void hvf_kick_vcpu_thread(CPUState *cpu)
927
+{
928
+ cpus_kick_thread(cpu);
929
+}
930
+
931
int hvf_arch_init(void)
932
{
933
return 0;
934
diff --git a/MAINTAINERS b/MAINTAINERS
935
index XXXXXXX..XXXXXXX 100644
936
--- a/MAINTAINERS
937
+++ b/MAINTAINERS
938
@@ -XXX,XX +XXX,XX @@ F: accel/accel-*.c
939
F: accel/Makefile.objs
940
F: accel/stubs/Makefile.objs
941
942
+Apple Silicon HVF CPUs
943
+M: Alexander Graf <agraf@csgraf.de>
944
+S: Maintained
945
+F: target/arm/hvf/
946
+
947
X86 HVF CPUs
948
M: Cameron Esfahani <dirty@apple.com>
949
M: Roman Bolshakov <r.bolshakov@yadro.com>
950
diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
951
new file mode 100644
952
index XXXXXXX..XXXXXXX
953
--- /dev/null
954
+++ b/target/arm/hvf/trace-events
955
@@ -XXX,XX +XXX,XX @@
956
+hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg read at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)"
957
+hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)"
958
+hvf_inject_fiq(void) "injecting FIQ"
959
+hvf_inject_irq(void) "injecting IRQ"
960
+hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]"
961
+hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64
962
+hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")"
963
+hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
964
+hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
965
+hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
966
--
183
--
967
2.20.1
184
2.25.1
968
969
diff view generated by jsdifflib
1
Optimize the MVE VSHLL insns by using TCG vector ops when possible.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
This includes the VMOVL insn, which we handle in mve.decode as "VSHLL
3
with zero shift count".
4
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Message-id: 20210913095440.13462-11-peter.maydell@linaro.org
8
---
7
---
9
target/arm/translate-mve.c | 67 +++++++++++++++++++++++++++++++++-----
8
target/arm/helper-sme.h | 5 +++
10
1 file changed, 59 insertions(+), 8 deletions(-)
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
11
13
12
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/translate-mve.c
16
--- a/target/arm/helper-sme.h
15
+++ b/target/arm/translate-mve.c
17
+++ b/target/arm/helper-sme.h
16
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
17
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
18
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
-#define DO_VSHLL(INSN, FN) \
22
+
21
- static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
22
- { \
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
23
- static MVEGenTwoOpShiftFn * const fns[] = { \
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
24
- gen_helper_mve_##FN##b, \
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
- gen_helper_mve_##FN##h, \
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
26
- }; \
28
index XXXXXXX..XXXXXXX 100644
27
- return do_2shift(s, a, fns[a->size], false); \
29
--- a/target/arm/sme.decode
28
+#define DO_VSHLL(INSN, FN) \
30
+++ b/target/arm/sme.decode
29
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
30
+ { \
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
31
+ static MVEGenTwoOpShiftFn * const fns[] = { \
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
32
+ gen_helper_mve_##FN##b, \
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
33
+ gen_helper_mve_##FN##h, \
35
+
34
+ }; \
36
+### SME Outer Product
35
+ return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
36
}
58
}
37
59
}
38
+/*
60
+
39
+ * For the VSHLL vector helpers, the vece is the size of the input
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
40
+ * (ie MO_8 or MO_16); the helpers want to work in the output size.
62
+ void *vpm, void *vst, uint32_t desc)
41
+ * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
42
+ */
43
+static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
44
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
45
+{
63
+{
46
+ unsigned ovece = vece + 1;
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
47
+ unsigned ibits = vece == MO_8 ? 8 : 16;
65
+ uint32_t neg = simd_data(desc) << 31;
48
+ tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
66
+ uint16_t *pn = vpn, *pm = vpm;
49
+ tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
67
+ float_status fpst;
50
+}
51
+
68
+
52
+static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
69
+ /*
53
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
70
+ * Make a copy of float_status because this operation does not
54
+{
71
+ * update the cumulative fp exception status. It also produces
55
+ unsigned ovece = vece + 1;
72
+ * default nans.
56
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
73
+ */
57
+ ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
74
+ fpst = *(float_status *)vst;
58
+ tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
75
+ set_default_nan_mode(true, &fpst);
59
+}
60
+
76
+
61
+static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
77
+ for (row = 0; row < oprsz; ) {
62
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
78
+ uint16_t pa = pn[H2(row >> 4)];
63
+{
79
+ do {
64
+ unsigned ovece = vece + 1;
80
+ if (pa & 1) {
65
+ unsigned ibits = vece == MO_8 ? 8 : 16;
81
+ void *vza_row = vza + tile_vslice_offset(row);
66
+ if (shift == 0) {
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
67
+ tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
83
+
68
+ } else {
84
+ for (col = 0; col < oprsz; ) {
69
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
85
+ uint16_t pb = pm[H2(col >> 4)];
70
+ ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
86
+ do {
71
+ tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
72
+ }
100
+ }
73
+}
101
+}
74
+
102
+
75
+static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
76
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
104
+ void *vpm, void *vst, uint32_t desc)
77
+{
105
+{
78
+ unsigned ovece = vece + 1;
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
79
+ unsigned ibits = vece == MO_8 ? 8 : 16;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
80
+ if (shift == 0) {
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
81
+ tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
109
+ uint8_t *pn = vpn, *pm = vpm;
82
+ } else {
110
+ float_status fpst = *(float_status *)vst;
83
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
111
+
84
+ ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
112
+ set_default_nan_mode(true, &fpst);
85
+ tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
86
+ }
126
+ }
87
+}
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
88
+
136
+
89
DO_VSHLL(VSHLL_BS, vshllbs)
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
90
DO_VSHLL(VSHLL_BU, vshllbu)
138
+ gen_helper_gvec_5_ptr *fn)
91
DO_VSHLL(VSHLL_TS, vshllts)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
92
--
168
--
93
2.20.1
169
2.25.1
94
95
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We need to handle PSCI calls. Most of the TCG code works for us,
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
but we can simplify it to only handle aa64 mode and we need to
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
5
handle SUSPEND differently.
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
6
13
7
This patch takes the TCG code as template and duplicates it in HVF.
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
8
9
To tell the guest that we support PSCI 0.2 now, update the check in
10
arm_cpu_initfn() as well.
11
12
Signed-off-by: Alexander Graf <agraf@csgraf.de>
13
Reviewed-by: Sergio Lopez <slp@redhat.com>
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
15
Message-id: 20210916155404.86958-8-agraf@csgraf.de
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
target/arm/cpu.c | 4 +-
19
target/arm/hvf/hvf.c | 141 ++++++++++++++++++++++++++++++++++--
20
target/arm/hvf/trace-events | 1 +
21
3 files changed, 139 insertions(+), 7 deletions(-)
22
23
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
24
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/cpu.c
16
--- a/target/arm/helper-sme.h
26
+++ b/target/arm/cpu.c
17
+++ b/target/arm/helper-sme.h
27
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_initfn(Object *obj)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
28
cpu->psci_version = 1; /* By default assume PSCI v0.1 */
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
29
cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
30
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
31
- if (tcg_enabled()) {
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
32
- cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
33
+ if (tcg_enabled() || hvf_enabled()) {
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
34
+ cpu->psci_version = 2; /* TCG and HVF implement PSCI 0.2 */
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
35
}
40
}
36
}
41
}
37
38
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/hvf/hvf.c
41
+++ b/target/arm/hvf/hvf.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "hw/irq.h"
44
#include "qemu/main-loop.h"
45
#include "sysemu/cpus.h"
46
+#include "arm-powerctl.h"
47
#include "target/arm/cpu.h"
48
#include "target/arm/internals.h"
49
#include "trace/trace-target_arm_hvf.h"
50
@@ -XXX,XX +XXX,XX @@
51
#define TMR_CTL_IMASK (1 << 1)
52
#define TMR_CTL_ISTATUS (1 << 2)
53
54
+static void hvf_wfi(CPUState *cpu);
55
+
42
+
56
typedef struct HVFVTimer {
43
+/*
57
/* Vtimer value during migration and paused state */
44
+ * Alter PAIR as needed for controlling predicates being false,
58
uint64_t vtimer_val;
45
+ * and for NEG on an enabled row element.
59
@@ -XXX,XX +XXX,XX @@ static void hvf_raise_exception(CPUState *cpu, uint32_t excp,
46
+ */
60
arm_cpu_do_interrupt(cpu);
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
61
}
62
63
+static void hvf_psci_cpu_off(ARMCPU *arm_cpu)
64
+{
48
+{
65
+ int32_t ret = arm_set_cpu_off(arm_cpu->mp_affinity);
49
+ /*
66
+ assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS);
50
+ * The pseudocode uses a conditional negate after the conditional zero.
51
+ * It is simpler here to unconditionally negate before conditional zero.
52
+ */
53
+ pair ^= neg;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
67
+}
61
+}
68
+
62
+
69
+/*
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
70
+ * Handle a PSCI call.
64
+ void *vpm, uint32_t desc)
71
+ *
72
+ * Returns 0 on success
73
+ * -1 when the PSCI call is unknown,
74
+ */
75
+static bool hvf_handle_psci_call(CPUState *cpu)
76
+{
65
+{
77
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
78
+ CPUARMState *env = &arm_cpu->env;
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
79
+ uint64_t param[4] = {
68
+ uint16_t *pn = vpn, *pm = vpm;
80
+ env->xregs[0],
81
+ env->xregs[1],
82
+ env->xregs[2],
83
+ env->xregs[3]
84
+ };
85
+ uint64_t context_id, mpidr;
86
+ bool target_aarch64 = true;
87
+ CPUState *target_cpu_state;
88
+ ARMCPU *target_cpu;
89
+ target_ulong entry;
90
+ int target_el = 1;
91
+ int32_t ret = 0;
92
+
69
+
93
+ trace_hvf_psci_call(param[0], param[1], param[2], param[3],
70
+ for (row = 0; row < oprsz; ) {
94
+ arm_cpu->mp_affinity);
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
95
+
75
+
96
+ switch (param[0]) {
76
+ n = f16mop_adj_pair(n, prow, neg);
97
+ case QEMU_PSCI_0_2_FN_PSCI_VERSION:
98
+ ret = QEMU_PSCI_0_2_RET_VERSION_0_2;
99
+ break;
100
+ case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
101
+ ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */
102
+ break;
103
+ case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
104
+ case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
105
+ mpidr = param[1];
106
+
77
+
107
+ switch (param[2]) {
78
+ for (col = 0; col < oprsz; ) {
108
+ case 0:
79
+ uint16_t pcol = pm[H2(col >> 4)];
109
+ target_cpu_state = arm_get_cpu_by_id(mpidr);
80
+ do {
110
+ if (!target_cpu_state) {
81
+ if (prow & pcol & 0b0101) {
111
+ ret = QEMU_PSCI_RET_INVALID_PARAMS;
82
+ uint32_t *a = vza_row + H1_4(col);
112
+ break;
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
113
+ }
92
+ }
114
+ target_cpu = ARM_CPU(target_cpu_state);
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
115
+
112
+
116
+ ret = target_cpu->power_state;
113
+ if (!sme_smza_enabled_check(s)) {
117
+ break;
114
+ return true;
118
+ default:
119
+ /* Everything above affinity level 0 is always on. */
120
+ ret = 0;
121
+ }
122
+ break;
123
+ case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
124
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
125
+ /*
126
+ * QEMU reset and shutdown are async requests, but PSCI
127
+ * mandates that we never return from the reset/shutdown
128
+ * call, so power the CPU off now so it doesn't execute
129
+ * anything further.
130
+ */
131
+ hvf_psci_cpu_off(arm_cpu);
132
+ break;
133
+ case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
134
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
135
+ hvf_psci_cpu_off(arm_cpu);
136
+ break;
137
+ case QEMU_PSCI_0_1_FN_CPU_ON:
138
+ case QEMU_PSCI_0_2_FN_CPU_ON:
139
+ case QEMU_PSCI_0_2_FN64_CPU_ON:
140
+ mpidr = param[1];
141
+ entry = param[2];
142
+ context_id = param[3];
143
+ ret = arm_set_cpu_on(mpidr, entry, context_id,
144
+ target_el, target_aarch64);
145
+ break;
146
+ case QEMU_PSCI_0_1_FN_CPU_OFF:
147
+ case QEMU_PSCI_0_2_FN_CPU_OFF:
148
+ hvf_psci_cpu_off(arm_cpu);
149
+ break;
150
+ case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
151
+ case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
152
+ case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
153
+ /* Affinity levels are not supported in QEMU */
154
+ if (param[1] & 0xfffe0000) {
155
+ ret = QEMU_PSCI_RET_INVALID_PARAMS;
156
+ break;
157
+ }
158
+ /* Powerdown is not supported, we always go into WFI */
159
+ env->xregs[0] = 0;
160
+ hvf_wfi(cpu);
161
+ break;
162
+ case QEMU_PSCI_0_1_FN_MIGRATE:
163
+ case QEMU_PSCI_0_2_FN_MIGRATE:
164
+ ret = QEMU_PSCI_RET_NOT_SUPPORTED;
165
+ break;
166
+ default:
167
+ return false;
168
+ }
115
+ }
169
+
116
+
170
+ env->xregs[0] = ret;
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
171
+ return true;
130
+ return true;
172
+}
131
+}
173
+
132
+
174
static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
175
{
135
{
176
ARMCPU *arm_cpu = ARM_CPU(cpu);
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
177
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
137
178
break;
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
179
case EC_AA64_HVC:
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
180
cpu_synchronize_state(cpu);
181
- trace_hvf_unknown_hvc(env->xregs[0]);
182
- /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
183
- env->xregs[0] = -1;
184
+ if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) {
185
+ if (!hvf_handle_psci_call(cpu)) {
186
+ trace_hvf_unknown_hvc(env->xregs[0]);
187
+ /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
188
+ env->xregs[0] = -1;
189
+ }
190
+ } else {
191
+ trace_hvf_unknown_hvc(env->xregs[0]);
192
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
193
+ }
194
break;
195
case EC_AA64_SMC:
196
cpu_synchronize_state(cpu);
197
- trace_hvf_unknown_smc(env->xregs[0]);
198
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
199
+ if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_SMC) {
200
+ advance_pc = true;
201
+
140
+
202
+ if (!hvf_handle_psci_call(cpu)) {
141
+/* TODO: FEAT_EBF16 */
203
+ trace_hvf_unknown_smc(env->xregs[0]);
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
204
+ /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
205
+ env->xregs[0] = -1;
206
+ }
207
+ } else {
208
+ trace_hvf_unknown_smc(env->xregs[0]);
209
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
210
+ }
211
break;
212
default:
213
cpu_synchronize_state(cpu);
214
diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
215
index XXXXXXX..XXXXXXX 100644
216
--- a/target/arm/hvf/trace-events
217
+++ b/target/arm/hvf/trace-events
218
@@ -XXX,XX +XXX,XX @@ hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_
219
hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
220
hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
221
hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
222
+hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x"
223
--
143
--
224
2.20.1
144
2.25.1
225
226
diff view generated by jsdifflib
1
Optimize the MVE VSHL and VSHR immediate forms by using TCG vector
1
From: Richard Henderson <richard.henderson@linaro.org>
2
ops when possible.
3
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Message-id: 20210913095440.13462-10-peter.maydell@linaro.org
7
---
7
---
8
target/arm/translate-mve.c | 83 +++++++++++++++++++++++++++++---------
8
target/arm/helper-sme.h | 2 ++
9
1 file changed, 63 insertions(+), 20 deletions(-)
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
10
13
11
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-mve.c
16
--- a/target/arm/helper-sme.h
14
+++ b/target/arm/translate-mve.c
17
+++ b/target/arm/helper-sme.h
15
@@ -XXX,XX +XXX,XX @@ static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
16
return do_1imm(s, a, fn);
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
33
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
41
return pair;
17
}
42
}
18
43
19
-static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
20
- bool negateshift)
45
+ float_status *s_std, float_status *s_odd)
21
+static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
22
+ bool negateshift, GVecGen2iFn vecfn)
23
{
24
TCGv_ptr qd, qm;
25
int shift = a->shift;
26
@@ -XXX,XX +XXX,XX @@ static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
27
shift = -shift;
28
}
29
30
- qd = mve_qreg_ptr(a->qd);
31
- qm = mve_qreg_ptr(a->qm);
32
- fn(cpu_env, qd, qm, tcg_constant_i32(shift));
33
- tcg_temp_free_ptr(qd);
34
- tcg_temp_free_ptr(qm);
35
+ if (vecfn && mve_no_predication(s)) {
36
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
37
+ shift, 16, 16);
38
+ } else {
39
+ qd = mve_qreg_ptr(a->qd);
40
+ qm = mve_qreg_ptr(a->qm);
41
+ fn(cpu_env, qd, qm, tcg_constant_i32(shift));
42
+ tcg_temp_free_ptr(qd);
43
+ tcg_temp_free_ptr(qm);
44
+ }
45
mve_update_eci(s);
46
return true;
47
}
48
49
-#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
50
- static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
51
- { \
52
- static MVEGenTwoOpShiftFn * const fns[] = { \
53
- gen_helper_mve_##FN##b, \
54
- gen_helper_mve_##FN##h, \
55
- gen_helper_mve_##FN##w, \
56
- NULL, \
57
- }; \
58
- return do_2shift(s, a, fns[a->size], NEGATESHIFT); \
59
+static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
60
+ bool negateshift)
61
+{
46
+{
62
+ return do_2shift_vec(s, a, fn, negateshift, NULL);
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
53
+
54
+ /*
55
+ * The ARM pseudocode function FPDot performs both multiplies
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
63
+}
69
+}
64
+
70
+
65
+#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN) \
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
66
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
72
+ void *vpm, void *vst, uint32_t desc)
67
+ { \
73
+{
68
+ static MVEGenTwoOpShiftFn * const fns[] = { \
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
69
+ gen_helper_mve_##FN##b, \
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
70
+ gen_helper_mve_##FN##h, \
76
+ uint16_t *pn = vpn, *pm = vpm;
71
+ gen_helper_mve_##FN##w, \
77
+ float_status fpst_odd, fpst_std;
72
+ NULL, \
73
+ }; \
74
+ return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN); \
75
}
76
77
-DO_2SHIFT(VSHLI, vshli_u, false)
78
+#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
79
+ DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
80
+
78
+
81
+static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
82
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
83
+{
84
+ /*
79
+ /*
85
+ * We get here with a negated shift count, and we must handle
80
+ * Make a copy of float_status because this operation does not
86
+ * shifts by the element size, which tcg_gen_gvec_sari() does not do.
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
87
+ */
83
+ */
88
+ shift = -shift;
84
+ fpst_std = *(float_status *)vst;
89
+ if (shift == (8 << vece)) {
85
+ set_default_nan_mode(true, &fpst_std);
90
+ shift--;
86
+ fpst_odd = fpst_std;
91
+ }
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
92
+ tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
93
+}
94
+
88
+
95
+static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
89
+ for (row = 0; row < oprsz; ) {
96
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
90
+ uint16_t prow = pn[H2(row >> 4)];
97
+{
91
+ do {
98
+ /*
92
+ void *vza_row = vza + tile_vslice_offset(row);
99
+ * We get here with a negated shift count, and we must handle
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
100
+ * shifts by the element size, which tcg_gen_gvec_shri() does not do.
94
+
101
+ */
95
+ n = f16mop_adj_pair(n, prow, neg);
102
+ shift = -shift;
96
+
103
+ if (shift == (8 << vece)) {
97
+ for (col = 0; col < oprsz; ) {
104
+ tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
98
+ uint16_t pcol = pm[H2(col >> 4)];
105
+ } else {
99
+ do {
106
+ tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
107
+ }
115
+ }
108
+}
116
+}
109
+
117
+
110
+DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
111
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
119
void *vpm, uint32_t desc)
112
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
120
{
113
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
114
/* These right shifts use a left-shift helper with negated shift count */
122
index XXXXXXX..XXXXXXX 100644
115
-DO_2SHIFT(VSHRI_S, vshli_s, true)
123
--- a/target/arm/translate-sme.c
116
-DO_2SHIFT(VSHRI_U, vshli_u, true)
124
+++ b/target/arm/translate-sme.c
117
+DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
118
+DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
126
return true;
119
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
127
}
120
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
128
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
121
132
122
--
133
--
123
2.20.1
134
2.25.1
124
125
diff view generated by jsdifflib
1
Architecturally, for an M-profile CPU with the LOB feature the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
LTPSIZE field in FPDSCR is always constant 4. QEMU's implementation
3
enforces this everywhere, except that we don't check that it is true
4
in incoming migration data.
5
2
6
We're going to add come in gen_update_fp_context() which relies on
3
This is SMOPA, SUMOPA, USMOPA_s, UMOPA, for both Int8 and Int16.
7
the "always 4" property. Since this is TCG-only, we don't actually
8
need to be robust to bogus incoming migration data, and the effect of
9
it being wrong would be wrong code generation rather than a QEMU
10
crash; but if it did ever happen somehow it would be very difficult
11
to track down the cause. Add a check so that we fail the inbound
12
migration if the FPDSCR.LTPSIZE value is incorrect.
13
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
Message-id: 20210913095440.13462-3-peter.maydell@linaro.org
17
---
9
---
18
target/arm/machine.c | 13 +++++++++++++
10
target/arm/helper-sme.h | 16 ++++++++
19
1 file changed, 13 insertions(+)
11
target/arm/sme.decode | 10 +++++
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
20
15
21
diff --git a/target/arm/machine.c b/target/arm/machine.c
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
22
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/machine.c
18
--- a/target/arm/helper-sme.h
24
+++ b/target/arm/machine.c
19
+++ b/target/arm/helper-sme.h
25
@@ -XXX,XX +XXX,XX @@ static int cpu_post_load(void *opaque, int version_id)
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
hw_breakpoint_update_all(cpu);
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
hw_watchpoint_update_all(cpu);
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
28
23
void, ptr, ptr, ptr, ptr, ptr, i32)
29
+ /*
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
30
+ * TCG gen_update_fp_context() relies on the invariant that
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
31
+ * FPDSCR.LTPSIZE is constant 4 for M-profile with the LOB extension;
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
32
+ * forbid bogus incoming data with some other value.
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
33
+ */
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
34
+ if (arm_feature(env, ARM_FEATURE_M) && cpu_isar_feature(aa32_lob, cpu)) {
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
35
+ if (extract32(env->v7m.fpdscr[M_REG_NS],
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
36
+ FPCR_LTPSIZE_SHIFT, FPCR_LTPSIZE_LENGTH) != 4 ||
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
37
+ extract32(env->v7m.fpdscr[M_REG_S],
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
38
+ FPCR_LTPSIZE_SHIFT, FPCR_LTPSIZE_LENGTH) != 4) {
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
39
+ return -1;
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
48
+
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
53
+
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/sme_helper.c
61
+++ b/target/arm/sme_helper.c
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
63
} while (row & 15);
64
}
65
}
66
+
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
68
+
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
72
+{
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
75
+
76
+ for (row = 0; row < oprsz; ++row) {
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
80
+
81
+ for (col = 0; col < oprsz; ++col) {
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
40
+ }
86
+ }
41
+ }
87
+ }
42
if (!kvm_enabled()) {
88
+}
43
pmu_op_finish(&cpu->env);
89
+
44
}
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
92
+{ \
93
+ uint32_t sum0 = 0, sum1 = 0; \
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
95
+ n &= expand_pred_b(p); \
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
110
+}
111
+
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
114
+{ \
115
+ uint64_t sum = 0; \
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
117
+ n &= expand_pred_h(p); \
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
123
+}
124
+
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
129
+
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
134
+
135
+#define DEF_IMOPH(NAME) \
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
137
+ void *vpm, uint32_t desc) \
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
139
+
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
45
--
166
--
46
2.20.1
167
2.25.1
47
48
diff view generated by jsdifflib
1
From: Peter Collingbourne <pcc@google.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Sleep on WFI until the VTIMER is due but allow ourselves to be woken
3
This is an SVE instruction that operates using the SVE vector
4
up on IPI.
4
length but that it is present only if SME is implemented.
5
5
6
In this implementation IPI is blocked on the CPU thread at startup and
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
pselect() is used to atomically unblock the signal and begin sleeping.
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
The signal is sent unconditionally so there's no need to worry about
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
9
races between actually sleeping and the "we think we're sleeping"
10
state. It may lead to an extra wakeup but that's better than missing
11
it entirely.
12
13
Signed-off-by: Peter Collingbourne <pcc@google.com>
14
Signed-off-by: Alexander Graf <agraf@csgraf.de>
15
Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
16
Reviewed-by: Sergio Lopez <slp@redhat.com>
17
Message-id: 20210916155404.86958-6-agraf@csgraf.de
18
[agraf: Remove unused 'set' variable, always advance PC on WFX trap,
19
support vm stop / continue operations and cntv offsets]
20
Signed-off-by: Alexander Graf <agraf@csgraf.de>
21
Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
22
Reviewed-by: Sergio Lopez <slp@redhat.com>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
---
10
---
25
include/sysemu/hvf_int.h | 1 +
11
target/arm/sve.decode | 20 +++++++++++++
26
accel/hvf/hvf-accel-ops.c | 5 +--
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
27
target/arm/hvf/hvf.c | 79 +++++++++++++++++++++++++++++++++++++++
13
2 files changed, 77 insertions(+)
28
3 files changed, 82 insertions(+), 3 deletions(-)
29
14
30
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
31
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
32
--- a/include/sysemu/hvf_int.h
17
--- a/target/arm/sve.decode
33
+++ b/include/sysemu/hvf_int.h
18
+++ b/target/arm/sve.decode
34
@@ -XXX,XX +XXX,XX @@ struct hvf_vcpu_state {
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
35
uint64_t fd;
20
36
void *exit;
21
### SVE2 floating-point bfloat16 dot-product (indexed)
37
bool vtimer_masked;
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
38
+ sigset_t unblock_ipi_mask;
23
+
39
};
24
+### SVE broadcast predicate element
40
25
+
41
void assert_hvf_ok(hv_return_t ret);
26
+&psel esz pd pn pm rv imm
42
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
27
+%psel_rv 16:2 !function=plus_12
28
+%psel_imm_b 22:2 19:2
29
+%psel_imm_h 22:2 20:1
30
+%psel_imm_s 22:2
31
+%psel_imm_d 23:1
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
43
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
44
--- a/accel/hvf/hvf-accel-ops.c
45
--- a/target/arm/translate-sve.c
45
+++ b/accel/hvf/hvf-accel-ops.c
46
+++ b/target/arm/translate-sve.c
46
@@ -XXX,XX +XXX,XX @@ static int hvf_init_vcpu(CPUState *cpu)
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
47
cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
48
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
49
/* init cpu signals */
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
50
- sigset_t set;
51
+
51
struct sigaction sigact;
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
52
53
memset(&sigact, 0, sizeof(sigact));
54
sigact.sa_handler = dummy_signal;
55
sigaction(SIG_IPI, &sigact, NULL);
56
57
- pthread_sigmask(SIG_BLOCK, NULL, &set);
58
- sigdelset(&set, SIG_IPI);
59
+ pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
60
+ sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
61
62
#ifdef __aarch64__
63
r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
64
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/arm/hvf/hvf.c
67
+++ b/target/arm/hvf/hvf.c
68
@@ -XXX,XX +XXX,XX @@
69
* QEMU Hypervisor.framework support for Apple Silicon
70
71
* Copyright 2020 Alexander Graf <agraf@csgraf.de>
72
+ * Copyright 2020 Google LLC
73
*
74
* This work is licensed under the terms of the GNU GPL, version 2 or later.
75
* See the COPYING file in the top-level directory.
76
@@ -XXX,XX +XXX,XX @@ int hvf_arch_init_vcpu(CPUState *cpu)
77
78
void hvf_kick_vcpu_thread(CPUState *cpu)
79
{
80
+ cpus_kick_thread(cpu);
81
hv_vcpus_exit(&cpu->hvf->fd, 1);
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static uint64_t hvf_vtimer_val_raw(void)
85
return mach_absolute_time() - hvf_state->vtimer_offset;
86
}
87
88
+static uint64_t hvf_vtimer_val(void)
89
+{
53
+{
90
+ if (!runstate_is_running()) {
54
+ int vl = vec_full_reg_size(s);
91
+ /* VM is paused, the vtimer value is in vtimer.vtimer_val */
55
+ int pl = pred_gvec_reg_size(s);
92
+ return vtimer.vtimer_val;
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
93
+ }
65
+ }
94
+
66
+
95
+ return hvf_vtimer_val_raw();
67
+ tmp = tcg_temp_new_i64();
96
+}
68
+ dbit = tcg_temp_new_i64();
69
+ didx = tcg_temp_new_i64();
70
+ ptr = tcg_temp_new_ptr();
97
+
71
+
98
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
72
+ /* Compute the predicate element. */
99
+{
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
100
+ /*
74
+ if (is_power_of_2(elements)) {
101
+ * Use pselect to sleep so that other threads can IPI us while we're
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
102
+ * sleeping.
76
+ } else {
103
+ */
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
104
+ qatomic_mb_set(&cpu->thread_kicked, false);
105
+ qemu_mutex_unlock_iothread();
106
+ pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
107
+ qemu_mutex_lock_iothread();
108
+}
109
+
110
+static void hvf_wfi(CPUState *cpu)
111
+{
112
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
113
+ struct timespec ts;
114
+ hv_return_t r;
115
+ uint64_t ctl;
116
+ uint64_t cval;
117
+ int64_t ticks_to_sleep;
118
+ uint64_t seconds;
119
+ uint64_t nanos;
120
+ uint32_t cntfrq;
121
+
122
+ if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) {
123
+ /* Interrupt pending, no need to wait */
124
+ return;
125
+ }
78
+ }
126
+
79
+
127
+ r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
80
+ /* Extract the predicate byte and bit indices. */
128
+ assert_hvf_ok(r);
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
129
+
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
130
+ if (!(ctl & 1) || (ctl & 2)) {
83
+ tcg_gen_shri_i64(didx, tmp, 3);
131
+ /* Timer disabled or masked, just wait for an IPI. */
84
+ if (HOST_BIG_ENDIAN) {
132
+ hvf_wait_for_ipi(cpu, NULL);
85
+ tcg_gen_xori_i64(didx, didx, 7);
133
+ return;
134
+ }
86
+ }
135
+
87
+
136
+ r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
88
+ /* Load the predicate word. */
137
+ assert_hvf_ok(r);
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
138
+
92
+
139
+ ticks_to_sleep = cval - hvf_vtimer_val();
93
+ /* Extract the predicate bit and replicate to MO_64. */
140
+ if (ticks_to_sleep < 0) {
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
141
+ return;
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
142
+ }
96
+ tcg_gen_neg_i64(tmp, tmp);
143
+
97
+
144
+ cntfrq = gt_cntfrq_period_ns(arm_cpu);
98
+ /* Apply to either copy the source, or write zeros. */
145
+ seconds = muldiv64(ticks_to_sleep, cntfrq, NANOSECONDS_PER_SECOND);
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
146
+ ticks_to_sleep -= muldiv64(seconds, NANOSECONDS_PER_SECOND, cntfrq);
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
147
+ nanos = ticks_to_sleep * cntfrq;
148
+
101
+
149
+ /*
102
+ tcg_temp_free_i64(tmp);
150
+ * Don't sleep for less than the time a context switch would take,
103
+ tcg_temp_free_i64(dbit);
151
+ * so that we can satisfy fast timer requests on the same CPU.
104
+ tcg_temp_free_i64(didx);
152
+ * Measurements on M1 show the sweet spot to be ~2ms.
105
+ tcg_temp_free_ptr(ptr);
153
+ */
106
+ return true;
154
+ if (!seconds && nanos < (2 * SCALE_MS)) {
155
+ return;
156
+ }
157
+
158
+ ts = (struct timespec) { seconds, nanos };
159
+ hvf_wait_for_ipi(cpu, &ts);
160
+}
107
+}
161
+
162
static void hvf_sync_vtimer(CPUState *cpu)
163
{
164
ARMCPU *arm_cpu = ARM_CPU(cpu);
165
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
166
}
167
case EC_WFX_TRAP:
168
advance_pc = true;
169
+ if (!(syndrome & WFX_IS_WFE)) {
170
+ hvf_wfi(cpu);
171
+ }
172
break;
173
case EC_AA64_HVC:
174
cpu_synchronize_state(cpu);
175
--
108
--
176
2.20.1
109
2.25.1
177
178
diff view generated by jsdifflib
1
Coverity points out that we aren't checking the return value
1
From: Richard Henderson <richard.henderson@linaro.org>
2
from curl_easy_setopt().
3
2
4
Fixes: Coverity CID 1458895
3
This is an SVE instruction that operates using the SVE vector
5
Inspired-by: Peter Maydell <peter.maydell@linaro.org>
4
length but that it is present only if SME is implemented.
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
5
7
Reviewed-by: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Tested-by: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210910170656.366592-2-philmd@redhat.com
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
contrib/elf2dmp/download.c | 22 ++++++++++------------
11
target/arm/helper-sve.h | 2 ++
13
1 file changed, 10 insertions(+), 12 deletions(-)
12
target/arm/sve.decode | 1 +
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
14
16
15
diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/contrib/elf2dmp/download.c
19
--- a/target/arm/helper-sve.h
18
+++ b/contrib/elf2dmp/download.c
20
+++ b/target/arm/helper-sve.h
19
@@ -XXX,XX +XXX,XX @@ int download_url(const char *name, const char *url)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
20
goto out_curl;
22
21
}
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
24
23
- curl_easy_setopt(curl, CURLOPT_URL, url);
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
26
+
25
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
-
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
29
- if (curl_easy_perform(curl) != CURLE_OK) {
31
index XXXXXXX..XXXXXXX 100644
30
- err = 1;
32
--- a/target/arm/sve.decode
31
- fclose(file);
33
+++ b/target/arm/sve.decode
32
+ if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
33
+ || curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
34
+ || curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) != CURLE_OK
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
35
+ || curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
36
+ || curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
37
+ || curl_easy_perform(curl) != CURLE_OK) {
39
38
unlink(name);
40
# SVE vector splice (predicated, destructive)
39
- goto out_curl;
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
40
+ fclose(file);
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
41
+ err = 1;
43
index XXXXXXX..XXXXXXX 100644
42
+ } else {
44
--- a/target/arm/sve_helper.c
43
+ err = fclose(file);
45
+++ b/target/arm/sve_helper.c
44
}
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
45
47
46
- err = fclose(file);
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
47
-
49
48
out_curl:
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
49
curl_easy_cleanup(curl);
51
+{
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
53
+ uint64_t *d = vd, *n = vn;
54
+ uint8_t *pg = vg;
55
+
56
+ for (i = 0; i < opr_sz; i += 2) {
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
63
+ }
64
+}
65
+
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
76
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
78
+
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
80
gen_helper_sve_splice, a, a->esz)
50
81
51
--
82
--
52
2.20.1
83
2.25.1
53
54
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is an SVE instruction that operates using the SVE vector
4
length but that it is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper.h | 18 +++++++
12
target/arm/sve.decode | 5 ++
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, ptr, i32)
24
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
66
}
67
+
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
69
+{
70
+ tcg_gen_smax_i32(d, a, n);
71
+ tcg_gen_smin_i32(d, d, m);
72
+}
73
+
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
75
+{
76
+ tcg_gen_smax_i64(d, a, n);
77
+ tcg_gen_smin_i64(d, d, m);
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
201
--
202
2.25.1
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We will need PMC register definitions in accel specific code later.
3
We can handle both exception entry and exception return by
4
Move all constant definitions to common arm headers so we can reuse
4
hooking into aarch64_sve_change_el.
5
them.
6
5
7
Signed-off-by: Alexander Graf <agraf@csgraf.de>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Message-id: 20210916155404.86958-2-agraf@csgraf.de
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/internals.h | 44 ++++++++++++++++++++++++++++++++++++++++++
11
target/arm/helper.c | 15 +++++++++++++--
13
target/arm/helper.c | 44 ------------------------------------------
12
1 file changed, 13 insertions(+), 2 deletions(-)
14
2 files changed, 44 insertions(+), 44 deletions(-)
15
13
16
diff --git a/target/arm/internals.h b/target/arm/internals.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/internals.h
19
+++ b/target/arm/internals.h
20
@@ -XXX,XX +XXX,XX @@ enum MVEECIState {
21
/* All other values reserved */
22
};
23
24
+/* Definitions for the PMU registers */
25
+#define PMCRN_MASK 0xf800
26
+#define PMCRN_SHIFT 11
27
+#define PMCRLC 0x40
28
+#define PMCRDP 0x20
29
+#define PMCRX 0x10
30
+#define PMCRD 0x8
31
+#define PMCRC 0x4
32
+#define PMCRP 0x2
33
+#define PMCRE 0x1
34
+/*
35
+ * Mask of PMCR bits writeable by guest (not including WO bits like C, P,
36
+ * which can be written as 1 to trigger behaviour but which stay RAZ).
37
+ */
38
+#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
39
+
40
+#define PMXEVTYPER_P 0x80000000
41
+#define PMXEVTYPER_U 0x40000000
42
+#define PMXEVTYPER_NSK 0x20000000
43
+#define PMXEVTYPER_NSU 0x10000000
44
+#define PMXEVTYPER_NSH 0x08000000
45
+#define PMXEVTYPER_M 0x04000000
46
+#define PMXEVTYPER_MT 0x02000000
47
+#define PMXEVTYPER_EVTCOUNT 0x0000ffff
48
+#define PMXEVTYPER_MASK (PMXEVTYPER_P | PMXEVTYPER_U | PMXEVTYPER_NSK | \
49
+ PMXEVTYPER_NSU | PMXEVTYPER_NSH | \
50
+ PMXEVTYPER_M | PMXEVTYPER_MT | \
51
+ PMXEVTYPER_EVTCOUNT)
52
+
53
+#define PMCCFILTR 0xf8000000
54
+#define PMCCFILTR_M PMXEVTYPER_M
55
+#define PMCCFILTR_EL0 (PMCCFILTR | PMCCFILTR_M)
56
+
57
+static inline uint32_t pmu_num_counters(CPUARMState *env)
58
+{
59
+ return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT;
60
+}
61
+
62
+/* Bits allowed to be set/cleared for PMCNTEN* and PMINTEN* */
63
+static inline uint64_t pmu_counter_mask(CPUARMState *env)
64
+{
65
+ return (1 << 31) | ((1 << pmu_num_counters(env)) - 1);
66
+}
67
+
68
#endif
69
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
70
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/helper.c
16
--- a/target/arm/helper.c
72
+++ b/target/arm/helper.c
17
+++ b/target/arm/helper.c
73
@@ -XXX,XX +XXX,XX @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
74
REGINFO_SENTINEL
19
return;
75
};
20
}
76
21
77
-/* Definitions for the PMU registers */
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
78
-#define PMCRN_MASK 0xf800
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
79
-#define PMCRN_SHIFT 11
24
+
80
-#define PMCRLC 0x40
25
+ /*
81
-#define PMCRDP 0x20
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
82
-#define PMCRX 0x10
27
+ * invoke ResetSVEState when taking an exception from, or
83
-#define PMCRD 0x8
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
84
-#define PMCRC 0x4
29
+ */
85
-#define PMCRP 0x2
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
86
-#define PMCRE 0x1
31
+ arm_reset_sve_state(env);
87
-/*
32
+ return;
88
- * Mask of PMCR bits writeable by guest (not including WO bits like C, P,
33
+ }
89
- * which can be written as 1 to trigger behaviour but which stay RAZ).
34
+
90
- */
35
/*
91
-#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
92
-
37
* at ELx, or not available because the EL is in AArch32 state, then
93
-#define PMXEVTYPER_P 0x80000000
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
94
-#define PMXEVTYPER_U 0x40000000
39
* we already have the correct register contents when encountering the
95
-#define PMXEVTYPER_NSK 0x20000000
40
* vq0->vq0 transition between EL0->EL1.
96
-#define PMXEVTYPER_NSU 0x10000000
41
*/
97
-#define PMXEVTYPER_NSH 0x08000000
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
98
-#define PMXEVTYPER_M 0x04000000
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
99
-#define PMXEVTYPER_MT 0x02000000
44
? sve_vqm1_for_el(env, old_el) : 0);
100
-#define PMXEVTYPER_EVTCOUNT 0x0000ffff
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
101
-#define PMXEVTYPER_MASK (PMXEVTYPER_P | PMXEVTYPER_U | PMXEVTYPER_NSK | \
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
102
- PMXEVTYPER_NSU | PMXEVTYPER_NSH | \
47
? sve_vqm1_for_el(env, new_el) : 0);
103
- PMXEVTYPER_M | PMXEVTYPER_MT | \
48
104
- PMXEVTYPER_EVTCOUNT)
105
-
106
-#define PMCCFILTR 0xf8000000
107
-#define PMCCFILTR_M PMXEVTYPER_M
108
-#define PMCCFILTR_EL0 (PMCCFILTR | PMCCFILTR_M)
109
-
110
-static inline uint32_t pmu_num_counters(CPUARMState *env)
111
-{
112
- return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT;
113
-}
114
-
115
-/* Bits allowed to be set/cleared for PMCNTEN* and PMINTEN* */
116
-static inline uint64_t pmu_counter_mask(CPUARMState *env)
117
-{
118
- return (1 << 31) | ((1 << pmu_num_counters(env)) - 1);
119
-}
120
-
121
typedef struct pm_event {
122
uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */
123
/* If the event is supported on this CPU (used to generate PMCEID[01]) */
124
--
49
--
125
2.20.1
50
2.25.1
126
127
diff view generated by jsdifflib
1
Currently gen_jmp_tb() assumes that if it is called then the jump it
1
From: Richard Henderson <richard.henderson@linaro.org>
2
is handling is the only reason that we might be trying to end the TB,
3
so it will use goto_tb if it can. This is usually the case: mostly
4
"we did something that means we must end the TB" happens on a
5
non-branch instruction. However, there are cases where we decide
6
early in handling an instruction that we need to end the TB and
7
return to the main loop, and then the insn is a complex one that
8
involves gen_jmp_tb(). For instance, for M-profile FP instructions,
9
in gen_preserve_fp_state() which is called from vfp_access_check() we
10
want to force an exit to the main loop if lazy state preservation is
11
active and we are in icount mode.
12
2
13
Make gen_jmp_tb() look at the current value of is_jmp, and only use
3
Note that SME remains effectively disabled for user-only,
14
goto_tb if the previous is_jmp was DISAS_NEXT or DISAS_TOO_MANY.
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
15
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Message-id: 20210913095440.13462-2-peter.maydell@linaro.org
19
---
11
---
20
target/arm/translate.c | 34 +++++++++++++++++++++++++++++++++-
12
docs/system/arm/emulation.rst | 4 ++++
21
1 file changed, 33 insertions(+), 1 deletion(-)
13
target/arm/cpu64.c | 11 +++++++++++
14
2 files changed, 15 insertions(+)
22
15
23
diff --git a/target/arm/translate.c b/target/arm/translate.c
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
24
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/translate.c
18
--- a/docs/system/arm/emulation.rst
26
+++ b/target/arm/translate.c
19
+++ b/docs/system/arm/emulation.rst
27
@@ -XXX,XX +XXX,XX @@ static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
28
/* An indirect jump so that we still trigger the debug exception. */
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
29
gen_set_pc_im(s, dest);
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
30
s->base.is_jmp = DISAS_JUMP;
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
31
- } else {
24
+- FEAT_SME (Scalable Matrix Extension)
32
+ return;
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
33
+ }
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
34
+ switch (s->base.is_jmp) {
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
35
+ case DISAS_NEXT:
28
- FEAT_SPECRES (Speculation restriction instructions)
36
+ case DISAS_TOO_MANY:
29
- FEAT_SSBS (Speculative Store Bypass Safe)
37
+ case DISAS_NORETURN:
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
38
+ /*
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
39
+ * The normal case: just go to the destination TB.
32
index XXXXXXX..XXXXXXX 100644
40
+ * NB: NORETURN happens if we generate code like
33
--- a/target/arm/cpu64.c
41
+ * gen_brcondi(l);
34
+++ b/target/arm/cpu64.c
42
+ * gen_jmp();
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
43
+ * gen_set_label(l);
36
*/
44
+ * gen_jmp();
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
45
+ * on the second call to gen_jmp().
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
46
+ */
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
47
gen_goto_tb(s, tbno, dest);
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
48
+ break;
41
cpu->isar.id_aa64pfr1 = t;
49
+ case DISAS_UPDATE_NOCHAIN:
42
50
+ case DISAS_UPDATE_EXIT:
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
51
+ /*
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
52
+ * We already decided we're leaving the TB for some other reason.
45
cpu->isar.id_aa64dfr0 = t;
53
+ * Avoid using goto_tb so we really do exit back to the main loop
46
54
+ * and don't chain to another TB.
47
+ t = cpu->isar.id_aa64smfr0;
55
+ */
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
56
+ gen_set_pc_im(s, dest);
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
57
+ gen_goto_ptr();
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
58
+ s->base.is_jmp = DISAS_NORETURN;
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
59
+ break;
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
60
+ default:
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
61
+ /*
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
62
+ * We shouldn't be emitting code for a jump and also have
55
+ cpu->isar.id_aa64smfr0 = t;
63
+ * is_jmp set to one of the special cases like DISAS_SWI.
56
+
64
+ */
57
/* Replicate the same data to the 32-bit id registers. */
65
+ g_assert_not_reached();
58
aa32_max_features(cpu);
66
}
67
}
68
59
69
--
60
--
70
2.20.1
61
2.25.1
71
72
diff view generated by jsdifflib
1
Optimize the MVE VMVN insn by using TCG vector ops when possible.
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
3
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210913095440.13462-9-peter.maydell@linaro.org
6
---
7
---
7
target/arm/translate-mve.c | 2 +-
8
linux-user/aarch64/target_cpu.h | 5 ++++-
8
1 file changed, 1 insertion(+), 1 deletion(-)
9
1 file changed, 4 insertions(+), 1 deletion(-)
9
10
10
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate-mve.c
13
--- a/linux-user/aarch64/target_cpu.h
13
+++ b/target/arm/translate-mve.c
14
+++ b/linux-user/aarch64/target_cpu.h
14
@@ -XXX,XX +XXX,XX @@ static bool trans_VREV64(DisasContext *s, arg_1op *a)
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
15
16
16
static bool trans_VMVN(DisasContext *s, arg_1op *a)
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
17
{
18
{
18
- return do_1op(s, a, gen_helper_mve_vmvn);
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
19
+ return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
20
+ /*
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
* different from AArch32 Linux, which uses TPIDRRO.
23
*/
24
env->cp15.tpidr_el[0] = newtls;
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
26
+ env->cp15.tpidr2_el0 = 0;
20
}
27
}
21
28
22
static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
23
--
30
--
24
2.20.1
31
2.25.1
25
26
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
9
1 file changed, 9 insertions(+)
10
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/cpu_loop.c
14
+++ b/linux-user/aarch64/cpu_loop.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
16
17
switch (trapnr) {
18
case EXCP_SWI:
19
+ /*
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
22
+ */
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
25
+ arm_rebuild_hflags(env);
26
+ arm_reset_sve_state(env);
27
+ }
28
ret = do_syscall(env,
29
env->xregs[8],
30
env->xregs[0],
31
--
32
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Make sure to zero the currently reserved fields.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
linux-user/aarch64/signal.c | 9 ++++++++-
11
1 file changed, 8 insertions(+), 1 deletion(-)
12
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/linux-user/aarch64/signal.c
16
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
18
struct target_sve_context {
19
struct target_aarch64_ctx head;
20
uint16_t vl;
21
- uint16_t reserved[3];
22
+ uint16_t flags;
23
+ uint16_t reserved[2];
24
/* The actual SVE data immediately follows. It is laid out
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
26
* the original struct pointer.
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
30
31
+#define TARGET_SVE_SIG_FLAG_SM 1
32
+
33
struct target_rt_sigframe {
34
struct target_siginfo info;
35
struct target_ucontext uc;
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
37
{
38
int i, j;
39
40
+ memset(sve, 0, sizeof(*sve));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
42
__put_user(size, &sve->head.size);
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
46
+ }
47
48
/* Note that SVE regs are stored as a byte stream, with each byte element
49
* at a subsequent address. This corresponds to a little-endian store
50
--
51
2.25.1
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
During sbsa acs level 3 testing, it is seen that the GIC maintenance
3
Fold the return value setting into the goto, so each
4
interrupts are not triggered and the related test cases fail. This
4
point of failure need not do both.
5
is because we were incorrectly passing the value of the MISR register
6
(from maintenance_interrupt_state()) to qemu_set_irq() as the level
7
argument, whereas the device on the other end of this irq line
8
expects a 0/1 value.
9
5
10
Fix the logic to pass a 0/1 level indication, rather than a
11
0/not-0 value.
12
13
Fixes: c5fc89b36c0 ("hw/intc/arm_gicv3: Implement gicv3_cpuif_virt_update()")
14
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
16
Message-id: 20210915205809.59068-1-shashi.mallela@linaro.org
17
[PMM: tweaked commit message; collapsed nested if()s into one]
18
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
---
10
---
21
hw/intc/arm_gicv3_cpuif.c | 5 +++--
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
22
1 file changed, 3 insertions(+), 2 deletions(-)
12
1 file changed, 11 insertions(+), 15 deletions(-)
23
13
24
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
25
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/intc/arm_gicv3_cpuif.c
16
--- a/linux-user/aarch64/signal.c
27
+++ b/hw/intc/arm_gicv3_cpuif.c
17
+++ b/linux-user/aarch64/signal.c
28
@@ -XXX,XX +XXX,XX @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
19
struct target_sve_context *sve = NULL;
20
uint64_t extra_datap = 0;
21
bool used_extra = false;
22
- bool err = false;
23
int vq = 0, sve_size = 0;
24
25
target_restore_general_frame(env, sf);
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
27
switch (magic) {
28
case 0:
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
29
}
69
}
70
ctx = (void *)ctx + size;
30
}
71
}
31
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
32
- if (cs->ich_hcr_el2 & ICH_HCR_EL2_EN) {
73
if (fpsimd) {
33
- maintlevel = maintenance_interrupt_state(cs);
74
target_restore_fpsimd_record(env, fpsimd);
34
+ if ((cs->ich_hcr_el2 & ICH_HCR_EL2_EN) &&
75
} else {
35
+ maintenance_interrupt_state(cs) != 0) {
76
- err = true;
36
+ maintlevel = 1;
77
+ goto err;
37
}
78
}
38
79
39
trace_gicv3_cpuif_virt_set_irqs(gicv3_redist_affid(cs), fiqlevel,
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
84
-
85
- exit:
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
89
+
90
+ err:
91
+ unlock_user(extra, extra_datap, 0);
92
+ return 1;
93
}
94
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
40
--
96
--
41
2.20.1
97
2.25.1
42
43
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
or records that are smaller than the header. We were silently
5
allowing these cases to pass, dropping the record.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
linux-user/aarch64/signal.c | 5 ++++-
13
1 file changed, 4 insertions(+), 1 deletion(-)
14
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/signal.c
18
+++ b/linux-user/aarch64/signal.c
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
20
break;
21
22
case TARGET_SVE_MAGIC:
23
+ if (sve || size < sizeof(struct target_sve_context)) {
24
+ goto err;
25
+ }
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
27
vq = sve_vq(env);
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
29
- if (!sve && size == sve_size) {
30
+ if (size == sve_size) {
31
sve = (struct target_sve_context *)ctx;
32
break;
33
}
34
--
35
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/signal.c | 3 +++
9
1 file changed, 3 insertions(+)
10
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/signal.c
14
+++ b/linux-user/aarch64/signal.c
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
16
__get_user(extra_size,
17
&((struct target_extra_context *)ctx)->size);
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
19
+ if (!extra) {
20
+ return 1;
21
+ }
22
break;
23
24
default:
25
--
26
2.25.1
diff view generated by jsdifflib
1
Now that we have working system register sync, we push more target CPU
1
From: Richard Henderson <richard.henderson@linaro.org>
2
properties into the virtual machine. That might be useful in some
3
situations, but is not the typical case that users want.
4
2
5
So let's add a -cpu host option that allows them to explicitly pass all
3
Move the checks out of the parsing loop and into the
6
CPU capabilities of their host CPU into the guest.
4
restore function. This more closely mirrors the code
5
structure in the kernel, and is slightly clearer.
7
6
8
Signed-off-by: Alexander Graf <agraf@csgraf.de>
7
Reject rather than silently skip incorrect VL and SVE record sizes,
9
Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
8
bringing our checks in to line with those the kernel does.
10
Reviewed-by: Sergio Lopez <slp@redhat.com>
9
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Message-id: 20210916155404.86958-7-agraf@csgraf.de
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
[PMM: drop unnecessary #include line from .h file]
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
14
---
16
target/arm/cpu.h | 2 +
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
17
target/arm/hvf_arm.h | 18 +++++++++
16
1 file changed, 35 insertions(+), 16 deletions(-)
18
target/arm/kvm_arm.h | 2 -
19
target/arm/cpu.c | 13 ++++--
20
target/arm/hvf/hvf.c | 95 ++++++++++++++++++++++++++++++++++++++++++++
21
5 files changed, 124 insertions(+), 6 deletions(-)
22
create mode 100644 target/arm/hvf_arm.h
23
17
24
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
25
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/cpu.h
20
--- a/linux-user/aarch64/signal.c
27
+++ b/target/arm/cpu.h
21
+++ b/linux-user/aarch64/signal.c
28
@@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
29
#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
23
}
30
#define CPU_RESOLVING_TYPE TYPE_ARM_CPU
31
32
+#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
33
+
34
#define cpu_signal_handler cpu_arm_signal_handler
35
#define cpu_list arm_cpu_list
36
37
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
38
new file mode 100644
39
index XXXXXXX..XXXXXXX
40
--- /dev/null
41
+++ b/target/arm/hvf_arm.h
42
@@ -XXX,XX +XXX,XX @@
43
+/*
44
+ * QEMU Hypervisor.framework (HVF) support -- ARM specifics
45
+ *
46
+ * Copyright (c) 2021 Alexander Graf
47
+ *
48
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
49
+ * See the COPYING file in the top-level directory.
50
+ *
51
+ */
52
+
53
+#ifndef QEMU_HVF_ARM_H
54
+#define QEMU_HVF_ARM_H
55
+
56
+#include "cpu.h"
57
+
58
+void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu);
59
+
60
+#endif
61
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/kvm_arm.h
64
+++ b/target/arm/kvm_arm.h
65
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
66
*/
67
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray);
68
69
-#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
70
-
71
/**
72
* ARMHostCPUFeatures: information about the host CPU (identified
73
* by asking the host kernel)
74
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/cpu.c
77
+++ b/target/arm/cpu.c
78
@@ -XXX,XX +XXX,XX @@
79
#include "sysemu/tcg.h"
80
#include "sysemu/hw_accel.h"
81
#include "kvm_arm.h"
82
+#include "hvf_arm.h"
83
#include "disas/capstone.h"
84
#include "fpu/softfloat.h"
85
86
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
87
* this is the first point where we can report it.
88
*/
89
if (cpu->host_cpu_probe_failed) {
90
- if (!kvm_enabled()) {
91
- error_setg(errp, "The 'host' CPU type can only be used with KVM");
92
+ if (!kvm_enabled() && !hvf_enabled()) {
93
+ error_setg(errp, "The 'host' CPU type can only be used with KVM or HVF");
94
} else {
95
error_setg(errp, "Failed to retrieve host CPU features");
96
}
97
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
98
#endif /* CONFIG_TCG */
99
}
24
}
100
25
101
-#ifdef CONFIG_KVM
26
-static void target_restore_sve_record(CPUARMState *env,
102
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
27
- struct target_sve_context *sve, int vq)
103
static void arm_host_initfn(Object *obj)
28
+static bool target_restore_sve_record(CPUARMState *env,
29
+ struct target_sve_context *sve,
30
+ int size)
104
{
31
{
105
ARMCPU *cpu = ARM_CPU(obj);
32
- int i, j;
106
33
+ int i, j, vl, vq;
107
+#ifdef CONFIG_KVM
34
108
kvm_arm_set_cpu_features_from_host(cpu);
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
109
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
aarch64_add_sve_properties(obj);
111
}
112
+#else
113
+ hvf_arm_set_cpu_features_from_host(cpu);
114
+#endif
115
arm_cpu_post_init(obj);
116
}
117
118
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_register_types(void)
119
{
120
type_register_static(&arm_cpu_type_info);
121
122
-#ifdef CONFIG_KVM
123
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
124
type_register_static(&host_arm_cpu_type_info);
125
#endif
126
}
127
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/target/arm/hvf/hvf.c
130
+++ b/target/arm/hvf/hvf.c
131
@@ -XXX,XX +XXX,XX @@
132
#include "sysemu/hvf.h"
133
#include "sysemu/hvf_int.h"
134
#include "sysemu/hw_accel.h"
135
+#include "hvf_arm.h"
136
137
#include <mach/mach_time.h>
138
139
@@ -XXX,XX +XXX,XX @@ typedef struct HVFVTimer {
140
141
static HVFVTimer vtimer;
142
143
+typedef struct ARMHostCPUFeatures {
144
+ ARMISARegisters isar;
145
+ uint64_t features;
146
+ uint64_t midr;
147
+ uint32_t reset_sctlr;
148
+ const char *dtb_compatible;
149
+} ARMHostCPUFeatures;
150
+
151
+static ARMHostCPUFeatures arm_host_cpu_features;
152
+
153
struct hvf_reg_match {
154
int reg;
155
uint64_t offset;
156
@@ -XXX,XX +XXX,XX @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt)
157
return val;
158
}
159
160
+static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
161
+{
162
+ ARMISARegisters host_isar = {};
163
+ const struct isar_regs {
164
+ int reg;
165
+ uint64_t *val;
166
+ } regs[] = {
167
+ { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 },
168
+ { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 },
169
+ { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 },
170
+ { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 },
171
+ { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 },
172
+ { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 },
173
+ { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 },
174
+ { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 },
175
+ { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 },
176
+ };
177
+ hv_vcpu_t fd;
178
+ hv_return_t r = HV_SUCCESS;
179
+ hv_vcpu_exit_t *exit;
180
+ int i;
181
+
182
+ ahcf->dtb_compatible = "arm,arm-v8";
183
+ ahcf->features = (1ULL << ARM_FEATURE_V8) |
184
+ (1ULL << ARM_FEATURE_NEON) |
185
+ (1ULL << ARM_FEATURE_AARCH64) |
186
+ (1ULL << ARM_FEATURE_PMU) |
187
+ (1ULL << ARM_FEATURE_GENERIC_TIMER);
188
+
189
+ /* We set up a small vcpu to extract host registers */
190
+
191
+ if (hv_vcpu_create(&fd, &exit, NULL) != HV_SUCCESS) {
192
+ return false;
37
+ return false;
193
+ }
38
+ }
194
+
39
+
195
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
40
+ __get_user(vl, &sve->vl);
196
+ r |= hv_vcpu_get_sys_reg(fd, regs[i].reg, regs[i].val);
41
+ vq = sve_vq(env);
197
+ }
198
+ r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr);
199
+ r |= hv_vcpu_destroy(fd);
200
+
42
+
201
+ ahcf->isar = host_isar;
43
+ /* Reject mismatched VL. */
202
+
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
203
+ /*
204
+ * A scratch vCPU returns SCTLR 0, so let's fill our default with the M1
205
+ * boot SCTLR from https://github.com/AsahiLinux/m1n1/issues/97
206
+ */
207
+ ahcf->reset_sctlr = 0x30100180;
208
+ /*
209
+ * SPAN is disabled by default when SCTLR.SPAN=1. To improve compatibility,
210
+ * let's disable it on boot and then allow guest software to turn it on by
211
+ * setting it to 0.
212
+ */
213
+ ahcf->reset_sctlr |= 0x00800000;
214
+
215
+ /* Make sure we don't advertise AArch32 support for EL0/EL1 */
216
+ if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) {
217
+ return false;
45
+ return false;
218
+ }
46
+ }
219
+
47
+
220
+ return r == HV_SUCCESS;
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
221
+}
49
+ if (size <= sizeof(*sve)) {
222
+
50
+ return true;
223
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu)
224
+{
225
+ if (!arm_host_cpu_features.dtb_compatible) {
226
+ if (!hvf_enabled() ||
227
+ !hvf_arm_get_host_cpu_features(&arm_host_cpu_features)) {
228
+ /*
229
+ * We can't report this error yet, so flag that we need to
230
+ * in arm_cpu_realizefn().
231
+ */
232
+ cpu->host_cpu_probe_failed = true;
233
+ return;
234
+ }
235
+ }
51
+ }
236
+
52
+
237
+ cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
53
+ /* Reject non-empty but incomplete record. */
238
+ cpu->isar = arm_host_cpu_features.isar;
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
239
+ cpu->env.features = arm_host_cpu_features.features;
55
+ return false;
240
+ cpu->midr = arm_host_cpu_features.midr;
56
+ }
241
+ cpu->reset_sctlr = arm_host_cpu_features.reset_sctlr;
242
+}
243
+
57
+
244
void hvf_arch_vcpu_destroy(CPUState *cpu)
58
+ /*
245
{
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
60
* at a subsequent address. This corresponds to a little-endian load
61
* of our 64-bit hunks.
62
*/
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
64
}
65
}
66
}
67
+ return true;
246
}
68
}
69
70
static int target_restore_sigframe(CPUARMState *env,
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
72
struct target_sve_context *sve = NULL;
73
uint64_t extra_datap = 0;
74
bool used_extra = false;
75
- int vq = 0, sve_size = 0;
76
+ int sve_size = 0;
77
78
target_restore_general_frame(env, sf);
79
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
81
if (sve || size < sizeof(struct target_sve_context)) {
82
goto err;
83
}
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
85
- vq = sve_vq(env);
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
87
- if (size == sve_size) {
88
- sve = (struct target_sve_context *)ctx;
89
- break;
90
- }
91
- }
92
- goto err;
93
+ sve = (struct target_sve_context *)ctx;
94
+ sve_size = size;
95
+ break;
96
97
case TARGET_EXTRA_MAGIC:
98
if (extra || size != sizeof(struct target_extra_context)) {
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
100
}
101
102
/* SVE data, if present, overwrites FPSIMD data. */
103
- if (sve) {
104
- target_restore_sve_record(env, sve, vq);
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
106
+ goto err;
107
}
108
unlock_user(extra, extra_datap, 0);
109
return 0;
247
--
110
--
248
2.20.1
111
2.25.1
249
250
diff view generated by jsdifflib
1
When not predicating, implement the MVE bitwise logical insns
1
From: Richard Henderson <richard.henderson@linaro.org>
2
directly using TCG vector operations.
3
2
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
4
Restore SM and ZA state according to the records present on return.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210913095440.13462-5-peter.maydell@linaro.org
8
---
10
---
9
target/arm/translate-mve.c | 51 +++++++++++++++++++++++++++-----------
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
10
1 file changed, 36 insertions(+), 15 deletions(-)
12
1 file changed, 154 insertions(+), 13 deletions(-)
11
13
12
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/target/arm/translate-mve.c
16
--- a/linux-user/aarch64/signal.c
15
+++ b/target/arm/translate-mve.c
17
+++ b/linux-user/aarch64/signal.c
16
@@ -XXX,XX +XXX,XX @@ static TCGv_ptr mve_qreg_ptr(unsigned reg)
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
17
return ret;
19
20
#define TARGET_SVE_SIG_FLAG_SM 1
21
22
+#define TARGET_ZA_MAGIC 0x54366345
23
+
24
+struct target_za_context {
25
+ struct target_aarch64_ctx head;
26
+ uint16_t vl;
27
+ uint16_t reserved[3];
28
+ /* The actual ZA data immediately follows. */
29
+};
30
+
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
37
+
38
struct target_rt_sigframe {
39
struct target_siginfo info;
40
struct target_ucontext uc;
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
18
}
42
}
19
43
20
+static bool mve_no_predication(DisasContext *s)
44
static void target_setup_sve_record(struct target_sve_context *sve,
45
- CPUARMState *env, int vq, int size)
46
+ CPUARMState *env, int size)
47
{
48
- int i, j;
49
+ int i, j, vq = sve_vq(env);
50
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
55
}
56
57
+static void target_setup_za_record(struct target_za_context *za,
58
+ CPUARMState *env, int size)
21
+{
59
+{
60
+ int vq = sme_vq(env);
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
62
+ int i, j;
63
+
64
+ memset(za, 0, sizeof(*za));
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
66
+ __put_user(size, &za->head.size);
67
+ __put_user(vl, &za->vl);
68
+
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
70
+ return;
71
+ }
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
73
+
22
+ /*
74
+ /*
23
+ * Return true if we are executing the entire MVE instruction
75
+ * Note that ZA vectors are stored as a byte stream,
24
+ * with no predication or partial-execution, and so we can safely
76
+ * with each byte element at a subsequent address.
25
+ * use an inline TCG vector implementation.
26
+ */
77
+ */
27
+ return s->eci == 0 && s->mve_no_pred;
78
+ for (i = 0; i < vl; ++i) {
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
80
+ for (j = 0; j < vq * 2; ++j) {
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
82
+ }
83
+ }
28
+}
84
+}
29
+
85
+
30
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
86
static void target_restore_general_frame(CPUARMState *env,
31
{
87
struct target_rt_sigframe *sf)
88
{
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
90
91
static bool target_restore_sve_record(CPUARMState *env,
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
112
113
- __get_user(vl, &sve->vl);
114
- vq = sve_vq(env);
115
+ /*
116
+ * Note that we cannot use sve_vq() because that depends on the
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
32
/*
129
/*
33
@@ -XXX,XX +XXX,XX @@ static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
130
* Note that SVE regs are stored as a byte stream, with each byte element
34
return do_1op(s, a, fns[a->size]);
131
* at a subsequent address. This corresponds to a little-endian load
35
}
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
36
37
-static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
38
+static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
39
+ GVecGen3Fn *vecfn)
40
{
41
TCGv_ptr qd, qn, qm;
42
43
@@ -XXX,XX +XXX,XX @@ static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
44
return true;
45
}
46
47
- qd = mve_qreg_ptr(a->qd);
48
- qn = mve_qreg_ptr(a->qn);
49
- qm = mve_qreg_ptr(a->qm);
50
- fn(cpu_env, qd, qn, qm);
51
- tcg_temp_free_ptr(qd);
52
- tcg_temp_free_ptr(qn);
53
- tcg_temp_free_ptr(qm);
54
+ if (vecfn && mve_no_predication(s)) {
55
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
56
+ mve_qreg_offset(a->qm), 16, 16);
57
+ } else {
58
+ qd = mve_qreg_ptr(a->qd);
59
+ qn = mve_qreg_ptr(a->qn);
60
+ qm = mve_qreg_ptr(a->qm);
61
+ fn(cpu_env, qd, qn, qm);
62
+ tcg_temp_free_ptr(qd);
63
+ tcg_temp_free_ptr(qn);
64
+ tcg_temp_free_ptr(qm);
65
+ }
66
mve_update_eci(s);
67
return true;
133
return true;
68
}
134
}
69
135
70
-#define DO_LOGIC(INSN, HELPER) \
136
+static bool target_restore_za_record(CPUARMState *env,
71
+static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
137
+ struct target_za_context *za,
138
+ int size, int *svcr)
72
+{
139
+{
73
+ return do_2op_vec(s, a, fn, NULL);
140
+ int i, j, vl, vq;
141
+
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
143
+ return false;
144
+ }
145
+
146
+ __get_user(vl, &za->vl);
147
+ vq = sme_vq(env);
148
+
149
+ /* Reject mismatched VL. */
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
151
+ return false;
152
+ }
153
+
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
156
+ return true;
157
+ }
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
161
+ return false;
162
+ }
163
+
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
165
+
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
172
+ return true;
74
+}
173
+}
75
+
174
+
76
+#define DO_LOGIC(INSN, HELPER, VECFN) \
175
static int target_restore_sigframe(CPUARMState *env,
77
static bool trans_##INSN(DisasContext *s, arg_2op *a) \
176
struct target_rt_sigframe *sf)
78
{ \
177
{
79
- return do_2op(s, a, HELPER); \
178
struct target_aarch64_ctx *ctx, *extra = NULL;
80
+ return do_2op_vec(s, a, HELPER, VECFN); \
179
struct target_fpsimd_context *fpsimd = NULL;
81
}
180
struct target_sve_context *sve = NULL;
82
181
+ struct target_za_context *za = NULL;
83
-DO_LOGIC(VAND, gen_helper_mve_vand)
182
uint64_t extra_datap = 0;
84
-DO_LOGIC(VBIC, gen_helper_mve_vbic)
183
bool used_extra = false;
85
-DO_LOGIC(VORR, gen_helper_mve_vorr)
184
int sve_size = 0;
86
-DO_LOGIC(VORN, gen_helper_mve_vorn)
185
+ int za_size = 0;
87
-DO_LOGIC(VEOR, gen_helper_mve_veor)
186
+ int svcr = 0;
88
+DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
187
89
+DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
188
target_restore_general_frame(env, sf);
90
+DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
189
91
+DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
92
+DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)
191
sve_size = size;
93
192
break;
94
static bool trans_VPSEL(DisasContext *s, arg_2op *a)
193
95
{
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
96
--
288
--
97
2.20.1
289
2.25.1
98
99
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Add "sve" to the sve prctl functions, to distinguish
4
them from the coming "sme" prctls with similar names.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 8 ++++----
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@
20
#ifndef AARCH64_TARGET_PRCTL_H
21
#define AARCH64_TARGET_PRCTL_H
22
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
{
26
ARMCPU *cpu = env_archcpu(env);
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
29
}
30
return -TARGET_EINVAL;
31
}
32
-#define do_prctl_get_vl do_prctl_get_vl
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/linux-user/syscall.c
52
+++ b/linux-user/syscall.c
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
54
#ifndef do_prctl_set_fp_mode
55
#define do_prctl_set_fp_mode do_prctl_inval1
56
#endif
57
-#ifndef do_prctl_get_vl
58
-#define do_prctl_get_vl do_prctl_inval0
59
+#ifndef do_prctl_sve_get_vl
60
+#define do_prctl_sve_get_vl do_prctl_inval0
61
#endif
62
-#ifndef do_prctl_set_vl
63
-#define do_prctl_set_vl do_prctl_inval1
64
+#ifndef do_prctl_sve_set_vl
65
+#define do_prctl_sve_set_vl do_prctl_inval1
66
#endif
67
#ifndef do_prctl_reset_keys
68
#define do_prctl_reset_keys do_prctl_inval1
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
case PR_SET_FP_MODE:
71
return do_prctl_set_fp_mode(env, arg2);
72
case PR_SVE_GET_VL:
73
- return do_prctl_get_vl(env);
74
+ return do_prctl_sve_get_vl(env);
75
case PR_SVE_SET_VL:
76
- return do_prctl_set_vl(env, arg2);
77
+ return do_prctl_sve_set_vl(env, arg2);
78
case PR_PAC_RESET_KEYS:
79
if (arg3 || arg4 || arg5) {
80
return -TARGET_EINVAL;
81
--
82
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
These prctl set the Streaming SVE vector length, which may
4
be completely different from the Normal SVE vector length.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
12
linux-user/syscall.c | 16 +++++++++
13
2 files changed, 70 insertions(+)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
{
21
ARMCPU *cpu = env_archcpu(env);
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
return sve_vq(env) * 16;
25
}
26
return -TARGET_EINVAL;
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
107
+#endif
108
109
#include "target_prctl.h"
110
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
112
#ifndef do_prctl_set_unalign
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
135
--
136
2.25.1
diff view generated by jsdifflib
1
There's no particular reason why the exclusive monitor should
1
From: Richard Henderson <richard.henderson@linaro.org>
2
be only cleared on reset in system emulation mode. It doesn't
3
hurt if it isn't cleared in user mode, but we might as well
4
reduce the amount of code we have that's inside an ifdef.
5
2
3
There's no reason to set CPACR_EL1.ZEN if SVE disabled.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210914120725.24992-3-peter.maydell@linaro.org
9
---
9
---
10
target/arm/cpu.c | 6 +++---
10
target/arm/cpu.c | 7 +++----
11
1 file changed, 3 insertions(+), 3 deletions(-)
11
1 file changed, 3 insertions(+), 4 deletions(-)
12
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
18
env->regs[15] = 0xFFFF0000;
18
/* and to the FP/Neon instructions */
19
}
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
20
20
CPACR_EL1, FPEN, 3);
21
+ env->vfp.xregs[ARM_VFP_FPEXC] = 0;
21
- /* and to the SVE instructions */
22
+#endif
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
+
23
- CPACR_EL1, ZEN, 3);
24
/* M profile requires that reset clears the exclusive monitor;
24
- /* with reasonable vector length */
25
* A profile does not, but clearing it makes more sense than having it
25
+ /* and to the SVE instructions, with default vector length */
26
* set with an exclusive access on address zero.
26
if (cpu_isar_feature(aa64_sve, cpu)) {
27
*/
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
arm_clear_exclusive(env);
28
+ CPACR_EL1, ZEN, 3);
29
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
30
- env->vfp.xregs[ARM_VFP_FPEXC] = 0;
30
}
31
-#endif
31
/*
32
-
33
if (arm_feature(env, ARM_FEATURE_PMSA)) {
34
if (cpu->pmsav7_dregion > 0) {
35
if (arm_feature(env, ARM_FEATURE_V8)) {
36
--
32
--
37
2.20.1
33
2.25.1
38
39
diff view generated by jsdifflib
1
Currently all of the M-profile specific code in arm_cpu_reset() is
1
From: Richard Henderson <richard.henderson@linaro.org>
2
inside a !defined(CONFIG_USER_ONLY) ifdef block. This is
3
unintentional: it happened because originally the only
4
M-profile-specific handling was the setup of the initial SP and PC
5
from the vector table, which is system-emulation only. But then we
6
added a lot of other M-profile setup to the same "if (ARM_FEATURE_M)"
7
code block without noticing that it was all inside a not-user-mode
8
ifdef. This has generally been harmless, but with the addition of
9
v8.1M low-overhead-loop support we ran into a problem: the reset of
10
FPSCR.LTPSIZE to 4 was only being done for system emulation mode, so
11
if a user-mode guest tried to execute the LE instruction it would
12
incorrectly take a UsageFault.
13
2
14
Adjust the ifdefs so only the really system-emulation specific parts
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
15
are covered. Because this means we now run some reset code that sets
16
up initial values in the FPCCR and similar FPU related registers,
17
explicitly set up the registers controlling FPU context handling in
18
user-emulation mode so that the FPU works by design and not by
19
chance.
20
4
21
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/613
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
22
Cc: qemu-stable@nongnu.org
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
25
Message-id: 20210914120725.24992-2-peter.maydell@linaro.org
26
---
9
---
27
target/arm/cpu.c | 19 +++++++++++++++++++
10
target/arm/cpu.c | 11 +++++++++++
28
1 file changed, 19 insertions(+)
11
1 file changed, 11 insertions(+)
29
12
30
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
31
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/cpu.c
15
--- a/target/arm/cpu.c
33
+++ b/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
34
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
35
env->uncached_cpsr = ARM_CPU_MODE_SVC;
18
CPACR_EL1, ZEN, 3);
36
}
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
37
env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
38
+#endif
39
40
if (arm_feature(env, ARM_FEATURE_M)) {
41
+#ifndef CONFIG_USER_ONLY
42
uint32_t initial_msp; /* Loaded from 0x0 */
43
uint32_t initial_pc; /* Loaded from 0x4 */
44
uint8_t *rom;
45
uint32_t vecbase;
46
+#endif
47
48
if (cpu_isar_feature(aa32_lob, cpu)) {
49
/*
50
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
51
env->v7m.fpccr[M_REG_S] = R_V7M_FPCCR_ASPEN_MASK |
52
R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK;
53
}
20
}
54
+
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
55
+#ifndef CONFIG_USER_ONLY
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
56
/* Unlike A/R profile, M profile defines the reset LR value */
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
57
env->regs[14] = 0xffffffff;
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
58
25
+ CPACR_EL1, SMEN, 3);
59
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
60
env->regs[13] = initial_msp & 0xFFFFFFFC;
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
61
env->regs[15] = initial_pc & ~1;
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
62
env->thumb = initial_pc & 1;
29
+ SMCR, FA64, 1);
63
+#else
30
+ }
64
+ /*
31
+ }
65
+ * For user mode we run non-secure and with access to the FPU.
32
/*
66
+ * The FPU context is active (ie does not need further setup)
33
* Enable 48-bit address space (TODO: take reserved_va into account).
67
+ * and is owned by non-secure.
34
* Enable TBI0 but not TBI1.
68
+ */
69
+ env->v7m.secure = false;
70
+ env->v7m.nsacr = 0xcff;
71
+ env->v7m.cpacr[M_REG_NS] = 0xf0ffff;
72
+ env->v7m.fpccr[M_REG_S] &=
73
+ ~(R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK);
74
+ env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
75
+#endif
76
}
77
78
+#ifndef CONFIG_USER_ONLY
79
/* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently
80
* executing as AArch32 then check if highvecs are enabled and
81
* adjust the PC accordingly.
82
--
35
--
83
2.20.1
36
2.25.1
84
85
diff view generated by jsdifflib
1
Optimize the MVE shift-and-insert insns by using TCG
1
From: Richard Henderson <richard.henderson@linaro.org>
2
vector ops when possible.
3
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210913095440.13462-12-peter.maydell@linaro.org
7
---
7
---
8
target/arm/translate-mve.c | 4 ++--
8
linux-user/elfload.c | 20 ++++++++++++++++++++
9
1 file changed, 2 insertions(+), 2 deletions(-)
9
1 file changed, 20 insertions(+)
10
10
11
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-mve.c
13
--- a/linux-user/elfload.c
14
+++ b/target/arm/translate-mve.c
14
+++ b/linux-user/elfload.c
15
@@ -XXX,XX +XXX,XX @@ DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
15
@@ -XXX,XX +XXX,XX @@ enum {
16
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
16
ARM_HWCAP2_A64_RNG = 1 << 16,
17
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
17
ARM_HWCAP2_A64_BTI = 1 << 17,
18
18
ARM_HWCAP2_A64_MTE = 1 << 18,
19
-DO_2SHIFT(VSRI, vsri, false)
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
20
-DO_2SHIFT(VSLI, vsli, false)
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
21
+DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
22
+DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
23
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
24
#define DO_2SHIFT_FP(INSN, FN) \
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
25
static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
31
};
32
33
#define ELF_HWCAP get_elf_hwcap()
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
39
+ ARM_HWCAP2_A64_SME_F32F32 |
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
26
--
49
--
27
2.20.1
50
2.25.1
28
29
diff view generated by jsdifflib