1
The following changes since commit a97978bcc2d1f650c7d411428806e5b03082b8c7:
1
I don't have anything else queued up at the moment, so this is just
2
Richard's SME patches.
2
3
3
Merge remote-tracking branch 'remotes/dg-gitlab/tags/ppc-for-6.1-20210603' into staging (2021-06-03 10:00:35 +0100)
4
-- PMM
5
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
7
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210603
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
8
13
9
for you to fetch changes up to 1c861885894d840235954060050d240259f5340b:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
10
15
11
tests/unit/test-vmstate: Assert that dup() and mkstemp() succeed (2021-06-03 16:43:27 +0100)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
target-arm queue:
19
target-arm:
15
* Some not-yet-enabled preliminaries for M-profile MVE support
20
* Implement SME emulation, for both system and linux-user
16
* Consistently use "Cortex-Axx", not "Cortex Axx" in docs, comments
17
* docs: Fix installation of man pages with Sphinx 4.x
18
* Mark LDS{MIN,MAX} as signed operations
19
* Fix missing syndrome value for DAIF and PAC check exceptions
20
* Implement BFloat16 extensions
21
* Refactoring of hvf accelerator code in preparation for aarch64 support
22
* Fix some coverity nits in test code
23
21
24
----------------------------------------------------------------
22
----------------------------------------------------------------
25
Alexander Graf (12):
23
Richard Henderson (45):
26
hvf: Move assert_hvf_ok() into common directory
24
target/arm: Handle SME in aarch64_cpu_dump_state
27
hvf: Move vcpu thread functions into common directory
25
target/arm: Add infrastructure for disas_sme
28
hvf: Move cpu functions into common directory
26
target/arm: Trap non-streaming usage when Streaming SVE is active
29
hvf: Move hvf internal definitions into common header
27
target/arm: Mark ADR as non-streaming
30
hvf: Make hvf_set_phys_mem() static
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
31
hvf: Remove use of hv_uvaddr_t and hv_gpaddr_t
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
32
hvf: Split out common code on vcpu init and destroy
30
target/arm: Mark PMULL, FMMLA as non-streaming
33
hvf: Use cpu_synchronize_state()
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
34
hvf: Make synchronize functions static
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
35
hvf: Remove hvf-accel-ops.h
33
target/arm: Mark string/histo/crypto as non-streaming
36
hvf: Introduce hvf vcpu struct
34
target/arm: Mark gather/scatter load/store as non-streaming
37
hvf: Simplify post reset/init/loadvm hooks
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
38
69
39
Damien Goutte-Gattat (1):
70
docs/system/arm/emulation.rst | 4 +
40
docs: Fix installation of man pages with Sphinx 4.x
71
linux-user/aarch64/target_cpu.h | 5 +-
41
72
linux-user/aarch64/target_prctl.h | 62 +-
42
Jamie Iles (4):
73
target/arm/cpu.h | 7 +
43
target/arm: fix missing exception class
74
target/arm/helper-sme.h | 126 ++++
44
target/arm: fold do_raise_exception into raise_exception
75
target/arm/helper-sve.h | 4 +
45
target/arm: use raise_exception_ra for MTE check failure
76
target/arm/helper.h | 18 +
46
target/arm: use raise_exception_ra for stack limit exception
77
target/arm/translate-a64.h | 45 ++
47
78
target/arm/translate.h | 16 +
48
Peter Maydell (15):
79
target/arm/sme-fa64.decode | 60 ++
49
target/arm: Add isar feature check functions for MVE
80
target/arm/sme.decode | 88 +++
50
target/arm: Update feature checks for insns which are "MVE or FP"
81
target/arm/sve.decode | 41 +-
51
target/arm: Move fpsp/fpdp isar check into callers of do_vfp_2op_sp/dp
82
linux-user/aarch64/cpu_loop.c | 9 +
52
target/arm: Add MVE check to VMOV_reg_sp and VMOV_reg_dp
83
linux-user/aarch64/signal.c | 243 ++++++--
53
target/arm: Fix return values in fp_sysreg_checks()
84
linux-user/elfload.c | 20 +
54
target/arm: Implement M-profile VPR register
85
linux-user/syscall.c | 28 +-
55
target/arm: Make FPSCR.LTPSIZE writable for MVE
86
target/arm/cpu.c | 35 +-
56
target/arm: Allow board models to specify initial NS VTOR
87
target/arm/cpu64.c | 11 +
57
arm: Consistently use "Cortex-Axx", not "Cortex Axx"
88
target/arm/helper.c | 56 +-
58
tests/qtest/bios-tables-test: Check for dup2() failure
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
59
tests/qtest/e1000e-test: Check qemu_recv() succeeded
90
target/arm/sve_helper.c | 28 +
60
tests/qtest/hd-geo-test: Fix checks on mkstemp() return value
91
target/arm/translate-a64.c | 103 +++-
61
tests/qtest/pflash-cfi02-test: Avoid potential integer overflow
92
target/arm/translate-sme.c | 373 ++++++++++++
62
tests/qtest/tpm-tests: Remove unnecessary NULL checks
93
target/arm/translate-sve.c | 393 ++++++++++---
63
tests/unit/test-vmstate: Assert that dup() and mkstemp() succeed
94
target/arm/translate-vfp.c | 12 +
64
95
target/arm/translate.c | 2 +
65
Richard Henderson (13):
96
target/arm/vec_helper.c | 24 +
66
target/arm: Mark LDS{MIN,MAX} as signed operations
97
target/arm/meson.build | 3 +
67
target/arm: Add isar_feature_{aa32, aa64, aa64_sve}_bf16
98
28 files changed, 2821 insertions(+), 135 deletions(-)
68
target/arm: Unify unallocated path in disas_fp_1src
99
create mode 100644 target/arm/sme-fa64.decode
69
target/arm: Implement scalar float32 to bfloat16 conversion
100
create mode 100644 target/arm/sme.decode
70
target/arm: Implement vector float32 to bfloat16 conversion
101
create mode 100644 target/arm/translate-sme.c
71
softfpu: Add float_round_to_odd_inf
72
target/arm: Implement bfloat16 dot product (vector)
73
target/arm: Implement bfloat16 dot product (indexed)
74
target/arm: Implement bfloat16 matrix multiply accumulate
75
target/arm: Implement bfloat widening fma (vector)
76
target/arm: Implement bfloat widening fma (indexed)
77
linux-user/aarch64: Enable hwcap bits for bfloat16
78
target/arm: Enable BFloat16 extensions
79
80
docs/conf.py | 1 +
81
docs/system/arm/aspeed.rst | 4 +-
82
docs/system/arm/nuvoton.rst | 6 +-
83
docs/system/arm/sabrelite.rst | 2 +-
84
include/fpu/softfloat-types.h | 4 +-
85
include/hw/arm/allwinner-h3.h | 2 +-
86
include/hw/arm/armv7m.h | 2 +
87
include/hw/core/cpu.h | 3 +-
88
include/sysemu/hvf_int.h | 58 +++++
89
target/arm/cpu.h | 48 +++-
90
target/arm/helper-sve.h | 4 +
91
target/arm/helper.h | 15 ++
92
target/i386/hvf/hvf-accel-ops.h | 23 --
93
target/i386/hvf/hvf-i386.h | 33 +--
94
target/i386/hvf/vmx.h | 24 +-
95
target/i386/hvf/x86hvf.h | 2 -
96
target/arm/neon-dp.decode | 1 +
97
target/arm/neon-shared.decode | 11 +
98
target/arm/sve.decode | 19 +-
99
target/arm/vfp.decode | 2 +
100
accel/hvf/hvf-accel-ops.c | 471 ++++++++++++++++++++++++++++++++++++++++
101
accel/hvf/hvf-all.c | 47 ++++
102
hw/arm/armv7m.c | 7 +
103
hw/arm/aspeed.c | 6 +-
104
hw/arm/mcimx6ul-evk.c | 2 +-
105
hw/arm/mcimx7d-sabre.c | 2 +-
106
hw/arm/npcm7xx_boards.c | 4 +-
107
hw/arm/sabrelite.c | 2 +-
108
hw/misc/npcm7xx_clk.c | 2 +-
109
linux-user/elfload.c | 2 +
110
target/arm/cpu.c | 13 ++
111
target/arm/cpu64.c | 3 +
112
target/arm/cpu_tcg.c | 1 +
113
target/arm/m_helper.c | 5 +-
114
target/arm/machine.c | 20 ++
115
target/arm/mte_helper.c | 12 +-
116
target/arm/op_helper.c | 32 ++-
117
target/arm/sve_helper.c | 2 +
118
target/arm/translate-a64.c | 155 +++++++++++--
119
target/arm/translate-neon.c | 91 ++++++++
120
target/arm/translate-sve.c | 112 ++++++++++
121
target/arm/translate-vfp.c | 164 ++++++++++----
122
target/arm/vec_helper.c | 140 +++++++++++-
123
target/arm/vfp_helper.c | 21 +-
124
target/i386/hvf/hvf-accel-ops.c | 146 -------------
125
target/i386/hvf/hvf.c | 464 +++++----------------------------------
126
target/i386/hvf/x86.c | 28 +--
127
target/i386/hvf/x86_descr.c | 26 +--
128
target/i386/hvf/x86_emu.c | 62 +++---
129
target/i386/hvf/x86_mmu.c | 4 +-
130
target/i386/hvf/x86_task.c | 12 +-
131
target/i386/hvf/x86hvf.c | 222 +++++++++----------
132
tests/qtest/bios-tables-test.c | 8 +-
133
tests/qtest/e1000e-test.c | 3 +-
134
tests/qtest/hd-geo-test.c | 4 +-
135
tests/qtest/pflash-cfi02-test.c | 2 +-
136
tests/qtest/tpm-tests.c | 12 +-
137
tests/unit/test-vmstate.c | 5 +-
138
fpu/softfloat-parts.c.inc | 6 +-
139
MAINTAINERS | 8 +
140
accel/hvf/meson.build | 7 +
141
accel/meson.build | 1 +
142
target/i386/hvf/meson.build | 1 -
143
63 files changed, 1666 insertions(+), 935 deletions(-)
144
create mode 100644 include/sysemu/hvf_int.h
145
delete mode 100644 target/i386/hvf/hvf-accel-ops.h
146
create mode 100644 accel/hvf/hvf-accel-ops.c
147
create mode 100644 accel/hvf/hvf-all.c
148
delete mode 100644 target/i386/hvf/hvf-accel-ops.c
149
create mode 100644 accel/hvf/meson.build
150
diff view generated by jsdifflib
1
Coverity complains that we don't check for failures from dup()
1
From: Richard Henderson <richard.henderson@linaro.org>
2
and mkstemp(); add asserts that these syscalls succeeded.
3
2
4
Fixes: Coverity CID 1432516, 1432574
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Message-id: 20210525134458.6675-7-peter.maydell@linaro.org
9
---
9
---
10
tests/unit/test-vmstate.c | 5 ++++-
10
target/arm/cpu.c | 17 ++++++++++++++++-
11
1 file changed, 4 insertions(+), 1 deletion(-)
11
1 file changed, 16 insertions(+), 1 deletion(-)
12
12
13
diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/unit/test-vmstate.c
15
--- a/target/arm/cpu.c
16
+++ b/tests/unit/test-vmstate.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static int temp_fd;
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
18
/* Duplicate temp_fd and seek to the beginning of the file */
18
int i;
19
static QEMUFile *open_test_file(bool write)
19
int el = arm_current_el(env);
20
{
20
const char *ns_status;
21
- int fd = dup(temp_fd);
21
+ bool sve;
22
+ int fd;
22
23
QIOChannel *ioc;
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
24
QEMUFile *f;
24
for (i = 0; i < 32; i++) {
25
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
26
+ fd = dup(temp_fd);
26
el,
27
+ g_assert(fd >= 0);
27
psr & PSTATE_SP ? 'h' : 't');
28
lseek(fd, 0, SEEK_SET);
28
29
if (write) {
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
30
g_assert_cmpint(ftruncate(fd, 0), ==, 0);
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
31
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
31
+ env->svcr,
32
g_autofree char *temp_file = g_strdup_printf("%s/vmst.test.XXXXXX",
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
33
g_get_tmp_dir());
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
temp_fd = mkstemp(temp_file);
34
+ }
35
+ g_assert(temp_fd >= 0);
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
37
module_call_init(MODULE_INIT_QOM);
37
}
38
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
41
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
44
+ sve = sme_exception_el(env, el) == 0;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
49
+ }
50
+
51
+ if (sve) {
52
int j, zcr_len = sve_vqm1_for_el(env, el);
53
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
39
--
55
--
40
2.20.1
56
2.25.1
41
42
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Until now, Hypervisor.framework has only been available on x86_64 systems.
3
This includes the build rules for the decoder, and the
4
With Apple Silicon shipping now, it extends its reach to aarch64. To
4
new file for translation, but excludes any instructions.
5
prepare for support for multiple architectures, let's start moving common
6
code out into its own accel directory.
7
5
8
This patch moves assert_hvf_ok() and introduces generic build infrastructure.
9
10
Signed-off-by: Alexander Graf <agraf@csgraf.de>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
12
Message-id: 20210519202253.76782-2-agraf@csgraf.de
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
10
---
16
include/sysemu/hvf_int.h | 18 +++++++++++++++
11
target/arm/translate-a64.h | 1 +
17
accel/hvf/hvf-all.c | 47 ++++++++++++++++++++++++++++++++++++++++
12
target/arm/sme.decode | 20 ++++++++++++++++++++
18
target/i386/hvf/hvf.c | 33 +---------------------------
13
target/arm/translate-a64.c | 7 ++++++-
19
MAINTAINERS | 8 +++++++
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
20
accel/hvf/meson.build | 6 +++++
15
target/arm/meson.build | 2 ++
21
accel/meson.build | 1 +
16
5 files changed, 64 insertions(+), 1 deletion(-)
22
6 files changed, 81 insertions(+), 32 deletions(-)
17
create mode 100644 target/arm/sme.decode
23
create mode 100644 include/sysemu/hvf_int.h
18
create mode 100644 target/arm/translate-sme.c
24
create mode 100644 accel/hvf/hvf-all.c
25
create mode 100644 accel/hvf/meson.build
26
19
27
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/translate-a64.h
23
+++ b/target/arm/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
25
}
26
27
bool disas_sve(DisasContext *, uint32_t);
28
+bool disas_sme(DisasContext *, uint32_t);
29
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
new file mode 100644
33
new file mode 100644
29
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
30
--- /dev/null
35
--- /dev/null
31
+++ b/include/sysemu/hvf_int.h
36
+++ b/target/arm/sme.decode
32
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
33
+/*
38
+# AArch64 SME instruction descriptions
34
+ * QEMU Hypervisor.framework (HVF) support
39
+#
35
+ *
40
+# Copyright (c) 2022 Linaro, Ltd
36
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
41
+#
37
+ * See the COPYING file in the top-level directory.
42
+# This library is free software; you can redistribute it and/or
38
+ *
43
+# modify it under the terms of the GNU Lesser General Public
39
+ */
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
47
+# This library is distributed in the hope that it will be useful,
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
40
+
54
+
41
+/* header to be included in HVF-specific code */
55
+#
42
+
56
+# This file is processed by scripts/decodetree.py
43
+#ifndef HVF_INT_H
57
+#
44
+#define HVF_INT_H
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
45
+
59
index XXXXXXX..XXXXXXX 100644
46
+#include <Hypervisor/hv.h>
60
--- a/target/arm/translate-a64.c
47
+
61
+++ b/target/arm/translate-a64.c
48
+void assert_hvf_ok(hv_return_t ret);
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
49
+
63
}
50
+#endif
64
51
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
65
switch (extract32(insn, 25, 4)) {
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
67
+ case 0x0:
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
69
+ unallocated_encoding(s);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
52
new file mode 100644
77
new file mode 100644
53
index XXXXXXX..XXXXXXX
78
index XXXXXXX..XXXXXXX
54
--- /dev/null
79
--- /dev/null
55
+++ b/accel/hvf/hvf-all.c
80
+++ b/target/arm/translate-sme.c
56
@@ -XXX,XX +XXX,XX @@
81
@@ -XXX,XX +XXX,XX @@
57
+/*
82
+/*
58
+ * QEMU Hypervisor.framework support
83
+ * AArch64 SME translation
59
+ *
84
+ *
60
+ * This work is licensed under the terms of the GNU GPL, version 2. See
85
+ * Copyright (c) 2022 Linaro, Ltd
61
+ * the COPYING file in the top-level directory.
62
+ *
86
+ *
63
+ * Contributions after 2012-01-13 are licensed under the terms of the
87
+ * This library is free software; you can redistribute it and/or
64
+ * GNU GPL, version 2 or (at your option) any later version.
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
91
+ *
92
+ * This library is distributed in the hope that it will be useful,
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
65
+ */
99
+ */
66
+
100
+
67
+#include "qemu/osdep.h"
101
+#include "qemu/osdep.h"
68
+#include "qemu-common.h"
102
+#include "cpu.h"
69
+#include "qemu/error-report.h"
103
+#include "tcg/tcg-op.h"
70
+#include "sysemu/hvf.h"
104
+#include "tcg/tcg-op-gvec.h"
71
+#include "sysemu/hvf_int.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
72
+
110
+
73
+void assert_hvf_ok(hv_return_t ret)
74
+{
75
+ if (ret == HV_SUCCESS) {
76
+ return;
77
+ }
78
+
111
+
79
+ switch (ret) {
112
+/*
80
+ case HV_ERROR:
113
+ * Include the generated decoder.
81
+ error_report("Error: HV_ERROR");
114
+ */
82
+ break;
83
+ case HV_BUSY:
84
+ error_report("Error: HV_BUSY");
85
+ break;
86
+ case HV_BAD_ARGUMENT:
87
+ error_report("Error: HV_BAD_ARGUMENT");
88
+ break;
89
+ case HV_NO_RESOURCES:
90
+ error_report("Error: HV_NO_RESOURCES");
91
+ break;
92
+ case HV_NO_DEVICE:
93
+ error_report("Error: HV_NO_DEVICE");
94
+ break;
95
+ case HV_UNSUPPORTED:
96
+ error_report("Error: HV_UNSUPPORTED");
97
+ break;
98
+ default:
99
+ error_report("Unknown Error");
100
+ }
101
+
115
+
102
+ abort();
116
+#include "decode-sme.c.inc"
103
+}
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
104
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
105
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
106
--- a/target/i386/hvf/hvf.c
119
--- a/target/arm/meson.build
107
+++ b/target/i386/hvf/hvf.c
120
+++ b/target/arm/meson.build
108
@@ -XXX,XX +XXX,XX @@
121
@@ -XXX,XX +XXX,XX @@
109
#include "qemu/error-report.h"
122
gen = [
110
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
111
#include "sysemu/hvf.h"
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
112
+#include "sysemu/hvf_int.h"
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
113
#include "sysemu/runstate.h"
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
114
#include "hvf-i386.h"
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
115
#include "vmcs.h"
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
116
@@ -XXX,XX +XXX,XX @@
129
'sme_helper.c',
117
130
'translate-a64.c',
118
HVFState *hvf_state;
131
'translate-sve.c',
119
132
+ 'translate-sme.c',
120
-static void assert_hvf_ok(hv_return_t ret)
133
))
121
-{
134
122
- if (ret == HV_SUCCESS) {
135
arm_softmmu_ss = ss.source_set()
123
- return;
124
- }
125
-
126
- switch (ret) {
127
- case HV_ERROR:
128
- error_report("Error: HV_ERROR");
129
- break;
130
- case HV_BUSY:
131
- error_report("Error: HV_BUSY");
132
- break;
133
- case HV_BAD_ARGUMENT:
134
- error_report("Error: HV_BAD_ARGUMENT");
135
- break;
136
- case HV_NO_RESOURCES:
137
- error_report("Error: HV_NO_RESOURCES");
138
- break;
139
- case HV_NO_DEVICE:
140
- error_report("Error: HV_NO_DEVICE");
141
- break;
142
- case HV_UNSUPPORTED:
143
- error_report("Error: HV_UNSUPPORTED");
144
- break;
145
- default:
146
- error_report("Unknown Error");
147
- }
148
-
149
- abort();
150
-}
151
-
152
/* Memory slots */
153
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
154
{
155
diff --git a/MAINTAINERS b/MAINTAINERS
156
index XXXXXXX..XXXXXXX 100644
157
--- a/MAINTAINERS
158
+++ b/MAINTAINERS
159
@@ -XXX,XX +XXX,XX @@ M: Roman Bolshakov <r.bolshakov@yadro.com>
160
W: https://wiki.qemu.org/Features/HVF
161
S: Maintained
162
F: target/i386/hvf/
163
+
164
+HVF
165
+M: Cameron Esfahani <dirty@apple.com>
166
+M: Roman Bolshakov <r.bolshakov@yadro.com>
167
+W: https://wiki.qemu.org/Features/HVF
168
+S: Maintained
169
+F: accel/hvf/
170
F: include/sysemu/hvf.h
171
+F: include/sysemu/hvf_int.h
172
173
WHPX CPUs
174
M: Sunil Muthuswamy <sunilmut@microsoft.com>
175
diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build
176
new file mode 100644
177
index XXXXXXX..XXXXXXX
178
--- /dev/null
179
+++ b/accel/hvf/meson.build
180
@@ -XXX,XX +XXX,XX @@
181
+hvf_ss = ss.source_set()
182
+hvf_ss.add(files(
183
+ 'hvf-all.c',
184
+))
185
+
186
+specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss)
187
diff --git a/accel/meson.build b/accel/meson.build
188
index XXXXXXX..XXXXXXX 100644
189
--- a/accel/meson.build
190
+++ b/accel/meson.build
191
@@ -XXX,XX +XXX,XX @@ specific_ss.add(files('accel-common.c'))
192
softmmu_ss.add(files('accel-softmmu.c'))
193
user_ss.add(files('accel-user.c'))
194
195
+subdir('hvf')
196
subdir('qtest')
197
subdir('kvm')
198
subdir('tcg')
199
--
136
--
200
2.20.1
137
2.25.1
201
202
diff view generated by jsdifflib
1
If MVE is implemented for an M-profile CPU then it has a VPR
1
From: Richard Henderson <richard.henderson@linaro.org>
2
register, which tracks predication information.
2
3
3
This new behaviour is in the ARM pseudocode function
4
Implement the read and write handling of this register, and
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
the migration of its state.
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
6
the trap would be delivered is in AArch64 mode.
7
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
detection ought to be trivially true, but the pseudocode still contains
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210520152840.24453-7-peter.maydell@linaro.org
10
---
24
---
11
target/arm/cpu.h | 6 ++++++
25
target/arm/cpu.h | 7 +++
12
target/arm/machine.c | 19 +++++++++++++++++++
26
target/arm/translate.h | 4 ++
13
target/arm/translate-vfp.c | 38 ++++++++++++++++++++++++++++++++++++++
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
14
3 files changed, 63 insertions(+)
28
target/arm/helper.c | 41 +++++++++++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
30
target/arm/translate-vfp.c | 12 +++++
31
target/arm/translate.c | 2 +
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
15
35
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu.h
38
--- a/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUARMState {
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
21
uint32_t cpacr[M_REG_NUM_BANKS];
41
* the same thing as the current security state of the processor!
22
uint32_t nsacr;
42
*/
23
int ltpsize;
43
FIELD(TBFLAG_A32, NS, 10, 1)
24
+ uint32_t vpr;
44
+/*
25
} v7m;
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
26
46
+ * This requires an SME trap from AArch32 mode when using NEON.
27
/* Information associated with an exception about to be taken:
47
+ */
28
@@ -XXX,XX +XXX,XX @@ FIELD(V7M_FPCCR, ASPEN, 31, 1)
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
29
R_V7M_FPCCR_UFRDY_MASK | \
49
30
R_V7M_FPCCR_ASPEN_MASK)
31
32
+/* v7M VPR bits */
33
+FIELD(V7M_VPR, P0, 0, 16)
34
+FIELD(V7M_VPR, MASK01, 16, 4)
35
+FIELD(V7M_VPR, MASK23, 20, 4)
36
+
37
/*
50
/*
38
* System register ID fields.
51
* Bit usage when in AArch32 state, for M-profile only.
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
55
FIELD(TBFLAG_A64, SVL, 24, 4)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
58
59
/*
60
* Helpers for using the above.
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate.h
64
+++ b/target/arm/translate.h
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
66
bool pstate_sm;
67
/* True if PSTATE.ZA is set. */
68
bool pstate_za;
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
75
/*
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/sme-fa64.decode
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/target/arm/helper.c
175
+++ b/target/arm/helper.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
178
}
179
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
181
+static bool sme_fa64(CPUARMState *env, int el)
182
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
184
+ return false;
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
39
*/
208
*/
40
diff --git a/target/arm/machine.c b/target/arm/machine.c
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
41
index XXXXXXX..XXXXXXX 100644
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
42
--- a/target/arm/machine.c
211
}
43
+++ b/target/arm/machine.c
212
44
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_m_fp = {
213
+ /*
45
}
214
+ * The SME exception we are testing for is raised via
46
};
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
47
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
48
+static bool mve_needed(void *opaque)
217
+ */
49
+{
218
+ if (el == 0
50
+ ARMCPU *cpu = opaque;
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
51
+
220
+ && (!arm_is_el2_enabled(env)
52
+ return cpu_isar_feature(aa32_mve, cpu);
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
53
+}
222
+ && arm_el_is_aa64(env, 1)
54
+
223
+ && !sme_fa64(env, el)) {
55
+static const VMStateDescription vmstate_m_mve = {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
56
+ .name = "cpu/m/mve",
225
+ }
57
+ .version_id = 1,
226
+
58
+ .minimum_version_id = 1,
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
59
+ .needed = mve_needed,
228
}
60
+ .fields = (VMStateField[]) {
229
61
+ VMSTATE_UINT32(env.v7m.vpr, ARMCPU),
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
62
+ VMSTATE_END_OF_LIST()
231
}
63
+ },
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
64
+};
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
65
+
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
66
static const VMStateDescription vmstate_m = {
235
}
67
.name = "cpu/m",
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
68
.version_id = 4,
237
}
69
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_m = {
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
70
&vmstate_m_other_sp,
239
index XXXXXXX..XXXXXXX 100644
71
&vmstate_m_v8m,
240
--- a/target/arm/translate-a64.c
72
&vmstate_m_fp,
241
+++ b/target/arm/translate-a64.c
73
+ &vmstate_m_mve,
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
74
NULL
243
* unallocated-encoding checks (otherwise the syndrome information
75
}
244
* for the resulting exception will be incorrect).
76
};
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
248
{
249
if (s->fp_excp_el) {
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
253
}
254
255
+static bool fp_access_check(DisasContext *s)
256
+{
257
+ if (!fp_access_check_only(s)) {
258
+ return false;
259
+ }
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
262
+ syn_smetrap(SME_ET_Streaming, false));
263
+ return false;
264
+ }
265
+ return true;
266
+}
267
+
268
/* Check that SVE access is enabled. If it is, return true.
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
277
return;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
279
return;
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
282
}
283
284
+/*
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
77
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
78
index XXXXXXX..XXXXXXX 100644
325
index XXXXXXX..XXXXXXX 100644
79
--- a/target/arm/translate-vfp.c
326
--- a/target/arm/translate-vfp.c
80
+++ b/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
81
@@ -XXX,XX +XXX,XX @@ static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
82
return FPSysRegCheckFailed;
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/arm/translate.c
350
+++ b/target/arm/translate.c
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
83
}
354
}
84
break;
355
+ dc->sme_trap_nonstreaming =
85
+ case ARM_VFP_VPR:
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
86
+ case ARM_VFP_P0:
357
}
87
+ if (!dc_isar_feature(aa32_mve, s)) {
358
dc->cp_regs = cpu->cp_regs;
88
+ return FPSysRegCheckFailed;
359
dc->features = env->features;
89
+ }
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
90
+ break;
361
index XXXXXXX..XXXXXXX 100644
91
default:
362
--- a/target/arm/meson.build
92
return FPSysRegCheckFailed;
363
+++ b/target/arm/meson.build
93
}
364
@@ -XXX,XX +XXX,XX @@
94
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
365
gen = [
95
tcg_temp_free_i32(sfpa);
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
96
break;
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
97
}
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
98
+ case ARM_VFP_VPR:
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
99
+ /* Behaves as NOP if not privileged */
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
100
+ if (IS_USER(s)) {
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
101
+ break;
102
+ }
103
+ tmp = loadfn(s, opaque);
104
+ store_cpu_field(tmp, v7m.vpr);
105
+ break;
106
+ case ARM_VFP_P0:
107
+ {
108
+ TCGv_i32 vpr;
109
+ tmp = loadfn(s, opaque);
110
+ vpr = load_cpu_field(v7m.vpr);
111
+ tcg_gen_deposit_i32(vpr, vpr, tmp,
112
+ R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
113
+ store_cpu_field(vpr, v7m.vpr);
114
+ tcg_temp_free_i32(tmp);
115
+ break;
116
+ }
117
default:
118
g_assert_not_reached();
119
}
120
@@ -XXX,XX +XXX,XX @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
121
tcg_temp_free_i32(fpscr);
122
break;
123
}
124
+ case ARM_VFP_VPR:
125
+ /* Behaves as NOP if not privileged */
126
+ if (IS_USER(s)) {
127
+ break;
128
+ }
129
+ tmp = load_cpu_field(v7m.vpr);
130
+ storefn(s, opaque, tmp);
131
+ break;
132
+ case ARM_VFP_P0:
133
+ tmp = load_cpu_field(v7m.vpr);
134
+ tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
135
+ storefn(s, opaque, tmp);
136
+ break;
137
default:
138
g_assert_not_reached();
139
}
140
--
372
--
141
2.20.1
373
2.25.1
142
143
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We will need more than a single field for hvf going forward. To keep
3
Mark ADR as a non-streaming instruction, which should trap
4
the global vcpu struct uncluttered, let's allocate a special hvf vcpu
4
if full a64 support is not enabled in streaming mode.
5
struct, similar to how hax does it.
6
5
7
Signed-off-by: Alexander Graf <agraf@csgraf.de>
6
Removing entries from sme-fa64.decode is an easy way to see
8
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
7
what remains to be done.
9
Tested-by: Roman Bolshakov <r.bolshakov@yadro.com>
8
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
12
Message-id: 20210519202253.76782-12-agraf@csgraf.de
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
13
---
16
include/hw/core/cpu.h | 3 +-
14
target/arm/translate.h | 7 +++++++
17
include/sysemu/hvf_int.h | 4 +
15
target/arm/sme-fa64.decode | 1 -
18
target/i386/hvf/vmx.h | 24 +++--
16
target/arm/translate-sve.c | 8 ++++----
19
accel/hvf/hvf-accel-ops.c | 8 +-
17
3 files changed, 11 insertions(+), 5 deletions(-)
20
target/i386/hvf/hvf.c | 104 +++++++++---------
21
target/i386/hvf/x86.c | 28 ++---
22
target/i386/hvf/x86_descr.c | 26 ++---
23
target/i386/hvf/x86_emu.c | 62 +++++------
24
target/i386/hvf/x86_mmu.c | 4 +-
25
target/i386/hvf/x86_task.c | 12 +--
26
target/i386/hvf/x86hvf.c | 210 ++++++++++++++++++------------------
27
11 files changed, 248 insertions(+), 237 deletions(-)
28
18
29
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
30
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
31
--- a/include/hw/core/cpu.h
21
--- a/target/arm/translate.h
32
+++ b/include/hw/core/cpu.h
22
+++ b/target/arm/translate.h
33
@@ -XXX,XX +XXX,XX @@ struct KVMState;
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
34
struct kvm_run;
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
35
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
36
struct hax_vcpu_state;
26
37
+struct hvf_vcpu_state;
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
38
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
39
#define TB_JMP_CACHE_BITS 12
29
+ { \
40
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
30
+ s->is_nonstreaming = true; \
41
@@ -XXX,XX +XXX,XX @@ struct CPUState {
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
42
32
+ }
43
struct hax_vcpu_state *hax_vcpu;
33
+
44
34
#endif /* TARGET_ARM_TRANSLATE_H */
45
- int hvf_fd;
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
46
+ struct hvf_vcpu_state *hvf;
47
48
/* track IOMMUs whose translations we've cached in the TCG TLB */
49
GArray *iommu_notifiers;
50
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
51
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
52
--- a/include/sysemu/hvf_int.h
37
--- a/target/arm/sme-fa64.decode
53
+++ b/include/sysemu/hvf_int.h
38
+++ b/target/arm/sme-fa64.decode
54
@@ -XXX,XX +XXX,XX @@ struct HVFState {
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
55
};
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
56
extern HVFState *hvf_state;
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
57
42
58
+struct hvf_vcpu_state {
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
59
+ int fd;
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
60
+};
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
61
+
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
62
void assert_hvf_ok(hv_return_t ret);
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
63
int hvf_arch_init_vcpu(CPUState *cpu);
64
void hvf_arch_vcpu_destroy(CPUState *cpu);
65
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
66
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
67
--- a/target/i386/hvf/vmx.h
49
--- a/target/arm/translate-sve.c
68
+++ b/target/i386/hvf/vmx.h
50
+++ b/target/arm/translate-sve.c
69
@@ -XXX,XX +XXX,XX @@
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
70
#include "vmcs.h"
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
71
#include "cpu.h"
72
#include "x86.h"
73
+#include "sysemu/hvf.h"
74
+#include "sysemu/hvf_int.h"
75
76
#include "exec/address-spaces.h"
77
78
@@ -XXX,XX +XXX,XX @@ static inline void macvm_set_rip(CPUState *cpu, uint64_t rip)
79
uint64_t val;
80
81
/* BUG, should take considering overlap.. */
82
- wreg(cpu->hvf_fd, HV_X86_RIP, rip);
83
+ wreg(cpu->hvf->fd, HV_X86_RIP, rip);
84
env->eip = rip;
85
86
/* after moving forward in rip, we need to clean INTERRUPTABILITY */
87
- val = rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY);
88
+ val = rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY);
89
if (val & (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
90
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
91
env->hflags &= ~HF_INHIBIT_IRQ_MASK;
92
- wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY,
93
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY,
94
val & ~(VMCS_INTERRUPTIBILITY_STI_BLOCKING |
95
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING));
96
}
97
@@ -XXX,XX +XXX,XX @@ static inline void vmx_clear_nmi_blocking(CPUState *cpu)
98
CPUX86State *env = &x86_cpu->env;
99
100
env->hflags2 &= ~HF2_NMI_MASK;
101
- uint32_t gi = (uint32_t) rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY);
102
+ uint32_t gi = (uint32_t) rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY);
103
gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
104
- wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi);
105
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY, gi);
106
}
53
}
107
54
108
static inline void vmx_set_nmi_blocking(CPUState *cpu)
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
109
@@ -XXX,XX +XXX,XX @@ static inline void vmx_set_nmi_blocking(CPUState *cpu)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
110
CPUX86State *env = &x86_cpu->env;
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
111
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
112
env->hflags2 |= HF2_NMI_MASK;
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
113
- uint32_t gi = (uint32_t)rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY);
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
114
+ uint32_t gi = (uint32_t)rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY);
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
115
gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
116
- wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi);
63
117
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY, gi);
64
/*
118
}
65
*** SVE Integer Misc - Unpredicated Group
119
120
static inline void vmx_set_nmi_window_exiting(CPUState *cpu)
121
{
122
uint64_t val;
123
- val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
124
- wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val |
125
+ val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
126
+ wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val |
127
VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING);
128
129
}
130
@@ -XXX,XX +XXX,XX @@ static inline void vmx_clear_nmi_window_exiting(CPUState *cpu)
131
{
132
133
uint64_t val;
134
- val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
135
- wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val &
136
+ val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
137
+ wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val &
138
~VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING);
139
}
140
141
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
142
index XXXXXXX..XXXXXXX 100644
143
--- a/accel/hvf/hvf-accel-ops.c
144
+++ b/accel/hvf/hvf-accel-ops.c
145
@@ -XXX,XX +XXX,XX @@ type_init(hvf_type_init);
146
147
static void hvf_vcpu_destroy(CPUState *cpu)
148
{
149
- hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd);
150
+ hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd);
151
assert_hvf_ok(ret);
152
153
hvf_arch_vcpu_destroy(cpu);
154
+ g_free(cpu->hvf);
155
+ cpu->hvf = NULL;
156
}
157
158
static int hvf_init_vcpu(CPUState *cpu)
159
{
160
int r;
161
162
+ cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
163
+
164
/* init cpu signals */
165
sigset_t set;
166
struct sigaction sigact;
167
@@ -XXX,XX +XXX,XX @@ static int hvf_init_vcpu(CPUState *cpu)
168
pthread_sigmask(SIG_BLOCK, NULL, &set);
169
sigdelset(&set, SIG_IPI);
170
171
- r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
172
+ r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT);
173
cpu->vcpu_dirty = 1;
174
assert_hvf_ok(r);
175
176
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/target/i386/hvf/hvf.c
179
+++ b/target/i386/hvf/hvf.c
180
@@ -XXX,XX +XXX,XX @@ void vmx_update_tpr(CPUState *cpu)
181
int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
182
int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
183
184
- wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
185
+ wreg(cpu->hvf->fd, HV_X86_TPR, tpr);
186
if (irr == -1) {
187
- wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
188
+ wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
189
} else {
190
- wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
191
+ wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
192
irr >> 4);
193
}
194
}
195
@@ -XXX,XX +XXX,XX @@ void vmx_update_tpr(CPUState *cpu)
196
static void update_apic_tpr(CPUState *cpu)
197
{
198
X86CPU *x86_cpu = X86_CPU(cpu);
199
- int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
200
+ int tpr = rreg(cpu->hvf->fd, HV_X86_TPR) >> 4;
201
cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
202
}
203
204
@@ -XXX,XX +XXX,XX @@ int hvf_arch_init_vcpu(CPUState *cpu)
205
}
206
207
/* set VMCS control fields */
208
- wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
209
+ wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS,
210
cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
211
VMCS_PIN_BASED_CTLS_EXTINT |
212
VMCS_PIN_BASED_CTLS_NMI |
213
VMCS_PIN_BASED_CTLS_VNMI));
214
- wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
215
+ wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS,
216
cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
217
VMCS_PRI_PROC_BASED_CTLS_HLT |
218
VMCS_PRI_PROC_BASED_CTLS_MWAIT |
219
VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
220
VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
221
VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
222
- wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
223
+ wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS,
224
cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
225
VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));
226
227
- wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
228
+ wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
229
0));
230
- wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
231
+ wvmcs(cpu->hvf->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
232
233
- wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
234
+ wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
235
236
x86cpu = X86_CPU(cpu);
237
x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);
238
239
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
240
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
241
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
242
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
243
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
244
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
245
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
246
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
247
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
248
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
249
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
250
- hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);
251
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_STAR, 1);
252
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_LSTAR, 1);
253
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_CSTAR, 1);
254
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FMASK, 1);
255
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FSBASE, 1);
256
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_GSBASE, 1);
257
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_KERNELGSBASE, 1);
258
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_TSC_AUX, 1);
259
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_TSC, 1);
260
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_CS, 1);
261
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_EIP, 1);
262
+ hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_ESP, 1);
263
264
return 0;
265
}
266
@@ -XXX,XX +XXX,XX @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
267
}
268
if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
269
env->has_error_code = true;
270
- env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
271
+ env->error_code = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_ERROR);
272
}
273
}
274
- if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
275
+ if ((rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) &
276
VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
277
env->hflags2 |= HF2_NMI_MASK;
278
} else {
279
env->hflags2 &= ~HF2_NMI_MASK;
280
}
281
- if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
282
+ if (rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) &
283
(VMCS_INTERRUPTIBILITY_STI_BLOCKING |
284
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
285
env->hflags |= HF_INHIBIT_IRQ_MASK;
286
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
287
return EXCP_HLT;
288
}
289
290
- hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
291
+ hv_return_t r = hv_vcpu_run(cpu->hvf->fd);
292
assert_hvf_ok(r);
293
294
/* handle VMEXIT */
295
- uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
296
- uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
297
- uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
298
+ uint64_t exit_reason = rvmcs(cpu->hvf->fd, VMCS_EXIT_REASON);
299
+ uint64_t exit_qual = rvmcs(cpu->hvf->fd, VMCS_EXIT_QUALIFICATION);
300
+ uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf->fd,
301
VMCS_EXIT_INSTRUCTION_LENGTH);
302
303
- uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
304
+ uint64_t idtvec_info = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO);
305
306
hvf_store_events(cpu, ins_len, idtvec_info);
307
- rip = rreg(cpu->hvf_fd, HV_X86_RIP);
308
- env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
309
+ rip = rreg(cpu->hvf->fd, HV_X86_RIP);
310
+ env->eflags = rreg(cpu->hvf->fd, HV_X86_RFLAGS);
311
312
qemu_mutex_lock_iothread();
313
314
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
315
case EXIT_REASON_EPT_FAULT:
316
{
317
hvf_slot *slot;
318
- uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);
319
+ uint64_t gpa = rvmcs(cpu->hvf->fd, VMCS_GUEST_PHYSICAL_ADDRESS);
320
321
if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
322
((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
323
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
324
store_regs(cpu);
325
break;
326
} else if (!string && !in) {
327
- RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
328
+ RAX(env) = rreg(cpu->hvf->fd, HV_X86_RAX);
329
hvf_handle_io(env, port, &RAX(env), 1, size, 1);
330
macvm_set_rip(cpu, rip + ins_len);
331
break;
332
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
333
break;
334
}
335
case EXIT_REASON_CPUID: {
336
- uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
337
- uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
338
- uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
339
- uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);
340
+ uint32_t rax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX);
341
+ uint32_t rbx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RBX);
342
+ uint32_t rcx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX);
343
+ uint32_t rdx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX);
344
345
if (rax == 1) {
346
/* CPUID1.ecx.OSXSAVE needs to know CR4 */
347
- env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
348
+ env->cr[4] = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4);
349
}
350
hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
351
352
- wreg(cpu->hvf_fd, HV_X86_RAX, rax);
353
- wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
354
- wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
355
- wreg(cpu->hvf_fd, HV_X86_RDX, rdx);
356
+ wreg(cpu->hvf->fd, HV_X86_RAX, rax);
357
+ wreg(cpu->hvf->fd, HV_X86_RBX, rbx);
358
+ wreg(cpu->hvf->fd, HV_X86_RCX, rcx);
359
+ wreg(cpu->hvf->fd, HV_X86_RDX, rdx);
360
361
macvm_set_rip(cpu, rip + ins_len);
362
break;
363
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
364
case EXIT_REASON_XSETBV: {
365
X86CPU *x86_cpu = X86_CPU(cpu);
366
CPUX86State *env = &x86_cpu->env;
367
- uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
368
- uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
369
- uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);
370
+ uint32_t eax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX);
371
+ uint32_t ecx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX);
372
+ uint32_t edx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX);
373
374
if (ecx) {
375
macvm_set_rip(cpu, rip + ins_len);
376
break;
377
}
378
env->xcr0 = ((uint64_t)edx << 32) | eax;
379
- wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
380
+ wreg(cpu->hvf->fd, HV_X86_XCR0, env->xcr0 | 1);
381
macvm_set_rip(cpu, rip + ins_len);
382
break;
383
}
384
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
385
386
switch (cr) {
387
case 0x0: {
388
- macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
389
+ macvm_set_cr0(cpu->hvf->fd, RRX(env, reg));
390
break;
391
}
392
case 4: {
393
- macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
394
+ macvm_set_cr4(cpu->hvf->fd, RRX(env, reg));
395
break;
396
}
397
case 8: {
398
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
399
break;
400
}
401
case EXIT_REASON_TASK_SWITCH: {
402
- uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
403
+ uint64_t vinfo = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO);
404
x68_segment_selector sel = {.sel = exit_qual & 0xffff};
405
vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
406
vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
407
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
408
break;
409
}
410
case EXIT_REASON_RDPMC:
411
- wreg(cpu->hvf_fd, HV_X86_RAX, 0);
412
- wreg(cpu->hvf_fd, HV_X86_RDX, 0);
413
+ wreg(cpu->hvf->fd, HV_X86_RAX, 0);
414
+ wreg(cpu->hvf->fd, HV_X86_RDX, 0);
415
macvm_set_rip(cpu, rip + ins_len);
416
break;
417
case VMX_REASON_VMCALL:
418
diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c
419
index XXXXXXX..XXXXXXX 100644
420
--- a/target/i386/hvf/x86.c
421
+++ b/target/i386/hvf/x86.c
422
@@ -XXX,XX +XXX,XX @@ bool x86_read_segment_descriptor(struct CPUState *cpu,
423
}
424
425
if (GDT_SEL == sel.ti) {
426
- base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE);
427
- limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT);
428
+ base = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_BASE);
429
+ limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_LIMIT);
430
} else {
431
- base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE);
432
- limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT);
433
+ base = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_BASE);
434
+ limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_LIMIT);
435
}
436
437
if (sel.index * 8 >= limit) {
438
@@ -XXX,XX +XXX,XX @@ bool x86_write_segment_descriptor(struct CPUState *cpu,
439
uint32_t limit;
440
441
if (GDT_SEL == sel.ti) {
442
- base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE);
443
- limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT);
444
+ base = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_BASE);
445
+ limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_LIMIT);
446
} else {
447
- base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE);
448
- limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT);
449
+ base = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_BASE);
450
+ limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_LIMIT);
451
}
452
453
if (sel.index * 8 >= limit) {
454
@@ -XXX,XX +XXX,XX @@ bool x86_write_segment_descriptor(struct CPUState *cpu,
455
bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc,
456
int gate)
457
{
458
- target_ulong base = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE);
459
- uint32_t limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT);
460
+ target_ulong base = rvmcs(cpu->hvf->fd, VMCS_GUEST_IDTR_BASE);
461
+ uint32_t limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_IDTR_LIMIT);
462
463
memset(idt_desc, 0, sizeof(*idt_desc));
464
if (gate * 8 >= limit) {
465
@@ -XXX,XX +XXX,XX @@ bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc,
466
467
bool x86_is_protected(struct CPUState *cpu)
468
{
469
- uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0);
470
+ uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0);
471
return cr0 & CR0_PE;
472
}
473
474
@@ -XXX,XX +XXX,XX @@ bool x86_is_v8086(struct CPUState *cpu)
475
476
bool x86_is_long_mode(struct CPUState *cpu)
477
{
478
- return rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA;
479
+ return rvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA;
480
}
481
482
bool x86_is_long64_mode(struct CPUState *cpu)
483
@@ -XXX,XX +XXX,XX @@ bool x86_is_long64_mode(struct CPUState *cpu)
484
485
bool x86_is_paging_mode(struct CPUState *cpu)
486
{
487
- uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0);
488
+ uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0);
489
return cr0 & CR0_PG;
490
}
491
492
bool x86_is_pae_enabled(struct CPUState *cpu)
493
{
494
- uint64_t cr4 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
495
+ uint64_t cr4 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4);
496
return cr4 & CR4_PAE;
497
}
498
499
diff --git a/target/i386/hvf/x86_descr.c b/target/i386/hvf/x86_descr.c
500
index XXXXXXX..XXXXXXX 100644
501
--- a/target/i386/hvf/x86_descr.c
502
+++ b/target/i386/hvf/x86_descr.c
503
@@ -XXX,XX +XXX,XX @@ static const struct vmx_segment_field {
504
505
uint32_t vmx_read_segment_limit(CPUState *cpu, X86Seg seg)
506
{
507
- return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit);
508
+ return (uint32_t)rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].limit);
509
}
510
511
uint32_t vmx_read_segment_ar(CPUState *cpu, X86Seg seg)
512
{
513
- return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes);
514
+ return (uint32_t)rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].ar_bytes);
515
}
516
517
uint64_t vmx_read_segment_base(CPUState *cpu, X86Seg seg)
518
{
519
- return rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base);
520
+ return rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].base);
521
}
522
523
x68_segment_selector vmx_read_segment_selector(CPUState *cpu, X86Seg seg)
524
{
525
x68_segment_selector sel;
526
- sel.sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector);
527
+ sel.sel = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector);
528
return sel;
529
}
530
531
void vmx_write_segment_selector(struct CPUState *cpu, x68_segment_selector selector, X86Seg seg)
532
{
533
- wvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector, selector.sel);
534
+ wvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector, selector.sel);
535
}
536
537
void vmx_read_segment_descriptor(struct CPUState *cpu, struct vmx_segment *desc, X86Seg seg)
538
{
539
- desc->sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector);
540
- desc->base = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base);
541
- desc->limit = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit);
542
- desc->ar = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes);
543
+ desc->sel = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector);
544
+ desc->base = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].base);
545
+ desc->limit = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].limit);
546
+ desc->ar = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].ar_bytes);
547
}
548
549
void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, X86Seg seg)
550
{
551
const struct vmx_segment_field *sf = &vmx_segment_fields[seg];
552
553
- wvmcs(cpu->hvf_fd, sf->base, desc->base);
554
- wvmcs(cpu->hvf_fd, sf->limit, desc->limit);
555
- wvmcs(cpu->hvf_fd, sf->selector, desc->sel);
556
- wvmcs(cpu->hvf_fd, sf->ar_bytes, desc->ar);
557
+ wvmcs(cpu->hvf->fd, sf->base, desc->base);
558
+ wvmcs(cpu->hvf->fd, sf->limit, desc->limit);
559
+ wvmcs(cpu->hvf->fd, sf->selector, desc->sel);
560
+ wvmcs(cpu->hvf->fd, sf->ar_bytes, desc->ar);
561
}
562
563
void x86_segment_descriptor_to_vmx(struct CPUState *cpu, x68_segment_selector selector, struct x86_segment_descriptor *desc, struct vmx_segment *vmx_desc)
564
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
565
index XXXXXXX..XXXXXXX 100644
566
--- a/target/i386/hvf/x86_emu.c
567
+++ b/target/i386/hvf/x86_emu.c
568
@@ -XXX,XX +XXX,XX @@ void simulate_rdmsr(struct CPUState *cpu)
569
570
switch (msr) {
571
case MSR_IA32_TSC:
572
- val = rdtscp() + rvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET);
573
+ val = rdtscp() + rvmcs(cpu->hvf->fd, VMCS_TSC_OFFSET);
574
break;
575
case MSR_IA32_APICBASE:
576
val = cpu_get_apic_base(X86_CPU(cpu)->apic_state);
577
@@ -XXX,XX +XXX,XX @@ void simulate_rdmsr(struct CPUState *cpu)
578
val = x86_cpu->ucode_rev;
579
break;
580
case MSR_EFER:
581
- val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER);
582
+ val = rvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER);
583
break;
584
case MSR_FSBASE:
585
- val = rvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE);
586
+ val = rvmcs(cpu->hvf->fd, VMCS_GUEST_FS_BASE);
587
break;
588
case MSR_GSBASE:
589
- val = rvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE);
590
+ val = rvmcs(cpu->hvf->fd, VMCS_GUEST_GS_BASE);
591
break;
592
case MSR_KERNELGSBASE:
593
- val = rvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE);
594
+ val = rvmcs(cpu->hvf->fd, VMCS_HOST_FS_BASE);
595
break;
596
case MSR_STAR:
597
abort();
598
@@ -XXX,XX +XXX,XX @@ void simulate_wrmsr(struct CPUState *cpu)
599
cpu_set_apic_base(X86_CPU(cpu)->apic_state, data);
600
break;
601
case MSR_FSBASE:
602
- wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, data);
603
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_FS_BASE, data);
604
break;
605
case MSR_GSBASE:
606
- wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, data);
607
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_GS_BASE, data);
608
break;
609
case MSR_KERNELGSBASE:
610
- wvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE, data);
611
+ wvmcs(cpu->hvf->fd, VMCS_HOST_FS_BASE, data);
612
break;
613
case MSR_STAR:
614
abort();
615
@@ -XXX,XX +XXX,XX @@ void simulate_wrmsr(struct CPUState *cpu)
616
break;
617
case MSR_EFER:
618
/*printf("new efer %llx\n", EFER(cpu));*/
619
- wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, data);
620
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER, data);
621
if (data & MSR_EFER_NXE) {
622
- hv_vcpu_invalidate_tlb(cpu->hvf_fd);
623
+ hv_vcpu_invalidate_tlb(cpu->hvf->fd);
624
}
625
break;
626
case MSR_MTRRphysBase(0):
627
@@ -XXX,XX +XXX,XX @@ void load_regs(struct CPUState *cpu)
628
CPUX86State *env = &x86_cpu->env;
629
630
int i = 0;
631
- RRX(env, R_EAX) = rreg(cpu->hvf_fd, HV_X86_RAX);
632
- RRX(env, R_EBX) = rreg(cpu->hvf_fd, HV_X86_RBX);
633
- RRX(env, R_ECX) = rreg(cpu->hvf_fd, HV_X86_RCX);
634
- RRX(env, R_EDX) = rreg(cpu->hvf_fd, HV_X86_RDX);
635
- RRX(env, R_ESI) = rreg(cpu->hvf_fd, HV_X86_RSI);
636
- RRX(env, R_EDI) = rreg(cpu->hvf_fd, HV_X86_RDI);
637
- RRX(env, R_ESP) = rreg(cpu->hvf_fd, HV_X86_RSP);
638
- RRX(env, R_EBP) = rreg(cpu->hvf_fd, HV_X86_RBP);
639
+ RRX(env, R_EAX) = rreg(cpu->hvf->fd, HV_X86_RAX);
640
+ RRX(env, R_EBX) = rreg(cpu->hvf->fd, HV_X86_RBX);
641
+ RRX(env, R_ECX) = rreg(cpu->hvf->fd, HV_X86_RCX);
642
+ RRX(env, R_EDX) = rreg(cpu->hvf->fd, HV_X86_RDX);
643
+ RRX(env, R_ESI) = rreg(cpu->hvf->fd, HV_X86_RSI);
644
+ RRX(env, R_EDI) = rreg(cpu->hvf->fd, HV_X86_RDI);
645
+ RRX(env, R_ESP) = rreg(cpu->hvf->fd, HV_X86_RSP);
646
+ RRX(env, R_EBP) = rreg(cpu->hvf->fd, HV_X86_RBP);
647
for (i = 8; i < 16; i++) {
648
- RRX(env, i) = rreg(cpu->hvf_fd, HV_X86_RAX + i);
649
+ RRX(env, i) = rreg(cpu->hvf->fd, HV_X86_RAX + i);
650
}
651
652
- env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
653
+ env->eflags = rreg(cpu->hvf->fd, HV_X86_RFLAGS);
654
rflags_to_lflags(env);
655
- env->eip = rreg(cpu->hvf_fd, HV_X86_RIP);
656
+ env->eip = rreg(cpu->hvf->fd, HV_X86_RIP);
657
}
658
659
void store_regs(struct CPUState *cpu)
660
@@ -XXX,XX +XXX,XX @@ void store_regs(struct CPUState *cpu)
661
CPUX86State *env = &x86_cpu->env;
662
663
int i = 0;
664
- wreg(cpu->hvf_fd, HV_X86_RAX, RAX(env));
665
- wreg(cpu->hvf_fd, HV_X86_RBX, RBX(env));
666
- wreg(cpu->hvf_fd, HV_X86_RCX, RCX(env));
667
- wreg(cpu->hvf_fd, HV_X86_RDX, RDX(env));
668
- wreg(cpu->hvf_fd, HV_X86_RSI, RSI(env));
669
- wreg(cpu->hvf_fd, HV_X86_RDI, RDI(env));
670
- wreg(cpu->hvf_fd, HV_X86_RBP, RBP(env));
671
- wreg(cpu->hvf_fd, HV_X86_RSP, RSP(env));
672
+ wreg(cpu->hvf->fd, HV_X86_RAX, RAX(env));
673
+ wreg(cpu->hvf->fd, HV_X86_RBX, RBX(env));
674
+ wreg(cpu->hvf->fd, HV_X86_RCX, RCX(env));
675
+ wreg(cpu->hvf->fd, HV_X86_RDX, RDX(env));
676
+ wreg(cpu->hvf->fd, HV_X86_RSI, RSI(env));
677
+ wreg(cpu->hvf->fd, HV_X86_RDI, RDI(env));
678
+ wreg(cpu->hvf->fd, HV_X86_RBP, RBP(env));
679
+ wreg(cpu->hvf->fd, HV_X86_RSP, RSP(env));
680
for (i = 8; i < 16; i++) {
681
- wreg(cpu->hvf_fd, HV_X86_RAX + i, RRX(env, i));
682
+ wreg(cpu->hvf->fd, HV_X86_RAX + i, RRX(env, i));
683
}
684
685
lflags_to_rflags(env);
686
- wreg(cpu->hvf_fd, HV_X86_RFLAGS, env->eflags);
687
+ wreg(cpu->hvf->fd, HV_X86_RFLAGS, env->eflags);
688
macvm_set_rip(cpu, env->eip);
689
}
690
691
diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c
692
index XXXXXXX..XXXXXXX 100644
693
--- a/target/i386/hvf/x86_mmu.c
694
+++ b/target/i386/hvf/x86_mmu.c
695
@@ -XXX,XX +XXX,XX @@ static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
696
pt->err_code |= MMU_PAGE_PT;
697
}
698
699
- uint32_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0);
700
+ uint32_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0);
701
/* check protection */
702
if (cr0 & CR0_WP) {
703
if (pt->write_access && !pte_write_access(pte)) {
704
@@ -XXX,XX +XXX,XX @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code,
705
{
706
int top_level, level;
707
bool is_large = false;
708
- target_ulong cr3 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR3);
709
+ target_ulong cr3 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR3);
710
uint64_t page_mask = pae ? PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK;
711
712
memset(pt, 0, sizeof(*pt));
713
diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c
714
index XXXXXXX..XXXXXXX 100644
715
--- a/target/i386/hvf/x86_task.c
716
+++ b/target/i386/hvf/x86_task.c
717
@@ -XXX,XX +XXX,XX @@ static void load_state_from_tss32(CPUState *cpu, struct x86_tss_segment32 *tss)
718
X86CPU *x86_cpu = X86_CPU(cpu);
719
CPUX86State *env = &x86_cpu->env;
720
721
- wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, tss->cr3);
722
+ wvmcs(cpu->hvf->fd, VMCS_GUEST_CR3, tss->cr3);
723
724
env->eip = tss->eip;
725
env->eflags = tss->eflags | 2;
726
@@ -XXX,XX +XXX,XX @@ static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel, x68_segme
727
728
void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type)
729
{
730
- uint64_t rip = rreg(cpu->hvf_fd, HV_X86_RIP);
731
+ uint64_t rip = rreg(cpu->hvf->fd, HV_X86_RIP);
732
if (!gate_valid || (gate_type != VMCS_INTR_T_HWEXCEPTION &&
733
gate_type != VMCS_INTR_T_HWINTR &&
734
gate_type != VMCS_INTR_T_NMI)) {
735
- int ins_len = rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
736
+ int ins_len = rvmcs(cpu->hvf->fd, VMCS_EXIT_INSTRUCTION_LENGTH);
737
macvm_set_rip(cpu, rip + ins_len);
738
return;
739
}
740
@@ -XXX,XX +XXX,XX @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea
741
//ret = task_switch_16(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc);
742
VM_PANIC("task_switch_16");
743
744
- macvm_set_cr0(cpu->hvf_fd, rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0) | CR0_TS);
745
+ macvm_set_cr0(cpu->hvf->fd, rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0) | CR0_TS);
746
x86_segment_descriptor_to_vmx(cpu, tss_sel, &next_tss_desc, &vmx_seg);
747
vmx_write_segment_descriptor(cpu, &vmx_seg, R_TR);
748
749
store_regs(cpu);
750
751
- hv_vcpu_invalidate_tlb(cpu->hvf_fd);
752
- hv_vcpu_flush(cpu->hvf_fd);
753
+ hv_vcpu_invalidate_tlb(cpu->hvf->fd);
754
+ hv_vcpu_flush(cpu->hvf->fd);
755
}
756
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
757
index XXXXXXX..XXXXXXX 100644
758
--- a/target/i386/hvf/x86hvf.c
759
+++ b/target/i386/hvf/x86hvf.c
760
@@ -XXX,XX +XXX,XX @@ void hvf_put_xsave(CPUState *cpu_state)
761
762
x86_cpu_xsave_all_areas(X86_CPU(cpu_state), xsave);
763
764
- if (hv_vcpu_write_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) {
765
+ if (hv_vcpu_write_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {
766
abort();
767
}
768
}
769
@@ -XXX,XX +XXX,XX @@ void hvf_put_segments(CPUState *cpu_state)
770
CPUX86State *env = &X86_CPU(cpu_state)->env;
771
struct vmx_segment seg;
772
773
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit);
774
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE, env->idt.base);
775
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit);
776
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_BASE, env->idt.base);
777
778
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit);
779
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE, env->gdt.base);
780
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit);
781
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base);
782
783
- /* wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR2, env->cr[2]); */
784
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3, env->cr[3]);
785
+ /* wvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR2, env->cr[2]); */
786
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR3, env->cr[3]);
787
vmx_update_tpr(cpu_state);
788
- wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER, env->efer);
789
+ wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IA32_EFER, env->efer);
790
791
- macvm_set_cr4(cpu_state->hvf_fd, env->cr[4]);
792
- macvm_set_cr0(cpu_state->hvf_fd, env->cr[0]);
793
+ macvm_set_cr4(cpu_state->hvf->fd, env->cr[4]);
794
+ macvm_set_cr0(cpu_state->hvf->fd, env->cr[0]);
795
796
hvf_set_segment(cpu_state, &seg, &env->segs[R_CS], false);
797
vmx_write_segment_descriptor(cpu_state, &seg, R_CS);
798
@@ -XXX,XX +XXX,XX @@ void hvf_put_segments(CPUState *cpu_state)
799
hvf_set_segment(cpu_state, &seg, &env->ldt, false);
800
vmx_write_segment_descriptor(cpu_state, &seg, R_LDTR);
801
802
- hv_vcpu_flush(cpu_state->hvf_fd);
803
+ hv_vcpu_flush(cpu_state->hvf->fd);
804
}
805
806
void hvf_put_msrs(CPUState *cpu_state)
807
{
808
CPUX86State *env = &X86_CPU(cpu_state)->env;
809
810
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS,
811
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_CS,
812
env->sysenter_cs);
813
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP,
814
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_ESP,
815
env->sysenter_esp);
816
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP,
817
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_EIP,
818
env->sysenter_eip);
819
820
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_STAR, env->star);
821
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_STAR, env->star);
822
823
#ifdef TARGET_X86_64
824
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_CSTAR, env->cstar);
825
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, env->kernelgsbase);
826
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FMASK, env->fmask);
827
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_LSTAR, env->lstar);
828
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_CSTAR, env->cstar);
829
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_KERNELGSBASE, env->kernelgsbase);
830
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_FMASK, env->fmask);
831
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_LSTAR, env->lstar);
832
#endif
833
834
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base);
835
- hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base);
836
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_GSBASE, env->segs[R_GS].base);
837
+ hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_FSBASE, env->segs[R_FS].base);
838
}
839
840
841
@@ -XXX,XX +XXX,XX @@ void hvf_get_xsave(CPUState *cpu_state)
842
843
xsave = X86_CPU(cpu_state)->env.xsave_buf;
844
845
- if (hv_vcpu_read_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) {
846
+ if (hv_vcpu_read_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {
847
abort();
848
}
849
850
@@ -XXX,XX +XXX,XX @@ void hvf_get_segments(CPUState *cpu_state)
851
vmx_read_segment_descriptor(cpu_state, &seg, R_LDTR);
852
hvf_get_segment(&env->ldt, &seg);
853
854
- env->idt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT);
855
- env->idt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE);
856
- env->gdt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT);
857
- env->gdt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE);
858
+ env->idt.limit = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_LIMIT);
859
+ env->idt.base = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_BASE);
860
+ env->gdt.limit = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_LIMIT);
861
+ env->gdt.base = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_BASE);
862
863
- env->cr[0] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR0);
864
+ env->cr[0] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR0);
865
env->cr[2] = 0;
866
- env->cr[3] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3);
867
- env->cr[4] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR4);
868
+ env->cr[3] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR3);
869
+ env->cr[4] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR4);
870
871
- env->efer = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER);
872
+ env->efer = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IA32_EFER);
873
}
874
875
void hvf_get_msrs(CPUState *cpu_state)
876
@@ -XXX,XX +XXX,XX @@ void hvf_get_msrs(CPUState *cpu_state)
877
CPUX86State *env = &X86_CPU(cpu_state)->env;
878
uint64_t tmp;
879
880
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS, &tmp);
881
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_CS, &tmp);
882
env->sysenter_cs = tmp;
883
884
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP, &tmp);
885
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_ESP, &tmp);
886
env->sysenter_esp = tmp;
887
888
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP, &tmp);
889
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_EIP, &tmp);
890
env->sysenter_eip = tmp;
891
892
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_STAR, &env->star);
893
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_STAR, &env->star);
894
895
#ifdef TARGET_X86_64
896
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_CSTAR, &env->cstar);
897
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, &env->kernelgsbase);
898
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_FMASK, &env->fmask);
899
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_LSTAR, &env->lstar);
900
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_CSTAR, &env->cstar);
901
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_KERNELGSBASE, &env->kernelgsbase);
902
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_FMASK, &env->fmask);
903
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_LSTAR, &env->lstar);
904
#endif
905
906
- hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_APICBASE, &tmp);
907
+ hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_APICBASE, &tmp);
908
909
- env->tsc = rdtscp() + rvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET);
910
+ env->tsc = rdtscp() + rvmcs(cpu_state->hvf->fd, VMCS_TSC_OFFSET);
911
}
912
913
int hvf_put_registers(CPUState *cpu_state)
914
@@ -XXX,XX +XXX,XX @@ int hvf_put_registers(CPUState *cpu_state)
915
X86CPU *x86cpu = X86_CPU(cpu_state);
916
CPUX86State *env = &x86cpu->env;
917
918
- wreg(cpu_state->hvf_fd, HV_X86_RAX, env->regs[R_EAX]);
919
- wreg(cpu_state->hvf_fd, HV_X86_RBX, env->regs[R_EBX]);
920
- wreg(cpu_state->hvf_fd, HV_X86_RCX, env->regs[R_ECX]);
921
- wreg(cpu_state->hvf_fd, HV_X86_RDX, env->regs[R_EDX]);
922
- wreg(cpu_state->hvf_fd, HV_X86_RBP, env->regs[R_EBP]);
923
- wreg(cpu_state->hvf_fd, HV_X86_RSP, env->regs[R_ESP]);
924
- wreg(cpu_state->hvf_fd, HV_X86_RSI, env->regs[R_ESI]);
925
- wreg(cpu_state->hvf_fd, HV_X86_RDI, env->regs[R_EDI]);
926
- wreg(cpu_state->hvf_fd, HV_X86_R8, env->regs[8]);
927
- wreg(cpu_state->hvf_fd, HV_X86_R9, env->regs[9]);
928
- wreg(cpu_state->hvf_fd, HV_X86_R10, env->regs[10]);
929
- wreg(cpu_state->hvf_fd, HV_X86_R11, env->regs[11]);
930
- wreg(cpu_state->hvf_fd, HV_X86_R12, env->regs[12]);
931
- wreg(cpu_state->hvf_fd, HV_X86_R13, env->regs[13]);
932
- wreg(cpu_state->hvf_fd, HV_X86_R14, env->regs[14]);
933
- wreg(cpu_state->hvf_fd, HV_X86_R15, env->regs[15]);
934
- wreg(cpu_state->hvf_fd, HV_X86_RFLAGS, env->eflags);
935
- wreg(cpu_state->hvf_fd, HV_X86_RIP, env->eip);
936
+ wreg(cpu_state->hvf->fd, HV_X86_RAX, env->regs[R_EAX]);
937
+ wreg(cpu_state->hvf->fd, HV_X86_RBX, env->regs[R_EBX]);
938
+ wreg(cpu_state->hvf->fd, HV_X86_RCX, env->regs[R_ECX]);
939
+ wreg(cpu_state->hvf->fd, HV_X86_RDX, env->regs[R_EDX]);
940
+ wreg(cpu_state->hvf->fd, HV_X86_RBP, env->regs[R_EBP]);
941
+ wreg(cpu_state->hvf->fd, HV_X86_RSP, env->regs[R_ESP]);
942
+ wreg(cpu_state->hvf->fd, HV_X86_RSI, env->regs[R_ESI]);
943
+ wreg(cpu_state->hvf->fd, HV_X86_RDI, env->regs[R_EDI]);
944
+ wreg(cpu_state->hvf->fd, HV_X86_R8, env->regs[8]);
945
+ wreg(cpu_state->hvf->fd, HV_X86_R9, env->regs[9]);
946
+ wreg(cpu_state->hvf->fd, HV_X86_R10, env->regs[10]);
947
+ wreg(cpu_state->hvf->fd, HV_X86_R11, env->regs[11]);
948
+ wreg(cpu_state->hvf->fd, HV_X86_R12, env->regs[12]);
949
+ wreg(cpu_state->hvf->fd, HV_X86_R13, env->regs[13]);
950
+ wreg(cpu_state->hvf->fd, HV_X86_R14, env->regs[14]);
951
+ wreg(cpu_state->hvf->fd, HV_X86_R15, env->regs[15]);
952
+ wreg(cpu_state->hvf->fd, HV_X86_RFLAGS, env->eflags);
953
+ wreg(cpu_state->hvf->fd, HV_X86_RIP, env->eip);
954
955
- wreg(cpu_state->hvf_fd, HV_X86_XCR0, env->xcr0);
956
+ wreg(cpu_state->hvf->fd, HV_X86_XCR0, env->xcr0);
957
958
hvf_put_xsave(cpu_state);
959
960
@@ -XXX,XX +XXX,XX @@ int hvf_put_registers(CPUState *cpu_state)
961
962
hvf_put_msrs(cpu_state);
963
964
- wreg(cpu_state->hvf_fd, HV_X86_DR0, env->dr[0]);
965
- wreg(cpu_state->hvf_fd, HV_X86_DR1, env->dr[1]);
966
- wreg(cpu_state->hvf_fd, HV_X86_DR2, env->dr[2]);
967
- wreg(cpu_state->hvf_fd, HV_X86_DR3, env->dr[3]);
968
- wreg(cpu_state->hvf_fd, HV_X86_DR4, env->dr[4]);
969
- wreg(cpu_state->hvf_fd, HV_X86_DR5, env->dr[5]);
970
- wreg(cpu_state->hvf_fd, HV_X86_DR6, env->dr[6]);
971
- wreg(cpu_state->hvf_fd, HV_X86_DR7, env->dr[7]);
972
+ wreg(cpu_state->hvf->fd, HV_X86_DR0, env->dr[0]);
973
+ wreg(cpu_state->hvf->fd, HV_X86_DR1, env->dr[1]);
974
+ wreg(cpu_state->hvf->fd, HV_X86_DR2, env->dr[2]);
975
+ wreg(cpu_state->hvf->fd, HV_X86_DR3, env->dr[3]);
976
+ wreg(cpu_state->hvf->fd, HV_X86_DR4, env->dr[4]);
977
+ wreg(cpu_state->hvf->fd, HV_X86_DR5, env->dr[5]);
978
+ wreg(cpu_state->hvf->fd, HV_X86_DR6, env->dr[6]);
979
+ wreg(cpu_state->hvf->fd, HV_X86_DR7, env->dr[7]);
980
981
return 0;
982
}
983
@@ -XXX,XX +XXX,XX @@ int hvf_get_registers(CPUState *cpu_state)
984
X86CPU *x86cpu = X86_CPU(cpu_state);
985
CPUX86State *env = &x86cpu->env;
986
987
- env->regs[R_EAX] = rreg(cpu_state->hvf_fd, HV_X86_RAX);
988
- env->regs[R_EBX] = rreg(cpu_state->hvf_fd, HV_X86_RBX);
989
- env->regs[R_ECX] = rreg(cpu_state->hvf_fd, HV_X86_RCX);
990
- env->regs[R_EDX] = rreg(cpu_state->hvf_fd, HV_X86_RDX);
991
- env->regs[R_EBP] = rreg(cpu_state->hvf_fd, HV_X86_RBP);
992
- env->regs[R_ESP] = rreg(cpu_state->hvf_fd, HV_X86_RSP);
993
- env->regs[R_ESI] = rreg(cpu_state->hvf_fd, HV_X86_RSI);
994
- env->regs[R_EDI] = rreg(cpu_state->hvf_fd, HV_X86_RDI);
995
- env->regs[8] = rreg(cpu_state->hvf_fd, HV_X86_R8);
996
- env->regs[9] = rreg(cpu_state->hvf_fd, HV_X86_R9);
997
- env->regs[10] = rreg(cpu_state->hvf_fd, HV_X86_R10);
998
- env->regs[11] = rreg(cpu_state->hvf_fd, HV_X86_R11);
999
- env->regs[12] = rreg(cpu_state->hvf_fd, HV_X86_R12);
1000
- env->regs[13] = rreg(cpu_state->hvf_fd, HV_X86_R13);
1001
- env->regs[14] = rreg(cpu_state->hvf_fd, HV_X86_R14);
1002
- env->regs[15] = rreg(cpu_state->hvf_fd, HV_X86_R15);
1003
+ env->regs[R_EAX] = rreg(cpu_state->hvf->fd, HV_X86_RAX);
1004
+ env->regs[R_EBX] = rreg(cpu_state->hvf->fd, HV_X86_RBX);
1005
+ env->regs[R_ECX] = rreg(cpu_state->hvf->fd, HV_X86_RCX);
1006
+ env->regs[R_EDX] = rreg(cpu_state->hvf->fd, HV_X86_RDX);
1007
+ env->regs[R_EBP] = rreg(cpu_state->hvf->fd, HV_X86_RBP);
1008
+ env->regs[R_ESP] = rreg(cpu_state->hvf->fd, HV_X86_RSP);
1009
+ env->regs[R_ESI] = rreg(cpu_state->hvf->fd, HV_X86_RSI);
1010
+ env->regs[R_EDI] = rreg(cpu_state->hvf->fd, HV_X86_RDI);
1011
+ env->regs[8] = rreg(cpu_state->hvf->fd, HV_X86_R8);
1012
+ env->regs[9] = rreg(cpu_state->hvf->fd, HV_X86_R9);
1013
+ env->regs[10] = rreg(cpu_state->hvf->fd, HV_X86_R10);
1014
+ env->regs[11] = rreg(cpu_state->hvf->fd, HV_X86_R11);
1015
+ env->regs[12] = rreg(cpu_state->hvf->fd, HV_X86_R12);
1016
+ env->regs[13] = rreg(cpu_state->hvf->fd, HV_X86_R13);
1017
+ env->regs[14] = rreg(cpu_state->hvf->fd, HV_X86_R14);
1018
+ env->regs[15] = rreg(cpu_state->hvf->fd, HV_X86_R15);
1019
1020
- env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS);
1021
- env->eip = rreg(cpu_state->hvf_fd, HV_X86_RIP);
1022
+ env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
1023
+ env->eip = rreg(cpu_state->hvf->fd, HV_X86_RIP);
1024
1025
hvf_get_xsave(cpu_state);
1026
- env->xcr0 = rreg(cpu_state->hvf_fd, HV_X86_XCR0);
1027
+ env->xcr0 = rreg(cpu_state->hvf->fd, HV_X86_XCR0);
1028
1029
hvf_get_segments(cpu_state);
1030
hvf_get_msrs(cpu_state);
1031
1032
- env->dr[0] = rreg(cpu_state->hvf_fd, HV_X86_DR0);
1033
- env->dr[1] = rreg(cpu_state->hvf_fd, HV_X86_DR1);
1034
- env->dr[2] = rreg(cpu_state->hvf_fd, HV_X86_DR2);
1035
- env->dr[3] = rreg(cpu_state->hvf_fd, HV_X86_DR3);
1036
- env->dr[4] = rreg(cpu_state->hvf_fd, HV_X86_DR4);
1037
- env->dr[5] = rreg(cpu_state->hvf_fd, HV_X86_DR5);
1038
- env->dr[6] = rreg(cpu_state->hvf_fd, HV_X86_DR6);
1039
- env->dr[7] = rreg(cpu_state->hvf_fd, HV_X86_DR7);
1040
+ env->dr[0] = rreg(cpu_state->hvf->fd, HV_X86_DR0);
1041
+ env->dr[1] = rreg(cpu_state->hvf->fd, HV_X86_DR1);
1042
+ env->dr[2] = rreg(cpu_state->hvf->fd, HV_X86_DR2);
1043
+ env->dr[3] = rreg(cpu_state->hvf->fd, HV_X86_DR3);
1044
+ env->dr[4] = rreg(cpu_state->hvf->fd, HV_X86_DR4);
1045
+ env->dr[5] = rreg(cpu_state->hvf->fd, HV_X86_DR5);
1046
+ env->dr[6] = rreg(cpu_state->hvf->fd, HV_X86_DR6);
1047
+ env->dr[7] = rreg(cpu_state->hvf->fd, HV_X86_DR7);
1048
1049
x86_update_hflags(env);
1050
return 0;
1051
@@ -XXX,XX +XXX,XX @@ int hvf_get_registers(CPUState *cpu_state)
1052
static void vmx_set_int_window_exiting(CPUState *cpu)
1053
{
1054
uint64_t val;
1055
- val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
1056
- wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val |
1057
+ val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
1058
+ wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val |
1059
VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
1060
}
1061
1062
void vmx_clear_int_window_exiting(CPUState *cpu)
1063
{
1064
uint64_t val;
1065
- val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
1066
- wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val &
1067
+ val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
1068
+ wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val &
1069
~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
1070
}
1071
1072
@@ -XXX,XX +XXX,XX @@ bool hvf_inject_interrupts(CPUState *cpu_state)
1073
uint64_t info = 0;
1074
if (have_event) {
1075
info = vector | intr_type | VMCS_INTR_VALID;
1076
- uint64_t reason = rvmcs(cpu_state->hvf_fd, VMCS_EXIT_REASON);
1077
+ uint64_t reason = rvmcs(cpu_state->hvf->fd, VMCS_EXIT_REASON);
1078
if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) {
1079
vmx_clear_nmi_blocking(cpu_state);
1080
}
1081
@@ -XXX,XX +XXX,XX @@ bool hvf_inject_interrupts(CPUState *cpu_state)
1082
info &= ~(1 << 12); /* clear undefined bit */
1083
if (intr_type == VMCS_INTR_T_SWINTR ||
1084
intr_type == VMCS_INTR_T_SWEXCEPTION) {
1085
- wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
1086
+ wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
1087
}
1088
1089
if (env->has_error_code) {
1090
- wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR,
1091
+ wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_EXCEPTION_ERROR,
1092
env->error_code);
1093
/* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */
1094
info |= VMCS_INTR_DEL_ERRCODE;
1095
}
1096
/*printf("reinject %lx err %d\n", info, err);*/
1097
- wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
1098
+ wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, info);
1099
};
1100
}
1101
1102
@@ -XXX,XX +XXX,XX @@ bool hvf_inject_interrupts(CPUState *cpu_state)
1103
if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) {
1104
cpu_state->interrupt_request &= ~CPU_INTERRUPT_NMI;
1105
info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI;
1106
- wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
1107
+ wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, info);
1108
} else {
1109
vmx_set_nmi_window_exiting(cpu_state);
1110
}
1111
@@ -XXX,XX +XXX,XX @@ bool hvf_inject_interrupts(CPUState *cpu_state)
1112
int line = cpu_get_pic_interrupt(&x86cpu->env);
1113
cpu_state->interrupt_request &= ~CPU_INTERRUPT_HARD;
1114
if (line >= 0) {
1115
- wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, line |
1116
+ wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, line |
1117
VMCS_INTR_VALID | VMCS_INTR_T_HWINTR);
1118
}
1119
}
1120
@@ -XXX,XX +XXX,XX @@ int hvf_process_events(CPUState *cpu_state)
1121
X86CPU *cpu = X86_CPU(cpu_state);
1122
CPUX86State *env = &cpu->env;
1123
1124
- env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS);
1125
+ env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
1126
1127
if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
1128
cpu_synchronize_state(cpu_state);
1129
--
66
--
1130
2.20.1
67
2.25.1
1131
1132
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The hvf accel synchronize functions are only used as input for local
3
Mark these as non-streaming instructions, which should trap
4
callback functions, so we can make them static.
4
if full a64 support is not enabled in streaming mode.
5
5
6
Signed-off-by: Alexander Graf <agraf@csgraf.de>
7
Reviewed-by: Sergio Lopez <slp@redhat.com>
8
Message-id: 20210519202253.76782-10-agraf@csgraf.de
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
accel/hvf/hvf-accel-ops.h | 3 ---
11
target/arm/sme-fa64.decode | 2 --
13
accel/hvf/hvf-accel-ops.c | 6 +++---
12
target/arm/translate-sve.c | 9 ++++++---
14
2 files changed, 3 insertions(+), 6 deletions(-)
13
2 files changed, 6 insertions(+), 5 deletions(-)
15
14
16
diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/accel/hvf/hvf-accel-ops.h
17
--- a/target/arm/sme-fa64.decode
19
+++ b/accel/hvf/hvf-accel-ops.h
18
+++ b/target/arm/sme-fa64.decode
20
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
#include "sysemu/cpus.h"
20
22
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
23
int hvf_vcpu_exec(CPUState *);
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
24
-void hvf_cpu_synchronize_post_reset(CPUState *);
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
25
-void hvf_cpu_synchronize_post_init(CPUState *);
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
26
-void hvf_cpu_synchronize_pre_loadvm(CPUState *);
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
27
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
28
#endif /* HVF_CPUS_H */
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
29
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
31
--- a/accel/hvf/hvf-accel-ops.c
30
--- a/target/arm/translate-sve.c
32
+++ b/accel/hvf/hvf-accel-ops.c
31
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
34
cpu->vcpu_dirty = false;
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
35
}
49
}
36
50
37
-void hvf_cpu_synchronize_post_reset(CPUState *cpu)
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
38
+static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
39
{
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
40
run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
41
}
55
42
@@ -XXX,XX +XXX,XX @@ static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
43
cpu->vcpu_dirty = false;
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
44
}
45
46
-void hvf_cpu_synchronize_post_init(CPUState *cpu)
47
+static void hvf_cpu_synchronize_post_init(CPUState *cpu)
48
{
49
run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
50
}
51
@@ -XXX,XX +XXX,XX @@ static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
52
cpu->vcpu_dirty = true;
53
}
54
55
-void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
56
+static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
57
{
58
run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
59
}
60
--
58
--
61
2.20.1
59
2.25.1
62
63
diff view generated by jsdifflib
1
Coverity points out that in tpm_test_swtpm_migration_test() we
1
From: Richard Henderson <richard.henderson@linaro.org>
2
assume that src_tpm_addr and dst_tpm_addr are non-NULL (we
3
pass them to tpm_util_migration_start_qemu() which will
4
unconditionally dereference them) but then later explicitly
5
check them for NULL. Remove the pointless checks.
6
2
7
Fixes: Coverity CID 1432367, 1432359
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
8
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
12
Message-id: 20210525134458.6675-6-peter.maydell@linaro.org
13
---
10
---
14
tests/qtest/tpm-tests.c | 12 ++++--------
11
target/arm/sme-fa64.decode | 3 ---
15
1 file changed, 4 insertions(+), 8 deletions(-)
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
16
14
17
diff --git a/tests/qtest/tpm-tests.c b/tests/qtest/tpm-tests.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/qtest/tpm-tests.c
17
--- a/target/arm/sme-fa64.decode
20
+++ b/tests/qtest/tpm-tests.c
18
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path,
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
qtest_quit(src_qemu);
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
tpm_util_swtpm_kill(dst_tpm_pid);
22
25
- if (dst_tpm_addr) {
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
26
- g_unlink(dst_tpm_addr->u.q_unix.path);
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
27
- qapi_free_SocketAddress(dst_tpm_addr);
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
28
- }
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
29
+ g_unlink(dst_tpm_addr->u.q_unix.path);
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
30
+ qapi_free_SocketAddress(dst_tpm_addr);
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
31
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
32
tpm_util_swtpm_kill(src_tpm_pid);
30
index XXXXXXX..XXXXXXX 100644
33
- if (src_tpm_addr) {
31
--- a/target/arm/translate-sve.c
34
- g_unlink(src_tpm_addr->u.q_unix.path);
32
+++ b/target/arm/translate-sve.c
35
- qapi_free_SocketAddress(src_tpm_addr);
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
36
- }
34
NULL, gen_helper_sve_fexpa_h,
37
+ g_unlink(src_tpm_addr->u.q_unix.path);
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
38
+ qapi_free_SocketAddress(src_tpm_addr);
36
};
39
}
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
40
--
91
--
41
2.20.1
92
2.25.1
42
43
diff view generated by jsdifflib
1
The do_vfp_2op_sp() and do_vfp_2op_dp() functions currently check
1
From: Richard Henderson <richard.henderson@linaro.org>
2
whether floating point is supported via the aa32_fpdp_v2 and
3
aa32_fpsp_v2 isar checks. For v8.1M MVE support, the VMOV_reg trans
4
functions (but not any of the others) need to update this to also
5
allow the insn if MVE is implemented. Move the check out of the do_
6
function and into its callsites (which are all implemented via the
7
DO_VFP_2OP macro), so we have a place to change the check for the
8
VMOV insns.
9
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20210520152840.24453-4-peter.maydell@linaro.org
13
---
10
---
14
target/arm/translate-vfp.c | 37 +++++++++++++++++++------------------
11
target/arm/sme-fa64.decode | 2 --
15
1 file changed, 19 insertions(+), 18 deletions(-)
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
13
2 files changed, 15 insertions(+), 11 deletions(-)
16
14
17
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/translate-vfp.c
17
--- a/target/arm/sme-fa64.decode
20
+++ b/target/arm/translate-vfp.c
18
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
int veclen = s->vec_len;
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
TCGv_i32 f0, fd;
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
22
25
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
26
- return false;
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
27
- }
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
28
+ /* Note that the caller must check the aa32_fpsp_v2 feature. */
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
29
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
30
if (!dc_isar_feature(aa32_fpshvec, s) &&
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
(veclen != 0 || s->vec_stride != 0)) {
29
index XXXXXXX..XXXXXXX 100644
32
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
30
--- a/target/arm/translate-sve.c
33
*/
31
+++ b/target/arm/translate-sve.c
34
TCGv_i32 f0;
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
35
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
36
+ /* Note that the caller must check the aa32_fp16_arith feature */
34
NULL, gen_helper_sve2_pmull_d,
35
};
36
- if (a->esz == 0
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
38
- : !dc_isar_feature(aa64_sve, s)) {
37
+
39
+
38
if (!dc_isar_feature(aa32_fp16_arith, s)) {
40
+ if (a->esz == 0) {
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
42
+ return false;
43
+ }
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
39
return false;
46
return false;
40
}
47
}
41
@@ -XXX,XX +XXX,XX @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
42
int veclen = s->vec_len;
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
43
TCGv_i64 f0, fd;
50
* SVE Integer Multiply-Add (unpredicated)
44
51
*/
45
- if (!dc_isar_feature(aa32_fpdp_v2, s)) {
52
46
- return false;
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
47
- }
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
48
+ /* Note that the caller must check the aa32_fpdp_v2 feature. */
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
49
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
50
/* UNDEF accesses to D16-D31 if they don't exist */
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
51
if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
52
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
59
+ 0, FPST_FPCR)
53
return true;
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
54
}
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
55
62
+ 0, FPST_FPCR)
56
-#define DO_VFP_2OP(INSN, PREC, FN) \
63
57
+#define DO_VFP_2OP(INSN, PREC, FN, CHECK) \
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
58
static bool trans_##INSN##_##PREC(DisasContext *s, \
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
59
arg_##INSN##_##PREC *a) \
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
60
{ \
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
61
+ if (!dc_isar_feature(CHECK, s)) { \
68
gen_helper_gvec_bfdot_idx, a)
62
+ return false; \
69
63
+ } \
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
64
return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
71
- gen_helper_gvec_bfmmla, a, 0)
65
}
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
66
73
+ gen_helper_gvec_bfmmla, a, 0)
67
-DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
74
68
-DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
69
+DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32, aa32_fpsp_v2)
70
+DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64, aa32_fpdp_v2)
71
72
-DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
73
-DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
74
-DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)
75
+DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
76
+DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
77
+DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
78
79
-DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
80
-DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
81
-DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
82
+DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
83
+DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
84
+DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
85
86
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
87
{
88
@@ -XXX,XX +XXX,XX @@ static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
89
gen_helper_vfp_sqrtd(vd, vm, cpu_env);
90
}
91
92
-DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
93
-DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
94
-DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
95
+DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
96
+DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
97
+DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
98
99
static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
100
{
76
{
101
--
77
--
102
2.20.1
78
2.25.1
103
104
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
For Arm BFDOT and BFMMLA, we need a version of round-to-odd
3
Mark these as non-streaming instructions, which should trap
4
that overflows to infinity, instead of the max normal number.
4
if full a64 support is not enabled in streaming mode.
5
5
6
Cc: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-6-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
include/fpu/softfloat-types.h | 4 +++-
11
target/arm/sme-fa64.decode | 3 ---
13
fpu/softfloat-parts.c.inc | 6 ++++--
12
target/arm/translate-sve.c | 15 +++++++++++----
14
2 files changed, 7 insertions(+), 3 deletions(-)
13
2 files changed, 11 insertions(+), 7 deletions(-)
15
14
16
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/include/fpu/softfloat-types.h
17
--- a/target/arm/sme-fa64.decode
19
+++ b/include/fpu/softfloat-types.h
18
+++ b/target/arm/sme-fa64.decode
20
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
float_round_up = 2,
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
float_round_to_zero = 3,
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
float_round_ties_away = 4,
22
24
- /* Not an IEEE rounding mode: round to the closest odd mantissa value */
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
25
+ /* Not an IEEE rounding mode: round to closest odd, overflow to max */
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
26
float_round_to_odd = 5,
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
27
+ /* Not an IEEE rounding mode: round to closest odd, overflow to inf */
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
28
+ float_round_to_odd_inf = 6,
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
29
} FloatRoundMode;
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
36
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
30
43
31
/*
44
/*
32
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
45
*** SVE Floating Point Accumulating Reduction Group
33
index XXXXXXX..XXXXXXX 100644
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
34
--- a/fpu/softfloat-parts.c.inc
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
35
+++ b/fpu/softfloat-parts.c.inc
48
return false;
36
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
37
g_assert_not_reached();
38
}
49
}
39
50
+ s->is_nonstreaming = true;
40
+ overflow_norm = false;
51
if (!sve_access_check(s)) {
41
switch (s->float_rounding_mode) {
52
return true;
42
case float_round_nearest_even:
53
}
43
- overflow_norm = false;
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
44
inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
55
DO_FP3(FADD_zzz, fadd)
45
break;
56
DO_FP3(FSUB_zzz, fsub)
46
case float_round_ties_away:
57
DO_FP3(FMUL_zzz, fmul)
47
- overflow_norm = false;
58
-DO_FP3(FTSMUL, ftsmul)
48
inc = frac_lsbm1;
59
DO_FP3(FRECPS, recps)
49
break;
60
DO_FP3(FRSQRTS, rsqrts)
50
case float_round_to_zero:
61
51
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
62
#undef DO_FP3
52
break;
63
53
case float_round_to_odd:
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
54
overflow_norm = true;
65
+ NULL, gen_helper_gvec_ftsmul_h,
55
+ /* fall through */
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
56
+ case float_round_to_odd_inf:
67
+};
57
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
58
break;
69
+ ftsmul_fns[a->esz], a, 0)
59
default:
70
+
60
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
71
/*
61
? frac_lsbm1 : 0);
72
*** SVE Floating Point Arithmetic - Predicated Group
62
break;
73
*/
63
case float_round_to_odd:
64
+ case float_round_to_odd_inf:
65
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
66
break;
67
default:
68
--
74
--
69
2.20.1
75
2.25.1
70
71
diff view generated by jsdifflib
1
Coverity notices that the checks against mkstemp() failing in
1
From: Richard Henderson <richard.henderson@linaro.org>
2
create_qcow2_with_mbr() are wrong: mkstemp returns -1 on failure but
3
the check is just "g_assert(fd)". Fix to use "g_assert(fd >= 0)",
4
matching the correct check in create_test_img().
5
2
6
Fixes: Coverity CID 1432274
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
10
Message-id: 20210525134458.6675-4-peter.maydell@linaro.org
11
---
10
---
12
tests/qtest/hd-geo-test.c | 4 ++--
11
target/arm/sme-fa64.decode | 1 -
13
1 file changed, 2 insertions(+), 2 deletions(-)
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
14
15
diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/qtest/hd-geo-test.c
17
--- a/target/arm/sme-fa64.decode
18
+++ b/tests/qtest/hd-geo-test.c
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
}
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
fd = mkstemp(raw_path);
22
23
- g_assert(fd);
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
+ g_assert(fd >= 0);
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
close(fd);
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
fd = open(raw_path, O_WRONLY);
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
@@ -XXX,XX +XXX,XX @@ static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
28
index XXXXXXX..XXXXXXX 100644
29
close(fd);
29
--- a/target/arm/translate-sve.c
30
30
+++ b/target/arm/translate-sve.c
31
fd = mkstemp(qcow2_path);
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
- g_assert(fd);
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
+ g_assert(fd >= 0);
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
close(fd);
34
35
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
qemu_img_path = getenv("QTEST_QEMU_IMG");
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
37
--
50
--
38
2.20.1
51
2.25.1
39
40
diff view generated by jsdifflib
1
The e1000e_send_verify() test calls qemu_recv() but doesn't
1
From: Richard Henderson <richard.henderson@linaro.org>
2
check that the call succeeded, which annoys Coverity. Add
3
an explicit test check for the length of the data.
4
2
5
(This is a test check, not a "we assume this syscall always
3
Mark these as non-streaming instructions, which should trap
6
succeeds", so we use g_assert_cmpint() rather than g_assert().)
4
if full a64 support is not enabled in streaming mode.
7
5
8
Fixes: Coverity CID 1432324
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
11
Message-id: 20210525134458.6675-3-peter.maydell@linaro.org
12
---
10
---
13
tests/qtest/e1000e-test.c | 3 ++-
11
target/arm/sme-fa64.decode | 1 -
14
1 file changed, 2 insertions(+), 1 deletion(-)
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
15
14
16
diff --git a/tests/qtest/e1000e-test.c b/tests/qtest/e1000e-test.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/tests/qtest/e1000e-test.c
17
--- a/target/arm/sme-fa64.decode
19
+++ b/tests/qtest/e1000e-test.c
18
+++ b/target/arm/sme-fa64.decode
20
@@ -XXX,XX +XXX,XX @@ static void e1000e_send_verify(QE1000E *d, int *test_sockets, QGuestAllocator *a
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
21
/* Check data sent to the backend */
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
ret = qemu_recv(test_sockets[0], &recv_len, sizeof(recv_len), 0);
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
g_assert_cmpint(ret, == , sizeof(recv_len));
22
24
- qemu_recv(test_sockets[0], buffer, 64, 0);
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
+ ret = qemu_recv(test_sockets[0], buffer, 64, 0);
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
+ g_assert_cmpint(ret, >=, 5);
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
g_assert_cmpstr(buffer, == , "TEST");
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
28
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
/* Free test data buffer */
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
30
--
92
--
31
2.20.1
93
2.25.1
32
33
diff view generated by jsdifflib
1
Some v8M instructions are present if either the floating point
1
From: Richard Henderson <richard.henderson@linaro.org>
2
extension or MVE is implemented. Update our implementation of them
3
to check for MVE as well as for FP.
4
2
5
This is all the insns which use CheckDecodeFaults(ExtType_MveOrFp) or
3
Mark these as non-streaming instructions, which should trap
6
CheckDecodeFaults(ExtType_MveOrDpFp) in their pseudocode, which are
4
if full a64 support is not enabled in streaming mode.
7
essentially the loads and stores, moves and sysreg accesses, except
8
for VMOV_reg_sp and VMOV_reg_dp, which we handle in subsequent
9
patches because they need a refactor to provide a place to put the
10
new MVE check.
11
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20210520152840.24453-3-peter.maydell@linaro.org
15
---
10
---
16
target/arm/translate-vfp.c | 48 +++++++++++++++++++++++---------------
11
target/arm/sme-fa64.decode | 9 ---------
17
1 file changed, 29 insertions(+), 19 deletions(-)
12
target/arm/translate-sve.c | 6 ++++++
13
2 files changed, 6 insertions(+), 9 deletions(-)
18
14
19
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate-vfp.c
17
--- a/target/arm/sme-fa64.decode
22
+++ b/target/arm/translate-vfp.c
18
+++ b/target/arm/sme-fa64.decode
23
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
24
/* VMOV scalar to general purpose register */
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
25
TCGv_i32 tmp;
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
26
22
27
- /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
28
- if (a->size == MO_32
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
29
- ? !dc_isar_feature(aa32_fpsp_v2, s)
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
30
- : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
31
- return false;
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
32
+ /*
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
33
+ * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
34
+ * all sizes, whether the CPU has fp or not.
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
35
+ */
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
36
+ if (!dc_isar_feature(aa32_mve, s)) {
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
37
+ if (a->size == MO_32
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
38
+ ? !dc_isar_feature(aa32_fpsp_v2, s)
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
39
+ : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
40
+ return false;
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
41
+ }
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
42
}
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
43
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
/* UNDEF accesses to D16-D31 if they don't exist */
40
index XXXXXXX..XXXXXXX 100644
45
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
41
--- a/target/arm/translate-sve.c
46
/* VMOV general purpose register to scalar */
42
+++ b/target/arm/translate-sve.c
47
TCGv_i32 tmp;
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
48
44
if (!dc_isar_feature(aa64_sve, s)) {
49
- /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
50
- if (a->size == MO_32
51
- ? !dc_isar_feature(aa32_fpsp_v2, s)
52
- : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
53
- return false;
54
+ /*
55
+ * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
56
+ * all sizes, whether the CPU has fp or not.
57
+ */
58
+ if (!dc_isar_feature(aa32_mve, s)) {
59
+ if (a->size == MO_32
60
+ ? !dc_isar_feature(aa32_fpsp_v2, s)
61
+ : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
62
+ return false;
63
+ }
64
}
65
66
/* UNDEF accesses to D16-D31 if they don't exist */
67
@@ -XXX,XX +XXX,XX @@ typedef enum FPSysRegCheckResult {
68
69
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
70
{
71
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
72
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
73
return FPSysRegCheckFailed;
74
}
75
76
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
77
{
78
TCGv_i32 tmp;
79
80
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
81
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
82
return false;
45
return false;
83
}
46
}
84
47
+ s->is_nonstreaming = true;
85
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
48
if (!sve_access_check(s)) {
86
{
49
return true;
87
TCGv_i32 tmp;
50
}
88
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
89
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
52
if (!dc_isar_feature(aa64_sve, s)) {
90
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
91
return false;
53
return false;
92
}
54
}
93
55
+ s->is_nonstreaming = true;
94
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
56
if (!sve_access_check(s)) {
95
* floating point register. Note that this does not require support
57
return true;
96
* for double precision arithmetic.
58
}
97
*/
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
98
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
60
if (!dc_isar_feature(aa64_sve2, s)) {
99
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
100
return false;
61
return false;
101
}
62
}
102
63
+ s->is_nonstreaming = true;
103
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
64
if (!sve_access_check(s)) {
104
uint32_t offset;
65
return true;
105
TCGv_i32 addr, tmp;
66
}
106
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
107
- if (!dc_isar_feature(aa32_fp16_arith, s)) {
68
if (!dc_isar_feature(aa64_sve, s)) {
108
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
109
return false;
69
return false;
110
}
70
}
111
71
+ s->is_nonstreaming = true;
112
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
72
if (!sve_access_check(s)) {
113
uint32_t offset;
73
return true;
114
TCGv_i32 addr, tmp;
74
}
115
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
116
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
76
if (!dc_isar_feature(aa64_sve, s)) {
117
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
118
return false;
77
return false;
119
}
78
}
120
79
+ s->is_nonstreaming = true;
121
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
80
if (!sve_access_check(s)) {
122
TCGv_i64 tmp;
81
return true;
123
82
}
124
/* Note that this does not require support for double arithmetic. */
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
125
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
84
if (!dc_isar_feature(aa64_sve2, s)) {
126
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
127
return false;
85
return false;
128
}
86
}
129
87
+ s->is_nonstreaming = true;
130
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
88
if (!sve_access_check(s)) {
131
TCGv_i32 addr, tmp;
89
return true;
132
int i, n;
133
134
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
135
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
136
return false;
137
}
90
}
138
139
@@ -XXX,XX +XXX,XX @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
140
int i, n;
141
142
/* Note that this does not require support for double arithmetic. */
143
- if (!dc_isar_feature(aa32_fpsp_v2, s)) {
144
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
145
return false;
146
}
147
148
--
91
--
149
2.20.1
92
2.25.1
150
151
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFDOT for both AArch64 AdvSIMD and SVE,
3
Mark these as non-streaming instructions, which should trap if full
4
and VDOT.BF16 for AArch32 NEON.
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
5
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210525225817.400336-8-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
---
11
target/arm/helper.h | 2 ++
12
target/arm/sme-fa64.decode | 3 ---
12
target/arm/neon-shared.decode | 2 ++
13
target/arm/sve.decode | 10 +++++-----
13
target/arm/sve.decode | 3 +++
14
target/arm/translate-sve.c | 11 +++++++++++
14
target/arm/translate-a64.c | 41 +++++++++++++++++++++++++++--------
15
3 files changed, 16 insertions(+), 8 deletions(-)
15
target/arm/translate-neon.c | 9 ++++++++
16
target/arm/translate-sve.c | 12 ++++++++++
17
target/arm/vec_helper.c | 20 +++++++++++++++++
18
7 files changed, 80 insertions(+), 9 deletions(-)
19
16
20
diff --git a/target/arm/helper.h b/target/arm/helper.h
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
21
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.h
19
--- a/target/arm/sme-fa64.decode
23
+++ b/target/arm/helper.h
20
+++ b/target/arm/sme-fa64.decode
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
25
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
26
DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
27
void, ptr, ptr, ptr, ptr, i32)
24
28
+DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
29
+ void, ptr, ptr, ptr, ptr, i32)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
30
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
#ifdef TARGET_AARCH64
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
#include "helper-a64.h"
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
index XXXXXXX..XXXXXXX 100644
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
--- a/target/arm/neon-shared.decode
36
+++ b/target/arm/neon-shared.decode
37
@@ -XXX,XX +XXX,XX @@ VUSDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
38
vn=%vn_dp vd=%vd_dp
39
VSUDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 1 vm:4 \
40
vn=%vn_dp vd=%vd_dp
41
+VDOT_b16_scal 1111 1110 0 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
42
+ vn=%vn_dp vd=%vd_dp
43
44
%vfml_scalar_q0_rm 0:3 5:1
45
%vfml_scalar_q1_index 5:1 3:1
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
33
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
34
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
51
FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
37
@rpri_load_msz nreg=0
52
FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
38
53
FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
54
+
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
55
+### SVE2 floating-point bfloat16 dot-product (indexed)
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
56
+BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
42
57
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
43
# SVE 32-bit gather prefetch (vector plus immediate)
58
index XXXXXXX..XXXXXXX 100644
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
59
--- a/target/arm/translate-a64.c
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
60
+++ b/target/arm/translate-a64.c
46
61
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
47
# SVE contiguous prefetch (scalar plus immediate)
62
return;
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
63
}
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
64
break;
50
@rpri_g_load esz=3
65
- case 0x0f: /* SUDOT, USDOT */
51
66
- if (is_scalar || (size & 1) || !dc_isar_feature(aa64_i8mm, s)) {
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
67
+ case 0x0f:
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
68
+ switch (size) {
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
69
+ case 0: /* SUDOT */
55
70
+ case 2: /* USDOT */
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
71
+ if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
72
+ unallocated_encoding(s);
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
73
+ return;
59
74
+ }
60
# SVE 64-bit gather prefetch (vector plus immediate)
75
+ break;
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
76
+ case 1: /* BFDOT */
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
77
+ if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
63
78
+ unallocated_encoding(s);
64
### SVE Memory Store Group
79
+ return;
65
80
+ }
81
+ break;
82
+ default:
83
unallocated_encoding(s);
84
return;
85
}
86
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
87
u ? gen_helper_gvec_udot_idx_b
88
: gen_helper_gvec_sdot_idx_b);
89
return;
90
- case 0x0f: /* SUDOT, USDOT */
91
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
92
- extract32(insn, 23, 1)
93
- ? gen_helper_gvec_usdot_idx_b
94
- : gen_helper_gvec_sudot_idx_b);
95
- return;
96
-
97
+ case 0x0f:
98
+ switch (extract32(insn, 22, 2)) {
99
+ case 0: /* SUDOT */
100
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
101
+ gen_helper_gvec_sudot_idx_b);
102
+ return;
103
+ case 1: /* BFDOT */
104
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
105
+ gen_helper_gvec_bfdot_idx);
106
+ return;
107
+ case 2: /* USDOT */
108
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
109
+ gen_helper_gvec_usdot_idx_b);
110
+ return;
111
+ }
112
+ g_assert_not_reached();
113
case 0x11: /* FCMLA #0 */
114
case 0x13: /* FCMLA #90 */
115
case 0x15: /* FCMLA #180 */
116
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/target/arm/translate-neon.c
119
+++ b/target/arm/translate-neon.c
120
@@ -XXX,XX +XXX,XX @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
121
gen_helper_gvec_sudot_idx_b);
122
}
123
124
+static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
125
+{
126
+ if (!dc_isar_feature(aa32_bf16, s)) {
127
+ return false;
128
+ }
129
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
130
+ gen_helper_gvec_bfdot_idx);
131
+}
132
+
133
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
134
{
135
int opr_sz;
136
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
137
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
138
--- a/target/arm/translate-sve.c
68
--- a/target/arm/translate-sve.c
139
+++ b/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
140
@@ -XXX,XX +XXX,XX @@ static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
141
}
142
return true;
71
return true;
143
}
72
}
144
+
73
145
+static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
146
+{
75
+{
147
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
76
+ if (!dc_isar_feature(aa64_sve, s)) {
148
+ return false;
77
+ return false;
149
+ }
78
+ }
150
+ if (sve_access_check(s)) {
79
+ /* Prefetch is a nop within QEMU. */
151
+ gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
80
+ s->is_nonstreaming = true;
152
+ a->rd, a->rn, a->rm, a->ra, a->index);
81
+ (void)sve_access_check(s);
153
+ }
154
+ return true;
82
+ return true;
155
+}
83
+}
156
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
157
index XXXXXXX..XXXXXXX 100644
158
--- a/target/arm/vec_helper.c
159
+++ b/target/arm/vec_helper.c
160
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
161
}
162
clear_tail(d, opr_sz, simd_maxsz(desc));
163
}
164
+
84
+
165
+void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
85
/*
166
+ void *va, uint32_t desc)
86
* Move Prefix
167
+{
87
*
168
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
169
+ intptr_t index = simd_data(desc);
170
+ intptr_t elements = opr_sz / 4;
171
+ intptr_t eltspersegment = MIN(16 / 4, elements);
172
+ float32 *d = vd, *a = va;
173
+ uint32_t *n = vn, *m = vm;
174
+
175
+ for (i = 0; i < elements; i += eltspersegment) {
176
+ uint32_t m_idx = m[i + H4(index)];
177
+
178
+ for (j = i; j < i + eltspersegment; j++) {
179
+ d[j] = bfdotadd(a[j], n[j], m_idx);
180
+ }
181
+ }
182
+ clear_tail(d, opr_sz, simd_maxsz(desc));
183
+}
184
--
88
--
185
2.20.1
89
2.25.1
186
187
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We can move the definition of hvf_vcpu_exec() into our internal
3
Mark these as non-streaming instructions, which should trap
4
hvf header, obsoleting the need for hvf-accel-ops.h.
4
if full a64 support is not enabled in streaming mode.
5
5
6
Signed-off-by: Alexander Graf <agraf@csgraf.de>
7
Reviewed-by: Sergio Lopez <slp@redhat.com>
8
Message-id: 20210519202253.76782-11-agraf@csgraf.de
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
accel/hvf/hvf-accel-ops.h | 17 -----------------
11
target/arm/sme-fa64.decode | 2 --
13
include/sysemu/hvf_int.h | 1 +
12
target/arm/translate-sve.c | 2 ++
14
accel/hvf/hvf-accel-ops.c | 2 --
13
2 files changed, 2 insertions(+), 2 deletions(-)
15
target/i386/hvf/hvf.c | 2 --
16
4 files changed, 1 insertion(+), 21 deletions(-)
17
delete mode 100644 accel/hvf/hvf-accel-ops.h
18
14
19
diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
20
deleted file mode 100644
21
index XXXXXXX..XXXXXXX
22
--- a/accel/hvf/hvf-accel-ops.h
23
+++ /dev/null
24
@@ -XXX,XX +XXX,XX @@
25
-/*
26
- * Accelerator CPUS Interface
27
- *
28
- * Copyright 2020 SUSE LLC
29
- *
30
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
31
- * See the COPYING file in the top-level directory.
32
- */
33
-
34
-#ifndef HVF_CPUS_H
35
-#define HVF_CPUS_H
36
-
37
-#include "sysemu/cpus.h"
38
-
39
-int hvf_vcpu_exec(CPUState *);
40
-
41
-#endif /* HVF_CPUS_H */
42
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
43
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
44
--- a/include/sysemu/hvf_int.h
17
--- a/target/arm/sme-fa64.decode
45
+++ b/include/sysemu/hvf_int.h
18
+++ b/target/arm/sme-fa64.decode
46
@@ -XXX,XX +XXX,XX @@ extern HVFState *hvf_state;
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
47
void assert_hvf_ok(hv_return_t ret);
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
48
int hvf_arch_init_vcpu(CPUState *cpu);
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
49
void hvf_arch_vcpu_destroy(CPUState *cpu);
22
50
+int hvf_vcpu_exec(CPUState *);
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
51
hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
52
int hvf_put_registers(CPUState *);
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
53
int hvf_get_registers(CPUState *);
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
54
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
55
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
56
--- a/accel/hvf/hvf-accel-ops.c
29
--- a/target/arm/translate-sve.c
57
+++ b/accel/hvf/hvf-accel-ops.c
30
+++ b/target/arm/translate-sve.c
58
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
59
#include "sysemu/runstate.h"
32
if (!dc_isar_feature(aa64_sve, s)) {
60
#include "qemu/guest-random.h"
33
return false;
61
34
}
62
-#include "hvf-accel-ops.h"
35
+ s->is_nonstreaming = true;
63
-
36
if (sve_access_check(s)) {
64
HVFState *hvf_state;
37
TCGv_i64 addr = new_tmp_a64(s);
65
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
66
/* Memory slots */
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
67
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
40
if (!dc_isar_feature(aa64_sve, s)) {
68
index XXXXXXX..XXXXXXX 100644
41
return false;
69
--- a/target/i386/hvf/hvf.c
42
}
70
+++ b/target/i386/hvf/hvf.c
43
+ s->is_nonstreaming = true;
71
@@ -XXX,XX +XXX,XX @@
44
if (sve_access_check(s)) {
72
#include "qemu/accel.h"
45
int vsz = vec_full_reg_size(s);
73
#include "target/i386/cpu.h"
46
int elements = vsz >> dtype_esz[a->dtype];
74
75
-#include "hvf-accel-ops.h"
76
-
77
void vmx_update_tpr(CPUState *cpu)
78
{
79
/* TODO: need integrate APIC handling */
80
--
47
--
81
2.20.1
48
2.25.1
82
83
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The ARM version of Hypervisor.framework no longer defines these two
3
Mark these as non-streaming instructions, which should trap
4
types, so let's just revert to standard ones.
4
if full a64 support is not enabled in streaming mode.
5
5
6
Signed-off-by: Alexander Graf <agraf@csgraf.de>
7
Reviewed-by: Sergio Lopez <slp@redhat.com>
8
Message-id: 20210519202253.76782-7-agraf@csgraf.de
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
accel/hvf/hvf-accel-ops.c | 6 +++---
11
target/arm/sme-fa64.decode | 3 ---
13
1 file changed, 3 insertions(+), 3 deletions(-)
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
14
15
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/accel/hvf/hvf-accel-ops.c
17
--- a/target/arm/sme-fa64.decode
18
+++ b/accel/hvf/hvf-accel-ops.c
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
macslot->present = 1;
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
macslot->gpa_start = slot->start;
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
macslot->size = slot->size;
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
- ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
23
-
24
+ ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
assert_hvf_ok(ret);
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
return 0;
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
}
27
index XXXXXXX..XXXXXXX 100644
28
@@ -XXX,XX +XXX,XX @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
28
--- a/target/arm/translate-sve.c
29
/* protect region against writes; begin tracking it */
29
+++ b/target/arm/translate-sve.c
30
if (on) {
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
slot->flags |= HVF_SLOT_LOG;
31
if (a->rm == 31) {
32
- hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
32
return false;
33
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
34
HV_MEMORY_READ);
35
/* stop tracking region*/
36
} else {
37
slot->flags &= ~HVF_SLOT_LOG;
38
- hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
39
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
40
HV_MEMORY_READ | HV_MEMORY_WRITE);
41
}
33
}
42
}
34
+ s->is_nonstreaming = true;
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
43
--
46
--
44
2.20.1
47
2.25.1
45
46
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFDOT for both AArch64 AdvSIMD and SVE,
3
These functions will be used to verify that the cpu
4
and VDOT.BF16 for AArch32 NEON.
4
is in the correct state for a given instruction.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210525225817.400336-7-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.h | 3 +++
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
12
target/arm/neon-shared.decode | 2 ++
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
13
target/arm/sve.decode | 3 +++
13
2 files changed, 55 insertions(+)
14
target/arm/translate-a64.c | 20 ++++++++++++++++++
15
target/arm/translate-neon.c | 9 ++++++++
16
target/arm/translate-sve.c | 12 +++++++++++
17
target/arm/vec_helper.c | 40 +++++++++++++++++++++++++++++++++++
18
7 files changed, 89 insertions(+)
19
14
20
diff --git a/target/arm/helper.h b/target/arm/helper.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.h
17
--- a/target/arm/translate-a64.h
23
+++ b/target/arm/helper.h
18
+++ b/target/arm/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
25
DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
26
void, ptr, ptr, ptr, ptr, i32)
21
unsigned int imms, unsigned int immr);
27
22
bool sve_access_check(DisasContext *s);
28
+DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
23
+bool sme_enabled_check(DisasContext *s);
29
+ void, ptr, ptr, ptr, ptr, i32)
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
30
+
25
+
31
#ifdef TARGET_AARCH64
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
32
#include "helper-a64.h"
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
33
#include "helper-sve.h"
28
+{
34
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
35
index XXXXXXX..XXXXXXX 100644
30
+}
36
--- a/target/arm/neon-shared.decode
37
+++ b/target/arm/neon-shared.decode
38
@@ -XXX,XX +XXX,XX @@ VUDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 1 .... \
39
vm=%vm_dp vn=%vn_dp vd=%vd_dp
40
VUSDOT 1111 110 01 . 10 .... .... 1101 . q:1 . 0 .... \
41
vm=%vm_dp vn=%vn_dp vd=%vd_dp
42
+VDOT_b16 1111 110 00 . 00 .... .... 1101 . q:1 . 0 .... \
43
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
44
45
# VFM[AS]L
46
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
47
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/sve.decode
50
+++ b/target/arm/sve.decode
51
@@ -XXX,XX +XXX,XX @@ FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
52
FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0
53
FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0
54
55
+### SVE2 floating-point bfloat16 dot-product
56
+BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
57
+
31
+
58
### SVE2 floating-point multiply-add long (indexed)
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
59
FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
33
+static inline bool sme_za_enabled_check(DisasContext *s)
60
FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
34
+{
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
36
+}
37
+
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
40
+{
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
42
+}
43
+
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
61
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
62
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/translate-a64.c
49
--- a/target/arm/translate-a64.c
64
+++ b/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
65
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
66
}
52
return true;
67
feature = dc_isar_feature(aa64_fcma, s);
53
}
68
break;
54
69
+ case 0x1f: /* BFDOT */
55
+/* This function corresponds to CheckSMEEnabled. */
70
+ switch (size) {
56
+bool sme_enabled_check(DisasContext *s)
71
+ case 1:
57
+{
72
+ feature = dc_isar_feature(aa64_bf16, s);
58
+ /*
73
+ break;
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
74
+ default:
60
+ * to be zero when fp_excp_el has priority. This is because we need
75
+ unallocated_encoding(s);
61
+ * sme_excp_el by itself for cpregs access checks.
76
+ return;
62
+ */
77
+ }
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
78
+ break;
64
+ s->fp_access_checked = true;
79
default:
65
+ return sme_access_check(s);
80
unallocated_encoding(s);
66
+ }
81
return;
67
+ return fp_access_check_only(s);
82
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
68
+}
83
}
84
return;
85
86
+ case 0xf: /* BFDOT */
87
+ switch (size) {
88
+ case 1:
89
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
90
+ break;
91
+ default:
92
+ g_assert_not_reached();
93
+ }
94
+ return;
95
+
69
+
96
default:
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
97
g_assert_not_reached();
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
98
}
99
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/arm/translate-neon.c
102
+++ b/target/arm/translate-neon.c
103
@@ -XXX,XX +XXX,XX @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
104
gen_helper_gvec_usdot_b);
105
}
106
107
+static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
108
+{
72
+{
109
+ if (!dc_isar_feature(aa32_bf16, s)) {
73
+ if (!sme_enabled_check(s)) {
110
+ return false;
74
+ return false;
111
+ }
75
+ }
112
+ return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
113
+ gen_helper_gvec_bfdot);
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
114
+}
78
+ syn_smetrap(SME_ET_NotStreaming, false));
115
+
116
static bool trans_VFML(DisasContext *s, arg_VFML *a)
117
{
118
int opr_sz;
119
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/translate-sve.c
122
+++ b/target/arm/translate-sve.c
123
@@ -XXX,XX +XXX,XX @@ static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
124
{
125
return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
126
}
127
+
128
+static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
129
+{
130
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
131
+ return false;
79
+ return false;
132
+ }
80
+ }
133
+ if (sve_access_check(s)) {
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
134
+ gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
135
+ a->rd, a->rn, a->rm, a->ra, 0);
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
136
+ }
85
+ }
137
+ return true;
86
+ return true;
138
+}
87
+}
139
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/target/arm/vec_helper.c
142
+++ b/target/arm/vec_helper.c
143
@@ -XXX,XX +XXX,XX @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
144
DO_MMLA_B(gvec_smmla_b, do_smmla_b)
145
DO_MMLA_B(gvec_ummla_b, do_ummla_b)
146
DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
147
+
88
+
148
+/*
89
/*
149
+ * BFloat16 Dot Product
90
* This utility function is for doing register extension with an
150
+ */
91
* optional shift. You will likely want to pass a temporary for the
151
+
152
+static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
153
+{
154
+ /* FPCR is ignored for BFDOT and BFMMLA. */
155
+ float_status bf_status = {
156
+ .tininess_before_rounding = float_tininess_before_rounding,
157
+ .float_rounding_mode = float_round_to_odd_inf,
158
+ .flush_to_zero = true,
159
+ .flush_inputs_to_zero = true,
160
+ .default_nan_mode = true,
161
+ };
162
+ float32 t1, t2;
163
+
164
+ /*
165
+ * Extract each BFloat16 from the element pair, and shift
166
+ * them such that they become float32.
167
+ */
168
+ t1 = float32_mul(e1 << 16, e2 << 16, &bf_status);
169
+ t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status);
170
+ t1 = float32_add(t1, t2, &bf_status);
171
+ t1 = float32_add(sum, t1, &bf_status);
172
+
173
+ return t1;
174
+}
175
+
176
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
177
+{
178
+ intptr_t i, opr_sz = simd_oprsz(desc);
179
+ float32 *d = vd, *a = va;
180
+ uint32_t *n = vn, *m = vm;
181
+
182
+ for (i = 0; i < opr_sz / 4; ++i) {
183
+ d[i] = bfdotadd(a[i], n[i], m[i]);
184
+ }
185
+ clear_tail(d, opr_sz, simd_maxsz(desc));
186
+}
187
--
92
--
188
2.20.1
93
2.25.1
189
190
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The operands to tcg_gen_atomic_fetch_s{min,max}_i64 must
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
be signed, so that the inputs are properly extended.
4
SVE mode, and for SME present but SVE absent.
5
Zero extend the result afterward, as needed.
6
5
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/364
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
10
Message-id: 20210602020720.47679-1-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
target/arm/translate-a64.c | 13 ++++++++++---
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
14
1 file changed, 10 insertions(+), 3 deletions(-)
12
1 file changed, 16 insertions(+), 6 deletions(-)
15
13
16
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
17
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-a64.c
16
--- a/target/arm/translate-a64.c
19
+++ b/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
20
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
21
int o3_opc = extract32(insn, 12, 4);
19
return true;
22
bool r = extract32(insn, 22, 1);
20
}
23
bool a = extract32(insn, 23, 1);
21
24
- TCGv_i64 tcg_rs, clean_addr;
22
-/* Check that SVE access is enabled. If it is, return true.
25
+ TCGv_i64 tcg_rs, tcg_rt, clean_addr;
23
+/*
26
AtomicThreeOpFn *fn = NULL;
24
+ * Check that SVE access is enabled. If it is, return true.
27
+ MemOp mop = s->be_data | size | MO_ALIGN;
25
* If not, emit code to generate an appropriate exception and return false.
28
26
+ * This function corresponds to CheckSVEEnabled().
29
if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
27
*/
30
unallocated_encoding(s);
28
bool sve_access_check(DisasContext *s)
31
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
29
{
32
break;
30
- if (s->sve_excp_el) {
33
case 004: /* LDSMAX */
31
- assert(!s->sve_access_checked);
34
fn = tcg_gen_atomic_fetch_smax_i64;
32
- s->sve_access_checked = true;
35
+ mop |= MO_SIGN;
33
-
36
break;
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
37
case 005: /* LDSMIN */
35
+ assert(dc_isar_feature(aa64_sme, s));
38
fn = tcg_gen_atomic_fetch_smin_i64;
36
+ if (!sme_sm_enabled_check(s)) {
39
+ mop |= MO_SIGN;
37
+ goto fail_exit;
40
break;
38
+ }
41
case 006: /* LDUMAX */
39
+ } else if (s->sve_excp_el) {
42
fn = tcg_gen_atomic_fetch_umax_i64;
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
43
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
41
syn_sve_access_trap(), s->sve_excp_el);
42
- return false;
43
+ goto fail_exit;
44
}
44
}
45
45
s->sve_access_checked = true;
46
tcg_rs = read_cpu_reg(s, rs, true);
46
return fp_access_check(s);
47
+ tcg_rt = cpu_reg(s, rt);
48
49
if (o3_opc == 1) { /* LDCLR */
50
tcg_gen_not_i64(tcg_rs, tcg_rs);
51
@@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
52
/* The tcg atomic primitives are all full barriers. Therefore we
53
* can ignore the Acquire and Release bits of this instruction.
54
*/
55
- fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
56
- s->be_data | size | MO_ALIGN);
57
+ fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
58
+
47
+
59
+ if ((mop & MO_SIGN) && size != MO_64) {
48
+ fail_exit:
60
+ tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
49
+ /* Assert that we only raise one exception per instruction. */
61
+ }
50
+ assert(!s->sve_access_checked);
51
+ s->sve_access_checked = true;
52
+ return false;
62
}
53
}
63
54
64
/*
55
/*
65
--
56
--
66
2.20.1
57
2.25.1
67
68
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE,
3
These SME instructions are nominally within the SVE decode space,
4
and VFMA{B,T}.BF16 for AArch32 NEON.
4
so we add them to sve.decode and translate-sve.c.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-10-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.h | 3 +++
11
target/arm/translate-a64.h | 12 ++++++++++++
12
target/arm/neon-shared.decode | 3 +++
12
target/arm/sve.decode | 5 ++++-
13
target/arm/sve.decode | 3 +++
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
14
target/arm/translate-a64.c | 13 +++++++++----
14
3 files changed, 54 insertions(+), 1 deletion(-)
15
target/arm/translate-neon.c | 9 +++++++++
16
target/arm/translate-sve.c | 30 ++++++++++++++++++++++++++++++
17
target/arm/vec_helper.c | 16 ++++++++++++++++
18
7 files changed, 73 insertions(+), 4 deletions(-)
19
15
20
diff --git a/target/arm/helper.h b/target/arm/helper.h
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
21
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.h
18
--- a/target/arm/translate-a64.h
23
+++ b/target/arm/helper.h
19
+++ b/target/arm/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
25
DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
21
return s->vl;
26
void, ptr, ptr, ptr, ptr, i32)
22
}
27
23
28
+DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
24
+/* Return the byte size of the vector register, SVL / 8. */
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
25
+static inline int streaming_vec_reg_size(DisasContext *s)
26
+{
27
+ return s->svl;
28
+}
30
+
29
+
31
#ifdef TARGET_AARCH64
30
/*
32
#include "helper-a64.h"
31
* Return the offset into CPUARMState of the predicate vector register Pn.
33
#include "helper-sve.h"
32
* Note for this purpose, FFR is P16.
34
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
35
index XXXXXXX..XXXXXXX 100644
34
return s->vl >> 3;
36
--- a/target/arm/neon-shared.decode
35
}
37
+++ b/target/arm/neon-shared.decode
36
38
@@ -XXX,XX +XXX,XX @@ VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \
37
+/* Return the byte size of the predicate register, SVL / 64. */
39
VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \
38
+static inline int streaming_pred_reg_size(DisasContext *s)
40
vm=%vm_dp vn=%vn_dp vd=%vd_dp
39
+{
41
40
+ return s->svl >> 3;
42
+VFMA_b16 1111 110 0 0.11 .... .... 1000 . q:1 . 1 .... \
41
+}
43
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
44
+
42
+
45
VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
43
/*
46
vn=%vn_dp vd=%vd_dp size=1
44
* Round up the size of a register to a size allowed by
47
VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
45
* the tcg vector infrastructure. Any operation which uses this
48
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
49
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/sve.decode
48
--- a/target/arm/sve.decode
51
+++ b/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
52
@@ -XXX,XX +XXX,XX @@ FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
53
FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0
51
# SVE index generation (register start, register increment)
54
FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
55
53
56
+BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
54
-### SVE Stack Allocation Group
57
+BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
55
+### SVE / Streaming SVE Stack Allocation Group
58
+
56
59
### SVE2 floating-point bfloat16 dot-product
57
# SVE stack frame adjustment
60
BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
61
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
62
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
63
index XXXXXXX..XXXXXXX 100644
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
64
--- a/target/arm/translate-a64.c
62
65
+++ b/target/arm/translate-a64.c
63
# SVE stack frame size
66
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
67
}
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
68
feature = dc_isar_feature(aa64_bf16, s);
66
69
break;
67
### SVE Bitwise Shift - Unpredicated Group
70
- case 0x1f: /* BFDOT */
68
71
+ case 0x1f:
72
switch (size) {
73
- case 1:
74
+ case 1: /* BFDOT */
75
+ case 3: /* BFMLAL{B,T} */
76
feature = dc_isar_feature(aa64_bf16, s);
77
break;
78
default:
79
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
80
case 0xd: /* BFMMLA */
81
gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
82
return;
83
- case 0xf: /* BFDOT */
84
+ case 0xf:
85
switch (size) {
86
- case 1:
87
+ case 1: /* BFDOT */
88
gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
89
break;
90
+ case 3: /* BFMLAL{B,T} */
91
+ gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
92
+ gen_helper_gvec_bfmlal);
93
+ break;
94
default:
95
g_assert_not_reached();
96
}
97
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/target/arm/translate-neon.c
100
+++ b/target/arm/translate-neon.c
101
@@ -XXX,XX +XXX,XX @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
102
return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
103
gen_helper_gvec_bfmmla);
104
}
105
+
106
+static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
107
+{
108
+ if (!dc_isar_feature(aa32_bf16, s)) {
109
+ return false;
110
+ }
111
+ return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
112
+ gen_helper_gvec_bfmlal);
113
+}
114
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
115
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
116
--- a/target/arm/translate-sve.c
71
--- a/target/arm/translate-sve.c
117
+++ b/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
118
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
119
}
120
return true;
74
return true;
121
}
75
}
122
+
76
123
+static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
124
+{
78
+{
125
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
79
+ if (!dc_isar_feature(aa64_sme, s)) {
126
+ return false;
80
+ return false;
127
+ }
81
+ }
128
+ if (sve_access_check(s)) {
82
+ if (sme_enabled_check(s)) {
129
+ TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
130
+ unsigned vsz = vec_full_reg_size(s);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
131
+
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
132
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
133
+ vec_full_reg_offset(s, a->rn),
134
+ vec_full_reg_offset(s, a->rm),
135
+ vec_full_reg_offset(s, a->ra),
136
+ status, vsz, vsz, sel,
137
+ gen_helper_gvec_bfmlal);
138
+ tcg_temp_free_ptr(status);
139
+ }
86
+ }
140
+ return true;
87
+ return true;
141
+}
88
+}
142
+
89
+
143
+static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
91
{
92
if (!dc_isar_feature(aa64_sve, s)) {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
94
return true;
95
}
96
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
144
+{
98
+{
145
+ return do_BFMLAL_zzzw(s, a, false);
99
+ if (!dc_isar_feature(aa64_sme, s)) {
100
+ return false;
101
+ }
102
+ if (sme_enabled_check(s)) {
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
106
+ }
107
+ return true;
146
+}
108
+}
147
+
109
+
148
+static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
149
+{
118
+{
150
+ return do_BFMLAL_zzzw(s, a, true);
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
151
+}
127
+}
152
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
153
index XXXXXXX..XXXXXXX 100644
154
--- a/target/arm/vec_helper.c
155
+++ b/target/arm/vec_helper.c
156
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
157
}
158
clear_tail(d, opr_sz, simd_maxsz(desc));
159
}
160
+
128
+
161
+void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
129
/*
162
+ void *stat, uint32_t desc)
130
*** SVE Compute Vector Address Group
163
+{
131
*/
164
+ intptr_t i, opr_sz = simd_oprsz(desc);
165
+ intptr_t sel = simd_data(desc);
166
+ float32 *d = vd, *a = va;
167
+ bfloat16 *n = vn, *m = vm;
168
+
169
+ for (i = 0; i < opr_sz / 4; ++i) {
170
+ float32 nn = n[H2(i * 2 + sel)] << 16;
171
+ float32 mm = m[H2(i * 2 + sel)] << 16;
172
+ d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat);
173
+ }
174
+ clear_tail(d, opr_sz, simd_maxsz(desc));
175
+}
176
--
132
--
177
2.20.1
133
2.25.1
178
179
diff view generated by jsdifflib
1
The official punctuation for Arm CPU names uses a hyphen, like
1
From: Richard Henderson <richard.henderson@linaro.org>
2
"Cortex-A9". We mostly follow this, but in a few places usage
3
without the hyphen has crept in. Fix those so we consistently
4
use the same way of writing the CPU name.
5
2
6
This commit was created with:
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
git grep -z -l 'Cortex ' | xargs -0 sed -i 's/Cortex /Cortex-/'
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 4 ++++
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
12
4 files changed, 44 insertions(+)
8
13
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Message-id: 20210527095152.10968-1-peter.maydell@linaro.org
14
---
15
docs/system/arm/aspeed.rst | 4 ++--
16
docs/system/arm/nuvoton.rst | 6 +++---
17
docs/system/arm/sabrelite.rst | 2 +-
18
include/hw/arm/allwinner-h3.h | 2 +-
19
hw/arm/aspeed.c | 6 +++---
20
hw/arm/mcimx6ul-evk.c | 2 +-
21
hw/arm/mcimx7d-sabre.c | 2 +-
22
hw/arm/npcm7xx_boards.c | 4 ++--
23
hw/arm/sabrelite.c | 2 +-
24
hw/misc/npcm7xx_clk.c | 2 +-
25
10 files changed, 16 insertions(+), 16 deletions(-)
26
27
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
28
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/system/arm/aspeed.rst
16
--- a/target/arm/helper-sme.h
30
+++ b/docs/system/arm/aspeed.rst
17
+++ b/target/arm/helper-sme.h
31
@@ -XXX,XX +XXX,XX @@ The QEMU Aspeed machines model BMCs of various OpenPOWER systems and
18
@@ -XXX,XX +XXX,XX @@
32
Aspeed evaluation boards. They are based on different releases of the
19
33
Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
34
AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
35
-with dual cores ARM Cortex A7 CPUs (1.2GHz).
22
+
36
+with dual cores ARM Cortex-A7 CPUs (1.2GHz).
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
37
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
38
The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C,
39
etc.
40
@@ -XXX,XX +XXX,XX @@ AST2500 SoC based machines :
41
42
AST2600 SoC based machines :
43
44
-- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7)
45
+- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex-A7)
46
- ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC
47
48
Supported devices
49
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
50
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
51
--- a/docs/system/arm/nuvoton.rst
26
--- a/target/arm/sme.decode
52
+++ b/docs/system/arm/nuvoton.rst
27
+++ b/target/arm/sme.decode
53
@@ -XXX,XX +XXX,XX @@ Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``)
28
@@ -XXX,XX +XXX,XX @@
54
29
#
55
The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are
30
# This file is processed by scripts/decodetree.py
56
designed to be used as Baseboard Management Controllers (BMCs) in various
31
#
57
-servers. They all feature one or two ARM Cortex A9 CPU cores, as well as an
32
+
58
+servers. They all feature one or two ARM Cortex-A9 CPU cores, as well as an
33
+### SME Misc
59
assortment of peripherals targeted for either Enterprise or Data Center /
34
+
60
Hyperscale applications. The former is a superset of the latter, so NPCM750 has
35
+ZERO 11000000 00 001 00000000000 imm:8
61
all the peripherals of NPCM730 and more.
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
62
63
.. _Nuvoton iBMC: https://www.nuvoton.com/products/cloud-computing/ibmc/
64
65
-The NPCM750 SoC has two Cortex A9 cores and is targeted for the Enterprise
66
+The NPCM750 SoC has two Cortex-A9 cores and is targeted for the Enterprise
67
segment. The following machines are based on this chip :
68
69
- ``npcm750-evb`` Nuvoton NPCM750 Evaluation board
70
71
-The NPCM730 SoC has two Cortex A9 cores and is targeted for Data Center and
72
+The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and
73
Hyperscale applications. The following machines are based on this chip :
74
75
- ``quanta-gsj`` Quanta GSJ server BMC
76
diff --git a/docs/system/arm/sabrelite.rst b/docs/system/arm/sabrelite.rst
77
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
78
--- a/docs/system/arm/sabrelite.rst
38
--- a/target/arm/sme_helper.c
79
+++ b/docs/system/arm/sabrelite.rst
39
+++ b/target/arm/sme_helper.c
80
@@ -XXX,XX +XXX,XX @@ Supported devices
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
81
41
memset(env->zarray, 0, sizeof(env->zarray));
82
The SABRE Lite machine supports the following devices:
42
}
83
43
}
84
- * Up to 4 Cortex A9 cores
44
+
85
+ * Up to 4 Cortex-A9 cores
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
86
* Generic Interrupt Controller
46
+{
87
* 1 Clock Controller Module
47
+ uint32_t i;
88
* 1 System Reset Controller
48
+
89
diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h
49
+ /*
50
+ * Special case clearing the entire ZA space.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * parts of the ZA storage outside of SVL.
53
+ */
54
+ if (imm == 0xff) {
55
+ memset(env->zarray, 0, sizeof(env->zarray));
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
90
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
91
--- a/include/hw/arm/allwinner-h3.h
71
--- a/target/arm/translate-sme.c
92
+++ b/include/hw/arm/allwinner-h3.h
72
+++ b/target/arm/translate-sme.c
93
@@ -XXX,XX +XXX,XX @@
73
@@ -XXX,XX +XXX,XX @@
94
*/
74
*/
95
75
96
/*
76
#include "decode-sme.c.inc"
97
- * The Allwinner H3 is a System on Chip containing four ARM Cortex A7
77
+
98
+ * The Allwinner H3 is a System on Chip containing four ARM Cortex-A7
78
+
99
* processor cores. Features and specifications include DDR2/DDR3 memory,
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
100
* SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and
80
+{
101
* various I/O modules.
81
+ if (!dc_isar_feature(aa64_sme, s)) {
102
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
82
+ return false;
103
index XXXXXXX..XXXXXXX 100644
83
+ }
104
--- a/hw/arm/aspeed.c
84
+ if (sme_za_enabled_check(s)) {
105
+++ b/hw/arm/aspeed.c
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
106
@@ -XXX,XX +XXX,XX @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data)
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
107
MachineClass *mc = MACHINE_CLASS(oc);
87
+ }
108
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
88
+ return true;
109
89
+}
110
- mc->desc = "Aspeed AST2600 EVB (Cortex A7)";
111
+ mc->desc = "Aspeed AST2600 EVB (Cortex-A7)";
112
amc->soc_name = "ast2600-a1";
113
amc->hw_strap1 = AST2600_EVB_HW_STRAP1;
114
amc->hw_strap2 = AST2600_EVB_HW_STRAP2;
115
@@ -XXX,XX +XXX,XX @@ static void aspeed_machine_tacoma_class_init(ObjectClass *oc, void *data)
116
MachineClass *mc = MACHINE_CLASS(oc);
117
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
118
119
- mc->desc = "OpenPOWER Tacoma BMC (Cortex A7)";
120
+ mc->desc = "OpenPOWER Tacoma BMC (Cortex-A7)";
121
amc->soc_name = "ast2600-a1";
122
amc->hw_strap1 = TACOMA_BMC_HW_STRAP1;
123
amc->hw_strap2 = TACOMA_BMC_HW_STRAP2;
124
@@ -XXX,XX +XXX,XX @@ static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data)
125
MachineClass *mc = MACHINE_CLASS(oc);
126
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
127
128
- mc->desc = "IBM Rainier BMC (Cortex A7)";
129
+ mc->desc = "IBM Rainier BMC (Cortex-A7)";
130
amc->soc_name = "ast2600-a1";
131
amc->hw_strap1 = RAINIER_BMC_HW_STRAP1;
132
amc->hw_strap2 = RAINIER_BMC_HW_STRAP2;
133
diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/hw/arm/mcimx6ul-evk.c
136
+++ b/hw/arm/mcimx6ul-evk.c
137
@@ -XXX,XX +XXX,XX @@ static void mcimx6ul_evk_init(MachineState *machine)
138
139
static void mcimx6ul_evk_machine_init(MachineClass *mc)
140
{
141
- mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex A7)";
142
+ mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex-A7)";
143
mc->init = mcimx6ul_evk_init;
144
mc->max_cpus = FSL_IMX6UL_NUM_CPUS;
145
mc->default_ram_id = "mcimx6ul-evk.ram";
146
diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/hw/arm/mcimx7d-sabre.c
149
+++ b/hw/arm/mcimx7d-sabre.c
150
@@ -XXX,XX +XXX,XX @@ static void mcimx7d_sabre_init(MachineState *machine)
151
152
static void mcimx7d_sabre_machine_init(MachineClass *mc)
153
{
154
- mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex A7)";
155
+ mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex-A7)";
156
mc->init = mcimx7d_sabre_init;
157
mc->max_cpus = FSL_IMX7_NUM_CPUS;
158
mc->default_ram_id = "mcimx7d-sabre.ram";
159
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/hw/arm/npcm7xx_boards.c
162
+++ b/hw/arm/npcm7xx_boards.c
163
@@ -XXX,XX +XXX,XX @@ static void npcm750_evb_machine_class_init(ObjectClass *oc, void *data)
164
165
npcm7xx_set_soc_type(nmc, TYPE_NPCM750);
166
167
- mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex A9)";
168
+ mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex-A9)";
169
mc->init = npcm750_evb_init;
170
mc->default_ram_size = 512 * MiB;
171
};
172
@@ -XXX,XX +XXX,XX @@ static void gsj_machine_class_init(ObjectClass *oc, void *data)
173
174
npcm7xx_set_soc_type(nmc, TYPE_NPCM730);
175
176
- mc->desc = "Quanta GSJ (Cortex A9)";
177
+ mc->desc = "Quanta GSJ (Cortex-A9)";
178
mc->init = quanta_gsj_init;
179
mc->default_ram_size = 512 * MiB;
180
};
181
diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c
182
index XXXXXXX..XXXXXXX 100644
183
--- a/hw/arm/sabrelite.c
184
+++ b/hw/arm/sabrelite.c
185
@@ -XXX,XX +XXX,XX @@ static void sabrelite_init(MachineState *machine)
186
187
static void sabrelite_machine_init(MachineClass *mc)
188
{
189
- mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex A9)";
190
+ mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex-A9)";
191
mc->init = sabrelite_init;
192
mc->max_cpus = FSL_IMX6_NUM_CPUS;
193
mc->ignore_memory_transaction_failures = true;
194
diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm7xx_clk.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/misc/npcm7xx_clk.c
197
+++ b/hw/misc/npcm7xx_clk.c
198
@@ -XXX,XX +XXX,XX @@
199
#define NPCM7XX_CLOCK_REF_HZ (25000000)
200
201
/* Register Field Definitions */
202
-#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex A9 Cores */
203
+#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex-A9 Cores */
204
205
#define PLLCON_LOKI BIT(31)
206
#define PLLCON_LOKS BIT(30)
207
--
90
--
208
2.20.1
91
2.25.1
209
210
diff view generated by jsdifflib
1
From: Jamie Iles <jamie@nuviainc.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The DAIF and PAC checks used raise_exception_ra to raise an exception
3
We can reuse the SVE functions for implementing moves to/from
4
and unwind CPU state but raise_exception_ra is currently designed for
4
horizontal tile slices, but we need new ones for moves to/from
5
handling data aborts as the syndrome is partially precomputed and
5
vertical tile slices.
6
encoded in the TB and then merged in merge_syn_data_abort when handling
7
the data abort. Using raise_exception_ra for DAIF and PAC checks
8
results in an empty syndrome being retrieved from data[2] in
9
restore_state_to_opc and setting ESR to 0. This manifested as:
10
6
11
kvm [571]: Unknown exception class: esr: 0x000000 –
12
Unknown/Uncategorized
13
14
when launching a KVM guest when the host qemu used a CPU supporting
15
EL2+pointer authentication and enabling pointer authentication in the
16
guest.
17
18
Rework raise_exception_ra such that the state is restored before raising
19
the exception so that the exception is not clobbered by
20
restore_state_to_opc.
21
22
Fixes: 0d43e1a2d29a ("target/arm: Add PAuth helpers")
23
Cc: Richard Henderson <richard.henderson@linaro.org>
24
Cc: Peter Maydell <peter.maydell@linaro.org>
25
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
26
[PMM: added comment]
27
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
28
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
29
---
11
---
30
target/arm/op_helper.c | 11 +++++++++--
12
target/arm/helper-sme.h | 12 +++
31
1 file changed, 9 insertions(+), 2 deletions(-)
13
target/arm/helper-sve.h | 2 +
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
32
21
33
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
34
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/op_helper.c
24
--- a/target/arm/helper-sme.h
36
+++ b/target/arm/op_helper.c
25
+++ b/target/arm/helper-sme.h
37
@@ -XXX,XX +XXX,XX @@ void raise_exception(CPUARMState *env, uint32_t excp,
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
38
void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
39
uint32_t target_el, uintptr_t ra)
28
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
30
+
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
62
63
+/* Return a newly allocated pointer to the predicate register. */
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
65
+{
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
68
+ return ret;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
80
}
81
82
+static inline int plus_12(DisasContext *s, int x)
83
+{
84
+ return x + 12;
85
+}
86
+
87
static inline int times_2(DisasContext *s, int x)
40
{
88
{
41
- CPUState *cs = do_raise_exception(env, excp, syndrome, target_el);
89
return x * 2;
42
- cpu_loop_exit_restore(cs, ra);
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
43
+ CPUState *cs = env_cpu(env);
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
44
+
210
+
45
+ /*
211
+ /*
46
+ * restore_state_to_opc() will set env->exception.syndrome, so
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
47
+ * we must restore CPU state here before setting the syndrome
213
+ * the address arithmetic.
48
+ * the caller passed us, and cannot use cpu_loop_exit_restore().
49
+ */
214
+ */
50
+ cpu_restore_state(cs, ra, true);
215
+ for (i = 0; i < oprsz; i++) {
51
+ raise_exception(env, excp, syndrome, target_el);
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
52
}
286
}
53
287
54
uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
289
+ void *vg, uint32_t desc)
290
+{
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
292
+ Int128 *d = vd, *n = vn, *m = vm;
293
+ uint16_t *pg = vg;
294
+
295
+ for (i = 0; i < opr_sz; i += 1) {
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
297
+ }
298
+}
299
+
300
/* Two operand comparison controlled by a predicate.
301
* ??? It is very tempting to want to be able to expand this inline
302
* with x86 instructions, e.g.
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
306
+++ b/target/arm/translate-sme.c
307
@@ -XXX,XX +XXX,XX @@
308
#include "decode-sme.c.inc"
309
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
55
--
445
--
56
2.20.1
446
2.25.1
57
58
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The hvf_set_phys_mem() function is only called within the same file.
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
4
Make it static.
4
because those functions accept only a Zreg register number.
5
For SME, we want to pass a pointer into ZA storage.
5
6
6
Signed-off-by: Alexander Graf <agraf@csgraf.de>
7
Reviewed-by: Sergio Lopez <slp@redhat.com>
8
Message-id: 20210519202253.76782-6-agraf@csgraf.de
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
11
---
12
include/sysemu/hvf_int.h | 1 -
12
target/arm/helper-sme.h | 82 +++++
13
accel/hvf/hvf-accel-ops.c | 2 +-
13
target/arm/sme.decode | 9 +
14
2 files changed, 1 insertion(+), 2 deletions(-)
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
15
17
16
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
17
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
18
--- a/include/sysemu/hvf_int.h
20
--- a/target/arm/helper-sme.h
19
+++ b/include/sysemu/hvf_int.h
21
+++ b/target/arm/helper-sme.h
20
@@ -XXX,XX +XXX,XX @@ struct HVFState {
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
21
};
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
extern HVFState *hvf_state;
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
-void hvf_set_phys_mem(MemoryRegionSection *, bool);
26
+
25
void assert_hvf_ok(hv_return_t ret);
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
26
hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
27
int hvf_put_registers(CPUState *);
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
28
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
29
index XXXXXXX..XXXXXXX 100644
109
index XXXXXXX..XXXXXXX 100644
30
--- a/accel/hvf/hvf-accel-ops.c
110
--- a/target/arm/sme.decode
31
+++ b/accel/hvf/hvf-accel-ops.c
111
+++ b/target/arm/sme.decode
32
@@ -XXX,XX +XXX,XX @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
33
return 0;
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
129
@@ -XXX,XX +XXX,XX @@
130
131
#include "qemu/osdep.h"
132
#include "cpu.h"
133
+#include "internals.h"
134
#include "tcg/tcg-gvec-desc.h"
135
#include "exec/helper-proto.h"
136
+#include "exec/cpu_ldst.h"
137
+#include "exec/exec-all.h"
138
#include "qemu/int128.h"
139
#include "vec_internal.h"
140
+#include "sve_ldst_internal.h"
141
142
/* ResetSVEState */
143
void arm_reset_sve_state(CPUARMState *env)
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
34
}
145
}
35
146
36
-void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
147
#undef DO_MOVA_Z
37
+static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
148
+
38
{
149
+/*
39
hvf_slot *mem;
150
+ * Clear elements in a tile slice comprising len bytes.
40
MemoryRegion *area = section->mr;
151
+ */
152
+
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
154
+
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
156
+{
157
+ memset(ptr + off, 0, len);
158
+}
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
197
+ */
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 8, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
41
--
817
--
42
2.20.1
818
2.25.1
43
44
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
There is no reason to call the hvf specific hvf_cpu_synchronize_state()
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
when we can just use the generic cpu_synchronize_state() instead. This
4
We will reuse this for SME save and restore array insns.
5
allows us to have less dependency on internal function definitions and
6
allows us to make hvf_cpu_synchronize_state() static.
7
5
8
Signed-off-by: Alexander Graf <agraf@csgraf.de>
9
Reviewed-by: Sergio Lopez <slp@redhat.com>
10
Message-id: 20210519202253.76782-9-agraf@csgraf.de
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
accel/hvf/hvf-accel-ops.h | 1 -
11
target/arm/translate-a64.h | 3 +++
15
accel/hvf/hvf-accel-ops.c | 2 +-
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
16
target/i386/hvf/x86hvf.c | 9 ++++-----
13
2 files changed, 39 insertions(+), 12 deletions(-)
17
3 files changed, 5 insertions(+), 7 deletions(-)
18
14
19
diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/accel/hvf/hvf-accel-ops.h
17
--- a/target/arm/translate-a64.h
22
+++ b/accel/hvf/hvf-accel-ops.h
18
+++ b/target/arm/translate-a64.h
23
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
24
#include "sysemu/cpus.h"
20
uint32_t rm_ofs, int64_t shift,
25
21
uint32_t opr_sz, uint32_t max_sz);
26
int hvf_vcpu_exec(CPUState *);
22
27
-void hvf_cpu_synchronize_state(CPUState *);
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
28
void hvf_cpu_synchronize_post_reset(CPUState *);
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
29
void hvf_cpu_synchronize_post_init(CPUState *);
25
+
30
void hvf_cpu_synchronize_pre_loadvm(CPUState *);
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
31
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
32
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
33
--- a/accel/hvf/hvf-accel-ops.c
29
--- a/target/arm/translate-sve.c
34
+++ b/accel/hvf/hvf-accel-ops.c
30
+++ b/target/arm/translate-sve.c
35
@@ -XXX,XX +XXX,XX @@ static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
32
* The load should begin at the address Rn + IMM.
33
*/
34
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
37
+ int len, int rn, int imm)
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
81
}
82
83
/*
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
85
default:
86
g_assert_not_reached();
87
}
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
36
}
91
}
37
}
92
}
38
93
39
-void hvf_cpu_synchronize_state(CPUState *cpu)
94
/* Similarly for stores. */
40
+static void hvf_cpu_synchronize_state(CPUState *cpu)
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
97
+ int len, int rn, int imm)
41
{
98
{
42
if (!cpu->vcpu_dirty) {
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
43
run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
100
int len_remain = len % 8;
44
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
45
index XXXXXXX..XXXXXXX 100644
102
46
--- a/target/i386/hvf/x86hvf.c
103
t0 = tcg_temp_new_i64();
47
+++ b/target/i386/hvf/x86hvf.c
104
for (i = 0; i < len_align; i += 8) {
48
@@ -XXX,XX +XXX,XX @@
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
49
#include "cpu.h"
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
50
#include "x86_descr.h"
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
51
#include "x86_decode.h"
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
52
+#include "sysemu/hw_accel.h"
109
}
53
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
54
#include "hw/i386/apic_internal.h"
111
clean_addr = new_tmp_a64_local(s);
55
112
tcg_gen_mov_i64(clean_addr, t0);
56
#include <Hypervisor/hv.h>
113
57
#include <Hypervisor/hv_vmx.h>
114
+ if (base != cpu_env) {
58
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
59
-#include "accel/hvf/hvf-accel-ops.h"
116
+ tcg_gen_mov_ptr(b, base);
60
-
117
+ base = b;
61
void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg,
118
+ }
62
SegmentCache *qseg, bool is_tr)
119
+
63
{
120
gen_set_label(loop);
64
@@ -XXX,XX +XXX,XX @@ int hvf_process_events(CPUState *cpu_state)
121
65
env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS);
122
t0 = tcg_temp_new_i64();
66
123
tp = tcg_temp_new_ptr();
67
if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
124
- tcg_gen_add_ptr(tp, cpu_env, i);
68
- hvf_cpu_synchronize_state(cpu_state);
125
+ tcg_gen_add_ptr(tp, base, i);
69
+ cpu_synchronize_state(cpu_state);
126
tcg_gen_ld_i64(t0, tp, vofs);
70
do_cpu_init(cpu);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
71
}
138
}
72
139
73
@@ -XXX,XX +XXX,XX @@ int hvf_process_events(CPUState *cpu_state)
140
/* Predicate register stores can be any multiple of 2. */
74
cpu_state->halted = 0;
141
if (len_remain) {
142
t0 = tcg_temp_new_i64();
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
145
146
switch (len_remain) {
147
case 2:
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
149
if (sve_access_check(s)) {
150
int size = vec_full_reg_size(s);
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
75
}
154
}
76
if (cpu_state->interrupt_request & CPU_INTERRUPT_SIPI) {
155
return true;
77
- hvf_cpu_synchronize_state(cpu_state);
156
}
78
+ cpu_synchronize_state(cpu_state);
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
79
do_cpu_sipi(cpu);
158
if (sve_access_check(s)) {
159
int size = pred_full_reg_size(s);
160
int off = pred_full_reg_offset(s, a->rd);
161
- do_ldr(s, off, size, a->rn, a->imm * size);
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
80
}
163
}
81
if (cpu_state->interrupt_request & CPU_INTERRUPT_TPR) {
164
return true;
82
cpu_state->interrupt_request &= ~CPU_INTERRUPT_TPR;
165
}
83
- hvf_cpu_synchronize_state(cpu_state);
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
84
+ cpu_synchronize_state(cpu_state);
167
if (sve_access_check(s)) {
85
apic_handle_tpr_access_report(cpu->apic_state, env->eip,
168
int size = vec_full_reg_size(s);
86
env->tpr_access_type);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
87
}
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
88
--
184
--
89
2.20.1
185
2.25.1
90
91
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Until now, Hypervisor.framework has only been available on x86_64 systems.
3
We can reuse the SVE functions for LDR and STR, passing in the
4
With Apple Silicon shipping now, it extends its reach to aarch64. To
4
base of the ZA vector and a zero offset.
5
prepare for support for multiple architectures, let's start moving common
6
code out into its own accel directory.
7
5
8
This patch moves a few internal struct and constant defines over.
9
10
Signed-off-by: Alexander Graf <agraf@csgraf.de>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
12
Message-id: 20210519202253.76782-5-agraf@csgraf.de
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
10
---
16
include/sysemu/hvf_int.h | 30 ++++++++++++++++++++++++++++++
11
target/arm/sme.decode | 7 +++++++
17
target/i386/hvf/hvf-i386.h | 31 +------------------------------
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
18
2 files changed, 31 insertions(+), 30 deletions(-)
13
2 files changed, 31 insertions(+)
19
14
20
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/include/sysemu/hvf_int.h
17
--- a/target/arm/sme.decode
23
+++ b/include/sysemu/hvf_int.h
18
+++ b/target/arm/sme.decode
24
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
25
20
&ldst rs=%mova_rs
26
#include <Hypervisor/hv.h>
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
27
22
&ldst esz=4 rs=%mova_rs
28
+/* hvf_slot flags */
29
+#define HVF_SLOT_LOG (1 << 0)
30
+
23
+
31
+typedef struct hvf_slot {
24
+&ldstr rv rn imm
32
+ uint64_t start;
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
33
+ uint64_t size;
26
+ &ldstr rv=%mova_rs
34
+ uint8_t *mem;
35
+ int slot_id;
36
+ uint32_t flags;
37
+ MemoryRegion *region;
38
+} hvf_slot;
39
+
27
+
40
+typedef struct hvf_vcpu_caps {
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
41
+ uint64_t vmx_cap_pinbased;
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
42
+ uint64_t vmx_cap_procbased;
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
43
+ uint64_t vmx_cap_procbased2;
31
index XXXXXXX..XXXXXXX 100644
44
+ uint64_t vmx_cap_entry;
32
--- a/target/arm/translate-sme.c
45
+ uint64_t vmx_cap_exit;
33
+++ b/target/arm/translate-sme.c
46
+ uint64_t vmx_cap_preemption_timer;
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
47
+} hvf_vcpu_caps;
35
tcg_temp_free_i64(addr);
36
return true;
37
}
48
+
38
+
49
+struct HVFState {
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
50
+ AccelState parent;
51
+ hvf_slot slots[32];
52
+ int num_slots;
53
+
40
+
54
+ hvf_vcpu_caps *hvf_caps;
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
55
+};
42
+{
56
+extern HVFState *hvf_state;
43
+ int svl = streaming_vec_reg_size(s);
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
57
+
46
+
58
void hvf_set_phys_mem(MemoryRegionSection *, bool);
47
+ if (!sme_za_enabled_check(s)) {
59
void assert_hvf_ok(hv_return_t ret);
48
+ return true;
60
hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
49
+ }
61
diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
50
+
62
index XXXXXXX..XXXXXXX 100644
51
+ /* ZA[n] equates to ZA0H.B[n]. */
63
--- a/target/i386/hvf/hvf-i386.h
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
64
+++ b/target/i386/hvf/hvf-i386.h
53
+
65
@@ -XXX,XX +XXX,XX @@
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
66
55
+
67
#include "qemu/accel.h"
56
+ tcg_temp_free_ptr(base);
68
#include "sysemu/hvf.h"
57
+ return true;
69
+#include "sysemu/hvf_int.h"
58
+}
70
#include "cpu.h"
59
+
71
#include "x86.h"
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
72
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
73
-/* hvf_slot flags */
74
-#define HVF_SLOT_LOG (1 << 0)
75
-
76
-typedef struct hvf_slot {
77
- uint64_t start;
78
- uint64_t size;
79
- uint8_t *mem;
80
- int slot_id;
81
- uint32_t flags;
82
- MemoryRegion *region;
83
-} hvf_slot;
84
-
85
-typedef struct hvf_vcpu_caps {
86
- uint64_t vmx_cap_pinbased;
87
- uint64_t vmx_cap_procbased;
88
- uint64_t vmx_cap_procbased2;
89
- uint64_t vmx_cap_entry;
90
- uint64_t vmx_cap_exit;
91
- uint64_t vmx_cap_preemption_timer;
92
-} hvf_vcpu_caps;
93
-
94
-struct HVFState {
95
- AccelState parent;
96
- hvf_slot slots[32];
97
- int num_slots;
98
-
99
- hvf_vcpu_caps *hvf_caps;
100
-};
101
-extern HVFState *hvf_state;
102
-
103
void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int);
104
105
#ifdef NEED_CPU_H
106
--
62
--
107
2.20.1
63
2.25.1
108
109
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Until now, Hypervisor.framework has only been available on x86_64 systems.
4
With Apple Silicon shipping now, it extends its reach to aarch64. To
5
prepare for support for multiple architectures, let's start moving common
6
code out into its own accel directory.
7
8
This patch moves CPU and memory operations over. While at it, make sure
9
the code is consumable on non-i386 systems.
10
11
Signed-off-by: Alexander Graf <agraf@csgraf.de>
12
Reviewed-by: Sergio Lopez <slp@redhat.com>
13
Message-id: 20210519202253.76782-4-agraf@csgraf.de
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
7
---
17
include/sysemu/hvf_int.h | 4 +
8
target/arm/helper-sme.h | 5 +++
18
target/i386/hvf/hvf-i386.h | 2 -
9
target/arm/sme.decode | 11 +++++
19
target/i386/hvf/x86hvf.h | 2 -
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
20
accel/hvf/hvf-accel-ops.c | 308 ++++++++++++++++++++++++++++++++++++-
11
target/arm/translate-sme.c | 31 +++++++++++++
21
target/i386/hvf/hvf.c | 302 ------------------------------------
12
4 files changed, 137 insertions(+)
22
5 files changed, 311 insertions(+), 307 deletions(-)
23
13
24
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
25
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
26
--- a/include/sysemu/hvf_int.h
16
--- a/target/arm/helper-sme.h
27
+++ b/include/sysemu/hvf_int.h
17
+++ b/target/arm/helper-sme.h
28
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
29
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
#include <Hypervisor/hv.h>
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
32
+void hvf_set_phys_mem(MemoryRegionSection *, bool);
22
+
33
void assert_hvf_ok(hv_return_t ret);
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
34
+hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
35
+int hvf_put_registers(CPUState *);
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
36
+int hvf_get_registers(CPUState *);
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
37
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
38
#endif
39
diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
40
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
41
--- a/target/i386/hvf/hvf-i386.h
29
--- a/target/arm/sme.decode
42
+++ b/target/i386/hvf/hvf-i386.h
30
+++ b/target/arm/sme.decode
43
@@ -XXX,XX +XXX,XX @@ struct HVFState {
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
44
};
32
45
extern HVFState *hvf_state;
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
46
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
47
-void hvf_set_phys_mem(MemoryRegionSection *, bool);
35
+
48
void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int);
36
+### SME Add Vector to Array
49
-hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
37
+
50
38
+&adda zad zn pm pn
51
#ifdef NEED_CPU_H
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
52
/* Functions exported to host specific mode */
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
53
diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h
41
+
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
54
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
55
--- a/target/i386/hvf/x86hvf.h
48
--- a/target/arm/sme_helper.c
56
+++ b/target/i386/hvf/x86hvf.h
49
+++ b/target/arm/sme_helper.c
57
@@ -XXX,XX +XXX,XX @@
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
58
#include "x86_descr.h"
51
DO_ST(q, _le, MO_128)
59
52
60
int hvf_process_events(CPUState *);
53
#undef DO_ST
61
-int hvf_put_registers(CPUState *);
62
-int hvf_get_registers(CPUState *);
63
bool hvf_inject_interrupts(CPUState *);
64
void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg,
65
SegmentCache *qseg, bool is_tr);
66
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/accel/hvf/hvf-accel-ops.c
69
+++ b/accel/hvf/hvf-accel-ops.c
70
@@ -XXX,XX +XXX,XX @@
71
#include "qemu/osdep.h"
72
#include "qemu/error-report.h"
73
#include "qemu/main-loop.h"
74
+#include "exec/address-spaces.h"
75
+#include "exec/exec-all.h"
76
+#include "sysemu/cpus.h"
77
#include "sysemu/hvf.h"
78
+#include "sysemu/hvf_int.h"
79
#include "sysemu/runstate.h"
80
-#include "target/i386/cpu.h"
81
#include "qemu/guest-random.h"
82
83
#include "hvf-accel-ops.h"
84
85
+HVFState *hvf_state;
86
+
54
+
87
+/* Memory slots */
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
56
+ void *vpm, uint32_t desc)
57
+{
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
59
+ uint64_t *pn = vpn, *pm = vpm;
60
+ uint32_t *zda = vzda, *zn = vzn;
88
+
61
+
89
+hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
62
+ for (row = 0; row < oprsz; ) {
90
+{
63
+ uint64_t pa = pn[row >> 4];
91
+ hvf_slot *slot;
64
+ do {
92
+ int x;
65
+ if (pa & 1) {
93
+ for (x = 0; x < hvf_state->num_slots; ++x) {
66
+ for (col = 0; col < oprsz; ) {
94
+ slot = &hvf_state->slots[x];
67
+ uint64_t pb = pm[col >> 4];
95
+ if (slot->size && start < (slot->start + slot->size) &&
68
+ do {
96
+ (start + size) > slot->start) {
69
+ if (pb & 1) {
97
+ return slot;
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
98
+ }
71
+ }
99
+ }
72
+ pb >>= 4;
100
+ return NULL;
73
+ } while (++col & 15);
101
+}
74
+ }
102
+
75
+ }
103
+struct mac_slot {
76
+ pa >>= 4;
104
+ int present;
77
+ } while (++row & 15);
105
+ uint64_t size;
106
+ uint64_t gpa_start;
107
+ uint64_t gva;
108
+};
109
+
110
+struct mac_slot mac_slots[32];
111
+
112
+static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
113
+{
114
+ struct mac_slot *macslot;
115
+ hv_return_t ret;
116
+
117
+ macslot = &mac_slots[slot->slot_id];
118
+
119
+ if (macslot->present) {
120
+ if (macslot->size != slot->size) {
121
+ macslot->present = 0;
122
+ ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
123
+ assert_hvf_ok(ret);
124
+ }
125
+ }
126
+
127
+ if (!slot->size) {
128
+ return 0;
129
+ }
130
+
131
+ macslot->present = 1;
132
+ macslot->gpa_start = slot->start;
133
+ macslot->size = slot->size;
134
+ ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
135
+ assert_hvf_ok(ret);
136
+ return 0;
137
+}
138
+
139
+void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
140
+{
141
+ hvf_slot *mem;
142
+ MemoryRegion *area = section->mr;
143
+ bool writeable = !area->readonly && !area->rom_device;
144
+ hv_memory_flags_t flags;
145
+
146
+ if (!memory_region_is_ram(area)) {
147
+ if (writeable) {
148
+ return;
149
+ } else if (!memory_region_is_romd(area)) {
150
+ /*
151
+ * If the memory device is not in romd_mode, then we actually want
152
+ * to remove the hvf memory slot so all accesses will trap.
153
+ */
154
+ add = false;
155
+ }
156
+ }
157
+
158
+ mem = hvf_find_overlap_slot(
159
+ section->offset_within_address_space,
160
+ int128_get64(section->size));
161
+
162
+ if (mem && add) {
163
+ if (mem->size == int128_get64(section->size) &&
164
+ mem->start == section->offset_within_address_space &&
165
+ mem->mem == (memory_region_get_ram_ptr(area) +
166
+ section->offset_within_region)) {
167
+ return; /* Same region was attempted to register, go away. */
168
+ }
169
+ }
170
+
171
+ /* Region needs to be reset. set the size to 0 and remap it. */
172
+ if (mem) {
173
+ mem->size = 0;
174
+ if (do_hvf_set_memory(mem, 0)) {
175
+ error_report("Failed to reset overlapping slot");
176
+ abort();
177
+ }
178
+ }
179
+
180
+ if (!add) {
181
+ return;
182
+ }
183
+
184
+ if (area->readonly ||
185
+ (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
186
+ flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
187
+ } else {
188
+ flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
189
+ }
190
+
191
+ /* Now make a new slot. */
192
+ int x;
193
+
194
+ for (x = 0; x < hvf_state->num_slots; ++x) {
195
+ mem = &hvf_state->slots[x];
196
+ if (!mem->size) {
197
+ break;
198
+ }
199
+ }
200
+
201
+ if (x == hvf_state->num_slots) {
202
+ error_report("No free slots");
203
+ abort();
204
+ }
205
+
206
+ mem->size = int128_get64(section->size);
207
+ mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
208
+ mem->start = section->offset_within_address_space;
209
+ mem->region = area;
210
+
211
+ if (do_hvf_set_memory(mem, flags)) {
212
+ error_report("Error registering new memory slot");
213
+ abort();
214
+ }
78
+ }
215
+}
79
+}
216
+
80
+
217
+static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
218
+{
83
+{
219
+ if (!cpu->vcpu_dirty) {
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
220
+ hvf_get_registers(cpu);
85
+ uint8_t *pn = vpn, *pm = vpm;
221
+ cpu->vcpu_dirty = true;
86
+ uint64_t *zda = vzda, *zn = vzn;
87
+
88
+ for (row = 0; row < oprsz; ++row) {
89
+ if (pn[H1(row)] & 1) {
90
+ for (col = 0; col < oprsz; ++col) {
91
+ if (pm[H1(col)] & 1) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
222
+ }
96
+ }
223
+}
97
+}
224
+
98
+
225
+void hvf_cpu_synchronize_state(CPUState *cpu)
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
226
+{
101
+{
227
+ if (!cpu->vcpu_dirty) {
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
228
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
103
+ uint64_t *pn = vpn, *pm = vpm;
104
+ uint32_t *zda = vzda, *zn = vzn;
105
+
106
+ for (row = 0; row < oprsz; ) {
107
+ uint64_t pa = pn[row >> 4];
108
+ do {
109
+ if (pa & 1) {
110
+ uint32_t zn_row = zn[H4(row)];
111
+ for (col = 0; col < oprsz; ) {
112
+ uint64_t pb = pm[col >> 4];
113
+ do {
114
+ if (pb & 1) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
116
+ }
117
+ pb >>= 4;
118
+ } while (++col & 15);
119
+ }
120
+ }
121
+ pa >>= 4;
122
+ } while (++row & 15);
229
+ }
123
+ }
230
+}
124
+}
231
+
125
+
232
+static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
233
+ run_on_cpu_data arg)
127
+ void *vpm, uint32_t desc)
234
+{
128
+{
235
+ hvf_put_registers(cpu);
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
236
+ cpu->vcpu_dirty = false;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
132
+
133
+ for (row = 0; row < oprsz; ++row) {
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
237
+}
177
+}
238
+
178
+
239
+void hvf_cpu_synchronize_post_reset(CPUState *cpu)
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
240
+{
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
241
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
242
+}
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
243
+
244
+static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
245
+ run_on_cpu_data arg)
246
+{
247
+ hvf_put_registers(cpu);
248
+ cpu->vcpu_dirty = false;
249
+}
250
+
251
+void hvf_cpu_synchronize_post_init(CPUState *cpu)
252
+{
253
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
254
+}
255
+
256
+static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
257
+ run_on_cpu_data arg)
258
+{
259
+ cpu->vcpu_dirty = true;
260
+}
261
+
262
+void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
263
+{
264
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
265
+}
266
+
267
+static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
268
+{
269
+ hvf_slot *slot;
270
+
271
+ slot = hvf_find_overlap_slot(
272
+ section->offset_within_address_space,
273
+ int128_get64(section->size));
274
+
275
+ /* protect region against writes; begin tracking it */
276
+ if (on) {
277
+ slot->flags |= HVF_SLOT_LOG;
278
+ hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
279
+ HV_MEMORY_READ);
280
+ /* stop tracking region */
281
+ } else {
282
+ slot->flags &= ~HVF_SLOT_LOG;
283
+ hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
284
+ HV_MEMORY_READ | HV_MEMORY_WRITE);
285
+ }
286
+}
287
+
288
+static void hvf_log_start(MemoryListener *listener,
289
+ MemoryRegionSection *section, int old, int new)
290
+{
291
+ if (old != 0) {
292
+ return;
293
+ }
294
+
295
+ hvf_set_dirty_tracking(section, 1);
296
+}
297
+
298
+static void hvf_log_stop(MemoryListener *listener,
299
+ MemoryRegionSection *section, int old, int new)
300
+{
301
+ if (new != 0) {
302
+ return;
303
+ }
304
+
305
+ hvf_set_dirty_tracking(section, 0);
306
+}
307
+
308
+static void hvf_log_sync(MemoryListener *listener,
309
+ MemoryRegionSection *section)
310
+{
311
+ /*
312
+ * sync of dirty pages is handled elsewhere; just make sure we keep
313
+ * tracking the region.
314
+ */
315
+ hvf_set_dirty_tracking(section, 1);
316
+}
317
+
318
+static void hvf_region_add(MemoryListener *listener,
319
+ MemoryRegionSection *section)
320
+{
321
+ hvf_set_phys_mem(section, true);
322
+}
323
+
324
+static void hvf_region_del(MemoryListener *listener,
325
+ MemoryRegionSection *section)
326
+{
327
+ hvf_set_phys_mem(section, false);
328
+}
329
+
330
+static MemoryListener hvf_memory_listener = {
331
+ .priority = 10,
332
+ .region_add = hvf_region_add,
333
+ .region_del = hvf_region_del,
334
+ .log_start = hvf_log_start,
335
+ .log_stop = hvf_log_stop,
336
+ .log_sync = hvf_log_sync,
337
+};
338
+
339
+static void dummy_signal(int sig)
340
+{
341
+}
342
+
343
+bool hvf_allowed;
344
+
345
+static int hvf_accel_init(MachineState *ms)
346
+{
347
+ int x;
348
+ hv_return_t ret;
349
+ HVFState *s;
350
+
351
+ ret = hv_vm_create(HV_VM_DEFAULT);
352
+ assert_hvf_ok(ret);
353
+
354
+ s = g_new0(HVFState, 1);
355
+
356
+ s->num_slots = 32;
357
+ for (x = 0; x < s->num_slots; ++x) {
358
+ s->slots[x].size = 0;
359
+ s->slots[x].slot_id = x;
360
+ }
361
+
362
+ hvf_state = s;
363
+ memory_listener_register(&hvf_memory_listener, &address_space_memory);
364
+ return 0;
365
+}
366
+
367
+static void hvf_accel_class_init(ObjectClass *oc, void *data)
368
+{
369
+ AccelClass *ac = ACCEL_CLASS(oc);
370
+ ac->name = "HVF";
371
+ ac->init_machine = hvf_accel_init;
372
+ ac->allowed = &hvf_allowed;
373
+}
374
+
375
+static const TypeInfo hvf_accel_type = {
376
+ .name = TYPE_HVF_ACCEL,
377
+ .parent = TYPE_ACCEL,
378
+ .class_init = hvf_accel_class_init,
379
+};
380
+
381
+static void hvf_type_init(void)
382
+{
383
+ type_register_static(&hvf_accel_type);
384
+}
385
+
386
+type_init(hvf_type_init);
387
+
388
/*
389
* The HVF-specific vCPU thread function. This one should only run when the host
390
* CPU supports the VMX "unrestricted guest" feature.
391
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
392
index XXXXXXX..XXXXXXX 100644
393
--- a/target/i386/hvf/hvf.c
394
+++ b/target/i386/hvf/hvf.c
395
@@ -XXX,XX +XXX,XX @@
396
397
#include "hvf-accel-ops.h"
398
399
-HVFState *hvf_state;
400
-
401
-/* Memory slots */
402
-hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
403
-{
404
- hvf_slot *slot;
405
- int x;
406
- for (x = 0; x < hvf_state->num_slots; ++x) {
407
- slot = &hvf_state->slots[x];
408
- if (slot->size && start < (slot->start + slot->size) &&
409
- (start + size) > slot->start) {
410
- return slot;
411
- }
412
- }
413
- return NULL;
414
-}
415
-
416
-struct mac_slot {
417
- int present;
418
- uint64_t size;
419
- uint64_t gpa_start;
420
- uint64_t gva;
421
-};
422
-
423
-struct mac_slot mac_slots[32];
424
-
425
-static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
426
-{
427
- struct mac_slot *macslot;
428
- hv_return_t ret;
429
-
430
- macslot = &mac_slots[slot->slot_id];
431
-
432
- if (macslot->present) {
433
- if (macslot->size != slot->size) {
434
- macslot->present = 0;
435
- ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
436
- assert_hvf_ok(ret);
437
- }
438
- }
439
-
440
- if (!slot->size) {
441
- return 0;
442
- }
443
-
444
- macslot->present = 1;
445
- macslot->gpa_start = slot->start;
446
- macslot->size = slot->size;
447
- ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
448
- assert_hvf_ok(ret);
449
- return 0;
450
-}
451
-
452
-void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
453
-{
454
- hvf_slot *mem;
455
- MemoryRegion *area = section->mr;
456
- bool writeable = !area->readonly && !area->rom_device;
457
- hv_memory_flags_t flags;
458
-
459
- if (!memory_region_is_ram(area)) {
460
- if (writeable) {
461
- return;
462
- } else if (!memory_region_is_romd(area)) {
463
- /*
464
- * If the memory device is not in romd_mode, then we actually want
465
- * to remove the hvf memory slot so all accesses will trap.
466
- */
467
- add = false;
468
- }
469
- }
470
-
471
- mem = hvf_find_overlap_slot(
472
- section->offset_within_address_space,
473
- int128_get64(section->size));
474
-
475
- if (mem && add) {
476
- if (mem->size == int128_get64(section->size) &&
477
- mem->start == section->offset_within_address_space &&
478
- mem->mem == (memory_region_get_ram_ptr(area) +
479
- section->offset_within_region)) {
480
- return; /* Same region was attempted to register, go away. */
481
- }
482
- }
483
-
484
- /* Region needs to be reset. set the size to 0 and remap it. */
485
- if (mem) {
486
- mem->size = 0;
487
- if (do_hvf_set_memory(mem, 0)) {
488
- error_report("Failed to reset overlapping slot");
489
- abort();
490
- }
491
- }
492
-
493
- if (!add) {
494
- return;
495
- }
496
-
497
- if (area->readonly ||
498
- (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
499
- flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
500
- } else {
501
- flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
502
- }
503
-
504
- /* Now make a new slot. */
505
- int x;
506
-
507
- for (x = 0; x < hvf_state->num_slots; ++x) {
508
- mem = &hvf_state->slots[x];
509
- if (!mem->size) {
510
- break;
511
- }
512
- }
513
-
514
- if (x == hvf_state->num_slots) {
515
- error_report("No free slots");
516
- abort();
517
- }
518
-
519
- mem->size = int128_get64(section->size);
520
- mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
521
- mem->start = section->offset_within_address_space;
522
- mem->region = area;
523
-
524
- if (do_hvf_set_memory(mem, flags)) {
525
- error_report("Error registering new memory slot");
526
- abort();
527
- }
528
-}
529
-
530
void vmx_update_tpr(CPUState *cpu)
531
{
532
/* TODO: need integrate APIC handling */
533
@@ -XXX,XX +XXX,XX @@ void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
534
}
535
}
536
537
-static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
538
-{
539
- if (!cpu->vcpu_dirty) {
540
- hvf_get_registers(cpu);
541
- cpu->vcpu_dirty = true;
542
- }
543
-}
544
-
545
-void hvf_cpu_synchronize_state(CPUState *cpu)
546
-{
547
- if (!cpu->vcpu_dirty) {
548
- run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
549
- }
550
-}
551
-
552
-static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
553
- run_on_cpu_data arg)
554
-{
555
- hvf_put_registers(cpu);
556
- cpu->vcpu_dirty = false;
557
-}
558
-
559
-void hvf_cpu_synchronize_post_reset(CPUState *cpu)
560
-{
561
- run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
562
-}
563
-
564
-static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
565
- run_on_cpu_data arg)
566
-{
567
- hvf_put_registers(cpu);
568
- cpu->vcpu_dirty = false;
569
-}
570
-
571
-void hvf_cpu_synchronize_post_init(CPUState *cpu)
572
-{
573
- run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
574
-}
575
-
576
-static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
577
- run_on_cpu_data arg)
578
-{
579
- cpu->vcpu_dirty = true;
580
-}
581
-
582
-void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
583
-{
584
- run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
585
-}
586
-
587
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
588
{
589
int read, write;
590
@@ -XXX,XX +XXX,XX @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
591
return false;
592
}
593
594
-static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
595
-{
596
- hvf_slot *slot;
597
-
598
- slot = hvf_find_overlap_slot(
599
- section->offset_within_address_space,
600
- int128_get64(section->size));
601
-
602
- /* protect region against writes; begin tracking it */
603
- if (on) {
604
- slot->flags |= HVF_SLOT_LOG;
605
- hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
606
- HV_MEMORY_READ);
607
- /* stop tracking region*/
608
- } else {
609
- slot->flags &= ~HVF_SLOT_LOG;
610
- hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
611
- HV_MEMORY_READ | HV_MEMORY_WRITE);
612
- }
613
-}
614
-
615
-static void hvf_log_start(MemoryListener *listener,
616
- MemoryRegionSection *section, int old, int new)
617
-{
618
- if (old != 0) {
619
- return;
620
- }
621
-
622
- hvf_set_dirty_tracking(section, 1);
623
-}
624
-
625
-static void hvf_log_stop(MemoryListener *listener,
626
- MemoryRegionSection *section, int old, int new)
627
-{
628
- if (new != 0) {
629
- return;
630
- }
631
-
632
- hvf_set_dirty_tracking(section, 0);
633
-}
634
-
635
-static void hvf_log_sync(MemoryListener *listener,
636
- MemoryRegionSection *section)
637
-{
638
- /*
639
- * sync of dirty pages is handled elsewhere; just make sure we keep
640
- * tracking the region.
641
- */
642
- hvf_set_dirty_tracking(section, 1);
643
-}
644
-
645
-static void hvf_region_add(MemoryListener *listener,
646
- MemoryRegionSection *section)
647
-{
648
- hvf_set_phys_mem(section, true);
649
-}
650
-
651
-static void hvf_region_del(MemoryListener *listener,
652
- MemoryRegionSection *section)
653
-{
654
- hvf_set_phys_mem(section, false);
655
-}
656
-
657
-static MemoryListener hvf_memory_listener = {
658
- .priority = 10,
659
- .region_add = hvf_region_add,
660
- .region_del = hvf_region_del,
661
- .log_start = hvf_log_start,
662
- .log_stop = hvf_log_stop,
663
- .log_sync = hvf_log_sync,
664
-};
665
-
666
void hvf_vcpu_destroy(CPUState *cpu)
667
{
668
X86CPU *x86_cpu = X86_CPU(cpu);
669
@@ -XXX,XX +XXX,XX @@ void hvf_vcpu_destroy(CPUState *cpu)
670
assert_hvf_ok(ret);
671
}
672
673
-static void dummy_signal(int sig)
674
-{
675
-}
676
-
677
static void init_tsc_freq(CPUX86State *env)
678
{
679
size_t length;
680
@@ -XXX,XX +XXX,XX @@ int hvf_vcpu_exec(CPUState *cpu)
681
682
return ret;
683
}
684
-
685
-bool hvf_allowed;
686
-
687
-static int hvf_accel_init(MachineState *ms)
688
-{
689
- int x;
690
- hv_return_t ret;
691
- HVFState *s;
692
-
693
- ret = hv_vm_create(HV_VM_DEFAULT);
694
- assert_hvf_ok(ret);
695
-
696
- s = g_new0(HVFState, 1);
697
-
698
- s->num_slots = 32;
699
- for (x = 0; x < s->num_slots; ++x) {
700
- s->slots[x].size = 0;
701
- s->slots[x].slot_id = x;
702
- }
703
-
704
- hvf_state = s;
705
- memory_listener_register(&hvf_memory_listener, &address_space_memory);
706
- return 0;
707
-}
708
-
709
-static void hvf_accel_class_init(ObjectClass *oc, void *data)
710
-{
711
- AccelClass *ac = ACCEL_CLASS(oc);
712
- ac->name = "HVF";
713
- ac->init_machine = hvf_accel_init;
714
- ac->allowed = &hvf_allowed;
715
-}
716
-
717
-static const TypeInfo hvf_accel_type = {
718
- .name = TYPE_HVF_ACCEL,
719
- .parent = TYPE_ACCEL,
720
- .class_init = hvf_accel_class_init,
721
-};
722
-
723
-static void hvf_type_init(void)
724
-{
725
- type_register_static(&hvf_accel_type);
726
-}
727
-
728
-type_init(hvf_type_init);
729
--
183
--
730
2.20.1
184
2.25.1
731
732
diff view generated by jsdifflib
1
Coverity points out that we calculate a 64-bit value using 32-bit
1
From: Richard Henderson <richard.henderson@linaro.org>
2
arithmetic; add the cast to force the multiply to be done as 64-bits.
3
(The overflow will never happen with the current test data.)
4
2
5
Fixes: Coverity CID 1432320
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
9
Message-id: 20210525134458.6675-5-peter.maydell@linaro.org
10
---
7
---
11
tests/qtest/pflash-cfi02-test.c | 2 +-
8
target/arm/helper-sme.h | 5 +++
12
1 file changed, 1 insertion(+), 1 deletion(-)
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
13
13
14
diff --git a/tests/qtest/pflash-cfi02-test.c b/tests/qtest/pflash-cfi02-test.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/qtest/pflash-cfi02-test.c
16
--- a/target/arm/helper-sme.h
17
+++ b/tests/qtest/pflash-cfi02-test.c
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ static void test_geometry(const void *opaque)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
for (int region = 0; region < nb_erase_regions; ++region) {
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
for (uint32_t i = 0; i < c->nb_blocs[region]; ++i) {
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
- uint64_t byte_addr = i * c->sector_len[region];
22
+
23
+ uint64_t byte_addr = (uint64_t)i * c->sector_len[region];
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
24
g_assert_cmphex(flash_read(c, byte_addr), ==, bank_mask(c));
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
25
}
57
}
26
}
58
}
59
}
60
+
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
62
+ void *vpm, void *vst, uint32_t desc)
63
+{
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
65
+ uint32_t neg = simd_data(desc) << 31;
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+ float_status fpst;
68
+
69
+ /*
70
+ * Make a copy of float_status because this operation does not
71
+ * update the cumulative fp exception status. It also produces
72
+ * default nans.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
27
--
168
--
28
2.20.1
169
2.25.1
29
30
diff view generated by jsdifflib
1
Add the isar feature check functions we will need for v8.1M MVE:
1
From: Richard Henderson <richard.henderson@linaro.org>
2
* a check for MVE present: this corresponds to the pseudocode's
3
CheckDecodeFaults(ExtType_Mve)
4
* a check for the optional floating-point part of MVE: this
5
corresponds to CheckDecodeFaults(ExtType_MveFp)
6
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Message-id: 20210520152840.24453-2-peter.maydell@linaro.org
10
---
7
---
11
target/arm/cpu.h | 22 ++++++++++++++++++++++
8
target/arm/helper-sme.h | 2 ++
12
1 file changed, 22 insertions(+)
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
13
13
14
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/cpu.h
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/cpu.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
19
}
40
}
20
}
41
}
21
42
+
22
+static inline bool isar_feature_aa32_mve(const ARMISARegisters *id)
43
+/*
44
+ * Alter PAIR as needed for controlling predicates being false,
45
+ * and for NEG on an enabled row element.
46
+ */
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
23
+{
48
+{
24
+ /*
49
+ /*
25
+ * Return true if MVE is supported (either integer or floating point).
50
+ * The pseudocode uses a conditional negate after the conditional zero.
26
+ * We must check for M-profile as the MVFR1 field means something
51
+ * It is simpler here to unconditionally negate before conditional zero.
27
+ * else for A-profile.
28
+ */
52
+ */
29
+ return isar_feature_aa32_mprofile(id) &&
53
+ pair ^= neg;
30
+ FIELD_EX32(id->mvfr1, MVFR1, MVE) > 0;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
31
+}
61
+}
32
+
62
+
33
+static inline bool isar_feature_aa32_mve_fp(const ARMISARegisters *id)
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
64
+ void *vpm, uint32_t desc)
34
+{
65
+{
35
+ /*
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
36
+ * Return true if MVE is supported (either integer or floating point).
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
37
+ * We must check for M-profile as the MVFR1 field means something
68
+ uint16_t *pn = vpn, *pm = vpm;
38
+ * else for A-profile.
69
+
39
+ */
70
+ for (row = 0; row < oprsz; ) {
40
+ return isar_feature_aa32_mprofile(id) &&
71
+ uint16_t prow = pn[H2(row >> 4)];
41
+ FIELD_EX32(id->mvfr1, MVFR1, MVE) >= 2;
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
114
+ return true;
115
+ }
116
+
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
42
+}
131
+}
43
+
132
+
44
static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id)
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
45
{
135
{
46
/*
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
137
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
140
+
141
+/* TODO: FEAT_EBF16 */
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
47
--
143
--
48
2.20.1
144
2.25.1
49
50
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Until now, Hypervisor.framework has only been available on x86_64 systems.
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
With Apple Silicon shipping now, it extends its reach to aarch64. To
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
5
prepare for support for multiple architectures, let's start moving common
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
code out into its own accel directory.
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
7
13
8
This patch splits the vcpu init and destroy functions into a generic and
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
9
an architecture specific portion. This also allows us to move the generic
10
functions into the generic hvf code, removing exported functions.
11
12
Signed-off-by: Alexander Graf <agraf@csgraf.de>
13
Reviewed-by: Sergio Lopez <slp@redhat.com>
14
Message-id: 20210519202253.76782-8-agraf@csgraf.de
15
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
accel/hvf/hvf-accel-ops.h | 2 --
19
include/sysemu/hvf_int.h | 2 ++
20
accel/hvf/hvf-accel-ops.c | 30 ++++++++++++++++++++++++++++++
21
target/i386/hvf/hvf.c | 23 ++---------------------
22
4 files changed, 34 insertions(+), 23 deletions(-)
23
24
diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h
25
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
26
--- a/accel/hvf/hvf-accel-ops.h
16
--- a/target/arm/helper-sme.h
27
+++ b/accel/hvf/hvf-accel-ops.h
17
+++ b/target/arm/helper-sme.h
28
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
29
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
#include "sysemu/cpus.h"
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
21
32
-int hvf_init_vcpu(CPUState *);
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
33
int hvf_vcpu_exec(CPUState *);
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
34
void hvf_cpu_synchronize_state(CPUState *);
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
35
void hvf_cpu_synchronize_post_reset(CPUState *);
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
36
void hvf_cpu_synchronize_post_init(CPUState *);
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
37
void hvf_cpu_synchronize_pre_loadvm(CPUState *);
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
38
-void hvf_vcpu_destroy(CPUState *);
39
40
#endif /* HVF_CPUS_H */
41
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
42
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
43
--- a/include/sysemu/hvf_int.h
29
--- a/target/arm/sme.decode
44
+++ b/include/sysemu/hvf_int.h
30
+++ b/target/arm/sme.decode
45
@@ -XXX,XX +XXX,XX @@ struct HVFState {
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
46
extern HVFState *hvf_state;
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
47
33
48
void assert_hvf_ok(hv_return_t ret);
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
49
+int hvf_arch_init_vcpu(CPUState *cpu);
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
50
+void hvf_arch_vcpu_destroy(CPUState *cpu);
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
51
hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
52
int hvf_put_registers(CPUState *);
53
int hvf_get_registers(CPUState *);
54
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
55
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
56
--- a/accel/hvf/hvf-accel-ops.c
38
--- a/target/arm/sme_helper.c
57
+++ b/accel/hvf/hvf-accel-ops.c
39
+++ b/target/arm/sme_helper.c
58
@@ -XXX,XX +XXX,XX @@ static void hvf_type_init(void)
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
59
41
return pair;
60
type_init(hvf_type_init);
42
}
61
43
62
+static void hvf_vcpu_destroy(CPUState *cpu)
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
45
+ float_status *s_std, float_status *s_odd)
63
+{
46
+{
64
+ hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd);
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
65
+ assert_hvf_ok(ret);
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
66
+
53
+
67
+ hvf_arch_vcpu_destroy(cpu);
54
+ /*
55
+ * The ARM pseudocode function FPDot performs both multiplies
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
68
+}
69
+}
69
+
70
+
70
+static int hvf_init_vcpu(CPUState *cpu)
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
71
+{
73
+{
72
+ int r;
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
73
+
78
+
74
+ /* init cpu signals */
79
+ /*
75
+ sigset_t set;
80
+ * Make a copy of float_status because this operation does not
76
+ struct sigaction sigact;
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
77
+
88
+
78
+ memset(&sigact, 0, sizeof(sigact));
89
+ for (row = 0; row < oprsz; ) {
79
+ sigact.sa_handler = dummy_signal;
90
+ uint16_t prow = pn[H2(row >> 4)];
80
+ sigaction(SIG_IPI, &sigact, NULL);
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
81
+
94
+
82
+ pthread_sigmask(SIG_BLOCK, NULL, &set);
95
+ n = f16mop_adj_pair(n, prow, neg);
83
+ sigdelset(&set, SIG_IPI);
84
+
96
+
85
+ r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
97
+ for (col = 0; col < oprsz; ) {
86
+ cpu->vcpu_dirty = 1;
98
+ uint16_t pcol = pm[H2(col >> 4)];
87
+ assert_hvf_ok(r);
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
88
+
103
+
89
+ return hvf_arch_init_vcpu(cpu);
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
90
+}
116
+}
91
+
117
+
92
/*
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
93
* The HVF-specific vCPU thread function. This one should only run when the host
119
void *vpm, uint32_t desc)
94
* CPU supports the VMX "unrestricted guest" feature.
120
{
95
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
96
index XXXXXXX..XXXXXXX 100644
122
index XXXXXXX..XXXXXXX 100644
97
--- a/target/i386/hvf/hvf.c
123
--- a/target/arm/translate-sme.c
98
+++ b/target/i386/hvf/hvf.c
124
+++ b/target/arm/translate-sme.c
99
@@ -XXX,XX +XXX,XX @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
100
return false;
126
return true;
101
}
127
}
102
128
103
-void hvf_vcpu_destroy(CPUState *cpu)
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
104
+void hvf_arch_vcpu_destroy(CPUState *cpu)
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
105
{
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
106
X86CPU *x86_cpu = X86_CPU(cpu);
132
107
CPUX86State *env = &x86_cpu->env;
108
109
- hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
110
g_free(env->hvf_mmio_buf);
111
- assert_hvf_ok(ret);
112
}
113
114
static void init_tsc_freq(CPUX86State *env)
115
@@ -XXX,XX +XXX,XX @@ static inline bool apic_bus_freq_is_known(CPUX86State *env)
116
return env->apic_bus_freq != 0;
117
}
118
119
-int hvf_init_vcpu(CPUState *cpu)
120
+int hvf_arch_init_vcpu(CPUState *cpu)
121
{
122
-
123
X86CPU *x86cpu = X86_CPU(cpu);
124
CPUX86State *env = &x86cpu->env;
125
- int r;
126
-
127
- /* init cpu signals */
128
- sigset_t set;
129
- struct sigaction sigact;
130
-
131
- memset(&sigact, 0, sizeof(sigact));
132
- sigact.sa_handler = dummy_signal;
133
- sigaction(SIG_IPI, &sigact, NULL);
134
-
135
- pthread_sigmask(SIG_BLOCK, NULL, &set);
136
- sigdelset(&set, SIG_IPI);
137
138
init_emu();
139
init_decoder();
140
@@ -XXX,XX +XXX,XX @@ int hvf_init_vcpu(CPUState *cpu)
141
}
142
}
143
144
- r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
145
- cpu->vcpu_dirty = 1;
146
- assert_hvf_ok(r);
147
-
148
if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
149
&hvf_state->hvf_caps->vmx_cap_pinbased)) {
150
abort();
151
--
133
--
152
2.20.1
134
2.25.1
153
154
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Note that the SVE BFLOAT16 support does not require SVE2,
3
This is SMOPA, SUMOPA, USMOPA, UMOPA, for both Int8 and Int16.
4
it is an independent extension.
5
4
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-2-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/cpu.h | 15 +++++++++++++++
10
target/arm/helper-sme.h | 16 ++++++++
12
1 file changed, 15 insertions(+)
11
target/arm/sme.decode | 10 +++++
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
13
15
14
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/cpu.h
18
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/cpu.h
19
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id)
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
19
return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0;
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, ptr, i32)
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
48
+
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
53
+
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/sme_helper.c
61
+++ b/target/arm/sme_helper.c
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
63
} while (row & 15);
64
}
20
}
65
}
21
66
+
22
+static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id)
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
68
+
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
23
+{
72
+{
24
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0;
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
75
+
76
+ for (row = 0; row < oprsz; ++row) {
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
80
+
81
+ for (col = 0; col < oprsz; ++col) {
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
86
+ }
87
+ }
25
+}
88
+}
26
+
89
+
27
static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id)
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
28
{
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
29
return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0;
92
+{ \
30
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
93
+ uint32_t sum0 = 0, sum1 = 0; \
31
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2;
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
32
}
95
+ n &= expand_pred_b(p); \
33
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
34
+static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id)
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
35
+{
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
36
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0;
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
37
+}
110
+}
38
+
111
+
39
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
40
{
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
41
/* We always set the AdvSIMD and FP fields identically. */
114
+{ \
42
@@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
115
+ uint64_t sum = 0; \
43
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
44
}
117
+ n &= expand_pred_h(p); \
45
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
46
+static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id)
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
47
+{
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
48
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0;
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
49
+}
123
+}
50
+
124
+
51
static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id)
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
52
{
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
53
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0;
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
129
+
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
134
+
135
+#define DEF_IMOPH(NAME) \
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
137
+ void *vpm, uint32_t desc) \
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
139
+
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
54
--
166
--
55
2.20.1
167
2.25.1
56
57
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE,
3
This is an SVE instruction that operates using the SVE vector
4
and VFMA{B,T}.BF16 for AArch32 NEON.
4
length, but that is present only if SME is implemented.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-11-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.h | 2 ++
11
target/arm/sve.decode | 20 +++++++++++++
12
target/arm/neon-shared.decode | 2 ++
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
13
target/arm/sve.decode | 2 ++
13
2 files changed, 77 insertions(+)
14
target/arm/translate-a64.c | 15 ++++++++++++++-
15
target/arm/translate-neon.c | 10 ++++++++++
16
target/arm/translate-sve.c | 30 ++++++++++++++++++++++++++++++
17
target/arm/vec_helper.c | 22 ++++++++++++++++++++++
18
7 files changed, 82 insertions(+), 1 deletion(-)
19
14
20
diff --git a/target/arm/helper.h b/target/arm/helper.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.h
23
+++ b/target/arm/helper.h
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
25
26
DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
27
void, ptr, ptr, ptr, ptr, ptr, i32)
28
+DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
31
#ifdef TARGET_AARCH64
32
#include "helper-a64.h"
33
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/neon-shared.decode
36
+++ b/target/arm/neon-shared.decode
37
@@ -XXX,XX +XXX,XX @@ VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
38
rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
39
VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
40
index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1
41
+VFMA_b16_scal 1111 1110 0.11 .... .... 1000 . q:1 . 1 . vm:3 \
42
+ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp
43
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
44
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/sve.decode
17
--- a/target/arm/sve.decode
46
+++ b/target/arm/sve.decode
18
+++ b/target/arm/sve.decode
47
@@ -XXX,XX +XXX,XX @@ FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
48
FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
49
FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
50
FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
51
+BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
52
+BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
53
20
54
### SVE2 floating-point bfloat16 dot-product (indexed)
21
### SVE2 floating-point bfloat16 dot-product (indexed)
55
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
56
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/translate-a64.c
59
+++ b/target/arm/translate-a64.c
60
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
61
unallocated_encoding(s);
62
return;
63
}
64
+ size = MO_32;
65
break;
66
case 1: /* BFDOT */
67
if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
68
unallocated_encoding(s);
69
return;
70
}
71
+ size = MO_32;
72
+ break;
73
+ case 3: /* BFMLAL{B,T} */
74
+ if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
75
+ unallocated_encoding(s);
76
+ return;
77
+ }
78
+ /* can't set is_fp without other incorrect size checks */
79
+ size = MO_16;
80
break;
81
default:
82
unallocated_encoding(s);
83
return;
84
}
85
- size = MO_32;
86
break;
87
case 0x11: /* FCMLA #0 */
88
case 0x13: /* FCMLA #90 */
89
@@ -XXX,XX +XXX,XX @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
90
gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
91
gen_helper_gvec_usdot_idx_b);
92
return;
93
+ case 3: /* BFMLAL{B,T} */
94
+ gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
95
+ gen_helper_gvec_bfmlal_idx);
96
+ return;
97
}
98
g_assert_not_reached();
99
case 0x11: /* FCMLA #0 */
100
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/target/arm/translate-neon.c
103
+++ b/target/arm/translate-neon.c
104
@@ -XXX,XX +XXX,XX @@ static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
105
return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
106
gen_helper_gvec_bfmlal);
107
}
108
+
23
+
109
+static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
24
+### SVE broadcast predicate element
110
+{
25
+
111
+ if (!dc_isar_feature(aa32_bf16, s)) {
26
+&psel esz pd pn pm rv imm
112
+ return false;
27
+%psel_rv 16:2 !function=plus_12
113
+ }
28
+%psel_imm_b 22:2 19:2
114
+ return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
29
+%psel_imm_h 22:2 20:1
115
+ (a->index << 1) | a->q, FPST_STD,
30
+%psel_imm_s 22:2
116
+ gen_helper_gvec_bfmlal_idx);
31
+%psel_imm_d 23:1
117
+}
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
118
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
119
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
120
--- a/target/arm/translate-sve.c
45
--- a/target/arm/translate-sve.c
121
+++ b/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
122
@@ -XXX,XX +XXX,XX @@ static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
123
{
48
124
return do_BFMLAL_zzzw(s, a, true);
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
125
}
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
126
+
51
+
127
+static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
128
+{
53
+{
129
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
130
+ return false;
61
+ return false;
131
+ }
62
+ }
132
+ if (sve_access_check(s)) {
63
+ if (!sve_access_check(s)) {
133
+ TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
64
+ return true;
134
+ unsigned vsz = vec_full_reg_size(s);
65
+ }
135
+
66
+
136
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
67
+ tmp = tcg_temp_new_i64();
137
+ vec_full_reg_offset(s, a->rn),
68
+ dbit = tcg_temp_new_i64();
138
+ vec_full_reg_offset(s, a->rm),
69
+ didx = tcg_temp_new_i64();
139
+ vec_full_reg_offset(s, a->ra),
70
+ ptr = tcg_temp_new_ptr();
140
+ status, vsz, vsz, (a->index << 1) | sel,
71
+
141
+ gen_helper_gvec_bfmlal_idx);
72
+ /* Compute the predicate element. */
142
+ tcg_temp_free_ptr(status);
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
74
+ if (is_power_of_2(elements)) {
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
143
+ }
78
+ }
79
+
80
+ /* Extract the predicate byte and bit indices. */
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
86
+ }
87
+
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
144
+ return true;
106
+ return true;
145
+}
107
+}
146
+
147
+static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
148
+{
149
+ return do_BFMLAL_zzxw(s, a, false);
150
+}
151
+
152
+static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
153
+{
154
+ return do_BFMLAL_zzxw(s, a, true);
155
+}
156
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
157
index XXXXXXX..XXXXXXX 100644
158
--- a/target/arm/vec_helper.c
159
+++ b/target/arm/vec_helper.c
160
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
161
}
162
clear_tail(d, opr_sz, simd_maxsz(desc));
163
}
164
+
165
+void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
166
+ void *va, void *stat, uint32_t desc)
167
+{
168
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
169
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
170
+ intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3);
171
+ intptr_t elements = opr_sz / 4;
172
+ intptr_t eltspersegment = MIN(16 / 4, elements);
173
+ float32 *d = vd, *a = va;
174
+ bfloat16 *n = vn, *m = vm;
175
+
176
+ for (i = 0; i < elements; i += eltspersegment) {
177
+ float32 m_idx = m[H2(2 * i + index)] << 16;
178
+
179
+ for (j = i; j < i + eltspersegment; j++) {
180
+ float32 n_j = n[H2(2 * j + sel)] << 16;
181
+ d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat);
182
+ }
183
+ }
184
+ clear_tail(d, opr_sz, simd_maxsz(desc));
185
+}
186
--
108
--
187
2.20.1
109
2.25.1
188
189
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFCVT{N,T} for both AArch64 AdvSIMD and SVE,
3
This is an SVE instruction that operates using the SVE vector
4
and VCVT.BF16.F32 for AArch32 NEON.
4
length but is present only if SME is implemented.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-5-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper-sve.h | 4 ++++
11
target/arm/helper-sve.h | 2 ++
12
target/arm/helper.h | 1 +
12
target/arm/sve.decode | 1 +
13
target/arm/neon-dp.decode | 1 +
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/sve.decode | 2 ++
14
target/arm/translate-sve.c | 2 ++
15
target/arm/sve_helper.c | 2 ++
15
4 files changed, 21 insertions(+)
16
target/arm/translate-a64.c | 17 ++++++++++++++
17
target/arm/translate-neon.c | 45 +++++++++++++++++++++++++++++++++++++
18
target/arm/translate-sve.c | 16 +++++++++++++
19
target/arm/vfp_helper.c | 7 ++++++
20
9 files changed, 95 insertions(+)
21
16
22
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
23
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
24
--- a/target/arm/helper-sve.h
19
--- a/target/arm/helper-sve.h
25
+++ b/target/arm/helper-sve.h
20
+++ b/target/arm/helper-sve.h
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_fcvt_hd, TCG_CALL_NO_RWG,
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
27
void, ptr, ptr, ptr, ptr, i32)
22
28
DEF_HELPER_FLAGS_5(sve_fcvt_sd, TCG_CALL_NO_RWG,
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
void, ptr, ptr, ptr, ptr, i32)
24
30
+DEF_HELPER_FLAGS_5(sve_bfcvt, TCG_CALL_NO_RWG,
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
+ void, ptr, ptr, ptr, ptr, i32)
26
+
32
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
DEF_HELPER_FLAGS_5(sve_fcvtzs_hh, TCG_CALL_NO_RWG,
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
void, ptr, ptr, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve2_fcvtnt_sh, TCG_CALL_NO_RWG,
36
void, ptr, ptr, ptr, ptr, i32)
37
DEF_HELPER_FLAGS_5(sve2_fcvtnt_ds, TCG_CALL_NO_RWG,
38
void, ptr, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_5(sve_bfcvtnt, TCG_CALL_NO_RWG,
40
+ void, ptr, ptr, ptr, ptr, i32)
41
42
DEF_HELPER_FLAGS_5(sve2_fcvtlt_hs, TCG_CALL_NO_RWG,
43
void, ptr, ptr, ptr, ptr, i32)
44
diff --git a/target/arm/helper.h b/target/arm/helper.h
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/helper.h
47
+++ b/target/arm/helper.h
48
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
49
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
50
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
51
DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr)
52
+DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, ptr)
53
54
DEF_HELPER_2(vfp_uitoh, f16, i32, ptr)
55
DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
56
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
57
index XXXXXXX..XXXXXXX 100644
58
--- a/target/arm/neon-dp.decode
59
+++ b/target/arm/neon-dp.decode
60
@@ -XXX,XX +XXX,XX @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
61
VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
62
63
VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
64
+ VCVT_B16_F32 1111 001 11 . 11 .. 10 .... 0 1100 1 . 0 .... @2misc_q0
65
66
VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
67
68
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
69
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
70
--- a/target/arm/sve.decode
32
--- a/target/arm/sve.decode
71
+++ b/target/arm/sve.decode
33
+++ b/target/arm/sve.decode
72
@@ -XXX,XX +XXX,XX @@ FNMLS_zpzzz 01100101 .. 1 ..... 111 ... ..... ..... @rdn_pg_rm_ra
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
73
# SVE floating-point convert precision
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
74
FCVT_sh 01100101 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
75
FCVT_hs 01100101 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
76
+BFCVT 01100101 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
77
FCVT_dh 01100101 11 0010 00 101 ... ..... ..... @rd_pg_rn_e0
39
78
FCVT_hd 01100101 11 0010 01 101 ... ..... ..... @rd_pg_rn_e0
40
# SVE vector splice (predicated, destructive)
79
FCVT_ds 01100101 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
80
@@ -XXX,XX +XXX,XX @@ RAX1 01000101 00 1 ..... 11110 1 ..... ..... @rd_rn_rm_e0
81
FCVTXNT_ds 01100100 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0
82
FCVTX_ds 01100101 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0
83
FCVTNT_sh 01100100 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0
84
+BFCVTNT 01100100 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0
85
FCVTLT_hs 01100100 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0
86
FCVTNT_ds 01100100 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0
87
FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0
88
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
89
index XXXXXXX..XXXXXXX 100644
43
index XXXXXXX..XXXXXXX 100644
90
--- a/target/arm/sve_helper.c
44
--- a/target/arm/sve_helper.c
91
+++ b/target/arm/sve_helper.c
45
+++ b/target/arm/sve_helper.c
92
@@ -XXX,XX +XXX,XX @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s)
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
93
47
94
DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16)
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
95
DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32)
49
96
+DO_ZPZ_FP(sve_bfcvt, uint32_t, H1_4, float32_to_bfloat16)
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
97
DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16)
98
DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64)
99
DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32)
100
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
101
} while (i != 0); \
102
}
103
104
+DO_FCVTNT(sve_bfcvtnt, uint32_t, uint16_t, H1_4, H1_2, float32_to_bfloat16)
105
DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16)
106
DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, , H1_4, float64_to_float32)
107
108
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/translate-a64.c
111
+++ b/target/arm/translate-a64.c
112
@@ -XXX,XX +XXX,XX @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
113
tcg_temp_free_i32(ahp);
114
}
115
break;
116
+ case 0x36: /* BFCVTN, BFCVTN2 */
117
+ {
118
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
119
+ gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
120
+ tcg_temp_free_ptr(fpst);
121
+ }
122
+ break;
123
case 0x56: /* FCVTXN, FCVTXN2 */
124
/* 64 bit to 32 bit float conversion
125
* with von Neumann rounding (round to odd)
126
@@ -XXX,XX +XXX,XX @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
127
}
128
handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
129
return;
130
+ case 0x36: /* BFCVTN, BFCVTN2 */
131
+ if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
132
+ unallocated_encoding(s);
133
+ return;
134
+ }
135
+ if (!fp_access_check(s)) {
136
+ return;
137
+ }
138
+ handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
139
+ return;
140
case 0x17: /* FCVTL, FCVTL2 */
141
if (!fp_access_check(s)) {
142
return;
143
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/target/arm/translate-neon.c
146
+++ b/target/arm/translate-neon.c
147
@@ -XXX,XX +XXX,XX @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
148
return true;
149
}
150
151
+static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
152
+{
51
+{
153
+ TCGv_ptr fpst;
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
154
+ TCGv_i64 tmp;
53
+ uint64_t *d = vd, *n = vn;
155
+ TCGv_i32 dst0, dst1;
54
+ uint8_t *pg = vg;
156
+
55
+
157
+ if (!dc_isar_feature(aa32_bf16, s)) {
56
+ for (i = 0; i < opr_sz; i += 2) {
158
+ return false;
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
159
+ }
63
+ }
160
+
161
+ /* UNDEF accesses to D16-D31 if they don't exist. */
162
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
163
+ ((a->vd | a->vm) & 0x10)) {
164
+ return false;
165
+ }
166
+
167
+ if ((a->vm & 1) || (a->size != 1)) {
168
+ return false;
169
+ }
170
+
171
+ if (!vfp_access_check(s)) {
172
+ return true;
173
+ }
174
+
175
+ fpst = fpstatus_ptr(FPST_STD);
176
+ tmp = tcg_temp_new_i64();
177
+ dst0 = tcg_temp_new_i32();
178
+ dst1 = tcg_temp_new_i32();
179
+
180
+ read_neon_element64(tmp, a->vm, 0, MO_64);
181
+ gen_helper_bfcvt_pair(dst0, tmp, fpst);
182
+
183
+ read_neon_element64(tmp, a->vm, 1, MO_64);
184
+ gen_helper_bfcvt_pair(dst1, tmp, fpst);
185
+
186
+ write_neon_element32(dst0, a->vd, 0, MO_32);
187
+ write_neon_element32(dst1, a->vd, 1, MO_32);
188
+
189
+ tcg_temp_free_i64(tmp);
190
+ tcg_temp_free_i32(dst0);
191
+ tcg_temp_free_i32(dst1);
192
+ tcg_temp_free_ptr(fpst);
193
+ return true;
194
+}
64
+}
195
+
65
+
196
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
197
{
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
198
TCGv_ptr fpst;
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
199
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
200
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
201
--- a/target/arm/translate-sve.c
71
--- a/target/arm/translate-sve.c
202
+++ b/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
203
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
204
return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
205
}
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
206
76
207
+static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
208
+{
209
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
210
+ return false;
211
+ }
212
+ return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
213
+}
214
+
78
+
215
static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
216
{
80
gen_helper_sve_splice, a, a->esz)
217
return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
81
218
@@ -XXX,XX +XXX,XX @@ static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
219
return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
220
}
221
222
+static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
223
+{
224
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
225
+ return false;
226
+ }
227
+ return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
228
+}
229
+
230
static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
231
{
232
if (!dc_isar_feature(aa64_sve2, s)) {
233
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
234
index XXXXXXX..XXXXXXX 100644
235
--- a/target/arm/vfp_helper.c
236
+++ b/target/arm/vfp_helper.c
237
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(bfcvt)(float32 x, void *status)
238
return float32_to_bfloat16(x, status);
239
}
240
241
+uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
242
+{
243
+ bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
244
+ bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
245
+ return deposit32(lo, 16, 16, hi);
246
+}
247
+
248
/*
249
* VFP3 fixed point conversion. The AArch32 versions of fix-to-float
250
* must always round-to-nearest; the AArch64 ones honour the FPSCR
251
--
82
--
252
2.20.1
83
2.25.1
253
254
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is BFMMLA for both AArch64 AdvSIMD and SVE,
3
This is an SVE instruction that operates using the SVE vector
4
and VMMLA.BF16 for AArch32 NEON.
4
length but is present only if SME is implemented.
5
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20210525225817.400336-9-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.h | 3 +++
11
target/arm/helper.h | 18 +++++++
12
target/arm/neon-shared.decode | 2 ++
12
target/arm/sve.decode | 5 ++
13
target/arm/sve.decode | 6 +++--
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/translate-a64.c | 10 +++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
target/arm/translate-neon.c | 9 ++++++++
15
4 files changed, 149 insertions(+)
16
target/arm/translate-sve.c | 12 ++++++++++
17
target/arm/vec_helper.c | 42 ++++++++++++++++++++++++++++++++++-
18
7 files changed, 81 insertions(+), 3 deletions(-)
19
16
20
diff --git a/target/arm/helper.h b/target/arm/helper.h
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
21
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.h
19
--- a/target/arm/helper.h
23
+++ b/target/arm/helper.h
20
+++ b/target/arm/helper.h
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
25
DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
26
void, ptr, ptr, ptr, ptr, i32)
23
void, ptr, ptr, ptr, ptr, ptr, i32)
27
24
28
+DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
29
+ void, ptr, ptr, ptr, ptr, i32)
41
+ void, ptr, ptr, ptr, ptr, i32)
30
+
42
+
31
#ifdef TARGET_AARCH64
43
#ifdef TARGET_AARCH64
32
#include "helper-a64.h"
44
#include "helper-a64.h"
33
#include "helper-sve.h"
45
#include "helper-sve.h"
34
diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/neon-shared.decode
37
+++ b/target/arm/neon-shared.decode
38
@@ -XXX,XX +XXX,XX @@ VUMMLA 1111 1100 0.10 .... .... 1100 .1.1 .... \
39
vm=%vm_dp vn=%vn_dp vd=%vd_dp
40
VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \
41
vm=%vm_dp vn=%vn_dp vd=%vd_dp
42
+VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \
43
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
44
45
VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
46
vn=%vn_dp vd=%vd_dp size=1
47
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
48
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/sve.decode
48
--- a/target/arm/sve.decode
50
+++ b/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
51
@@ -XXX,XX +XXX,XX @@ SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
52
USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm
51
@psel esz=2 imm=%psel_imm_s
53
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
54
### SVE2 floating point matrix multiply accumulate
53
@psel esz=3 imm=%psel_imm_d
55
-
54
+
56
-FMMLA 01100100 .. 1 ..... 111001 ..... ..... @rda_rn_rm
55
+### SVE clamp
57
+{
56
+
58
+ BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
59
+ FMMLA 01100100 .. 1 ..... 111 001 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
60
+}
61
62
### SVE2 Memory Gather Load Group
63
64
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/arm/translate-a64.c
67
+++ b/target/arm/translate-a64.c
68
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
69
}
70
feature = dc_isar_feature(aa64_fcma, s);
71
break;
72
+ case 0x1d: /* BFMMLA */
73
+ if (size != MO_16 || !is_q) {
74
+ unallocated_encoding(s);
75
+ return;
76
+ }
77
+ feature = dc_isar_feature(aa64_bf16, s);
78
+ break;
79
case 0x1f: /* BFDOT */
80
switch (size) {
81
case 1:
82
@@ -XXX,XX +XXX,XX @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
83
}
84
return;
85
86
+ case 0xd: /* BFMMLA */
87
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
88
+ return;
89
case 0xf: /* BFDOT */
90
switch (size) {
91
case 1:
92
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/arm/translate-neon.c
95
+++ b/target/arm/translate-neon.c
96
@@ -XXX,XX +XXX,XX @@ static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
97
return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
98
gen_helper_gvec_usmmla_b);
99
}
100
+
101
+static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
102
+{
103
+ if (!dc_isar_feature(aa32_bf16, s)) {
104
+ return false;
105
+ }
106
+ return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
107
+ gen_helper_gvec_bfmmla);
108
+}
109
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
110
index XXXXXXX..XXXXXXX 100644
60
index XXXXXXX..XXXXXXX 100644
111
--- a/target/arm/translate-sve.c
61
--- a/target/arm/translate-sve.c
112
+++ b/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
113
@@ -XXX,XX +XXX,XX @@ static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
114
}
64
tcg_temp_free_ptr(ptr);
115
return true;
65
return true;
116
}
66
}
117
+
67
+
118
+static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
119
+{
69
+{
120
+ if (!dc_isar_feature(aa64_sve_bf16, s)) {
70
+ tcg_gen_smax_i32(d, a, n);
121
+ return false;
71
+ tcg_gen_smin_i32(d, d, m);
122
+ }
72
+}
123
+ if (sve_access_check(s)) {
73
+
124
+ gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
125
+ a->rd, a->rn, a->rm, a->ra, 0);
75
+{
126
+ }
76
+ tcg_gen_smax_i64(d, a, n);
127
+ return true;
77
+ tcg_gen_smin_i64(d, d, m);
128
+}
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
129
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
130
index XXXXXXX..XXXXXXX 100644
170
index XXXXXXX..XXXXXXX 100644
131
--- a/target/arm/vec_helper.c
171
--- a/target/arm/vec_helper.c
132
+++ b/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
133
@@ -XXX,XX +XXX,XX @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
134
* Process the entire segment at once, writing back the
135
* results only after we've consumed all of the inputs.
136
*
137
- * Key to indicies by column:
138
+ * Key to indices by column:
139
* i j i j
140
*/
141
sum0 = a[H4(0 + 0)];
142
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
143
}
174
}
144
clear_tail(d, opr_sz, simd_maxsz(desc));
175
clear_tail(d, opr_sz, simd_maxsz(desc));
145
}
176
}
146
+
177
+
147
+void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
178
+#define DO_CLAMP(NAME, TYPE) \
148
+{
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
149
+ intptr_t s, opr_sz = simd_oprsz(desc);
180
+{ \
150
+ float32 *d = vd, *a = va;
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
151
+ uint32_t *n = vn, *m = vm;
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
152
+
183
+ TYPE aa = *(TYPE *)(a + i); \
153
+ for (s = 0; s < opr_sz / 4; s += 4) {
184
+ TYPE nn = *(TYPE *)(n + i); \
154
+ float32 sum00, sum01, sum10, sum11;
185
+ TYPE mm = *(TYPE *)(m + i); \
155
+
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
156
+ /*
187
+ *(TYPE *)(d + i) = dd; \
157
+ * Process the entire segment at once, writing back the
188
+ } \
158
+ * results only after we've consumed all of the inputs.
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
159
+ *
190
+}
160
+ * Key to indices by column:
191
+
161
+ * i j i k j k
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
162
+ */
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
163
+ sum00 = a[s + H4(0 + 0)];
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
164
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]);
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
165
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]);
196
+
166
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
167
+ sum01 = a[s + H4(0 + 1)];
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
168
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]);
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
169
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]);
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
170
+
171
+ sum10 = a[s + H4(2 + 0)];
172
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]);
173
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]);
174
+
175
+ sum11 = a[s + H4(2 + 1)];
176
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]);
177
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]);
178
+
179
+ d[s + H4(0 + 0)] = sum00;
180
+ d[s + H4(0 + 1)] = sum01;
181
+ d[s + H4(2 + 0)] = sum10;
182
+ d[s + H4(2 + 1)] = sum11;
183
+ }
184
+ clear_tail(d, opr_sz, simd_maxsz(desc));
185
+}
186
--
201
--
187
2.20.1
202
2.25.1
188
189
diff view generated by jsdifflib
1
Split out the handling of VMOV_reg_sp and VMOV_reg_dp so that we can
1
From: Richard Henderson <richard.henderson@linaro.org>
2
permit the insns if either FP or MVE are present.
3
2
3
We can handle both exception entry and exception return by
4
hooking into aarch64_sve_change_el.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
4
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210520152840.24453-5-peter.maydell@linaro.org
7
---
10
---
8
target/arm/translate-vfp.c | 15 +++++++++++++--
11
target/arm/helper.c | 15 +++++++++++++--
9
1 file changed, 13 insertions(+), 2 deletions(-)
12
1 file changed, 13 insertions(+), 2 deletions(-)
10
13
11
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-vfp.c
16
--- a/target/arm/helper.c
14
+++ b/target/arm/translate-vfp.c
17
+++ b/target/arm/helper.c
15
@@ -XXX,XX +XXX,XX @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
16
return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
19
return;
17
}
20
}
18
21
19
-DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32, aa32_fpsp_v2)
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
20
-DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64, aa32_fpdp_v2)
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
21
+#define DO_VFP_VMOV(INSN, PREC, FN) \
24
+
22
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
25
+ /*
23
+ arg_##INSN##_##PREC *a) \
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
24
+ { \
27
+ * invoke ResetSVEState when taking an exception from, or
25
+ if (!dc_isar_feature(aa32_fp##PREC##_v2, s) && \
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
26
+ !dc_isar_feature(aa32_mve, s)) { \
29
+ */
27
+ return false; \
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
28
+ } \
31
+ arm_reset_sve_state(env);
29
+ return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
32
+ return;
30
+ }
33
+ }
31
+
34
+
32
+DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
35
/*
33
+DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
34
37
* at ELx, or not available because the EL is in AArch32 state, then
35
DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
36
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
39
* we already have the correct register contents when encountering the
40
* vq0->vq0 transition between EL0->EL1.
41
*/
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
44
? sve_vqm1_for_el(env, old_el) : 0);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
47
? sve_vqm1_for_el(env, new_el) : 0);
48
37
--
49
--
38
2.20.1
50
2.25.1
39
40
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Disable BF16 again for !have_neon and !have_vfp during realize.
3
Note that SME remains effectively disabled for user-only,
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
4
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20210525225817.400336-13-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
11
---
10
target/arm/cpu.c | 3 +++
12
docs/system/arm/emulation.rst | 4 ++++
11
target/arm/cpu64.c | 3 +++
13
target/arm/cpu64.c | 11 +++++++++++
12
target/arm/cpu_tcg.c | 1 +
14
2 files changed, 15 insertions(+)
13
3 files changed, 7 insertions(+)
14
15
15
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/cpu.c
18
--- a/docs/system/arm/emulation.rst
18
+++ b/target/arm/cpu.c
19
+++ b/docs/system/arm/emulation.rst
19
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
20
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
21
u = cpu->isar.id_isar6;
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
22
u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0);
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
23
+ u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
24
+- FEAT_SME (Scalable Matrix Extension)
24
cpu->isar.id_isar6 = u;
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
25
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
26
u = cpu->isar.mvfr0;
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
27
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
28
- FEAT_SPECRES (Speculation restriction instructions)
28
29
- FEAT_SSBS (Speculative Store Bypass Safe)
29
t = cpu->isar.id_aa64isar1;
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
30
t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0);
31
+ t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0);
32
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0);
33
cpu->isar.id_aa64isar1 = t;
34
35
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
36
u = cpu->isar.id_isar6;
37
u = FIELD_DP32(u, ID_ISAR6, DP, 0);
38
u = FIELD_DP32(u, ID_ISAR6, FHM, 0);
39
+ u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
40
u = FIELD_DP32(u, ID_ISAR6, I8MM, 0);
41
cpu->isar.id_isar6 = u;
42
43
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
44
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/cpu64.c
33
--- a/target/arm/cpu64.c
46
+++ b/target/arm/cpu64.c
34
+++ b/target/arm/cpu64.c
47
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
48
t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
36
*/
49
t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1);
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
50
t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1);
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
51
+ t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1);
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
52
t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1);
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
53
t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* ARMv8.4-RCPC */
41
cpu->isar.id_aa64pfr1 = t;
54
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1);
42
55
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
56
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
57
t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* PMULL */
45
cpu->isar.id_aa64dfr0 = t;
58
t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1);
46
59
+ t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1);
47
+ t = cpu->isar.id_aa64smfr0;
60
t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1);
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
61
t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1);
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
62
t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1);
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
63
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
64
u = FIELD_DP32(u, ID_ISAR6, FHM, 1);
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
65
u = FIELD_DP32(u, ID_ISAR6, SB, 1);
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
66
u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1);
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
67
+ u = FIELD_DP32(u, ID_ISAR6, BF16, 1);
55
+ cpu->isar.id_aa64smfr0 = t;
68
u = FIELD_DP32(u, ID_ISAR6, I8MM, 1);
56
+
69
cpu->isar.id_isar6 = u;
57
/* Replicate the same data to the 32-bit id registers. */
70
58
aa32_max_features(cpu);
71
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/arm/cpu_tcg.c
74
+++ b/target/arm/cpu_tcg.c
75
@@ -XXX,XX +XXX,XX @@ static void arm_max_initfn(Object *obj)
76
t = FIELD_DP32(t, ID_ISAR6, FHM, 1);
77
t = FIELD_DP32(t, ID_ISAR6, SB, 1);
78
t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1);
79
+ t = FIELD_DP32(t, ID_ISAR6, BF16, 1);
80
t = FIELD_DP32(t, ID_ISAR6, I8MM, 1);
81
cpu->isar.id_isar6 = t;
82
59
83
--
60
--
84
2.20.1
61
2.25.1
85
86
diff view generated by jsdifflib
1
From: Jamie Iles <jamie@nuviainc.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The sequence cpu_restore_state() + raise_exception() is equivalent to
4
raise_exception_ra(), so use that instead. (In this case we never
5
cared about the syndrome value, because M-profile doesn't use the
6
syndrome; the old code was just written unnecessarily awkwardly.)
7
8
Cc: Richard Henderson <richard.henderson@linaro.org>
9
Cc: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
11
[PMM: Retain edited version of comment; rewrite commit message]
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
7
---
15
target/arm/m_helper.c | 5 +----
8
linux-user/aarch64/target_cpu.h | 5 ++++-
16
target/arm/op_helper.c | 9 +++------
9
1 file changed, 4 insertions(+), 1 deletion(-)
17
2 files changed, 4 insertions(+), 10 deletions(-)
18
10
19
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
20
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/m_helper.c
13
--- a/linux-user/aarch64/target_cpu.h
22
+++ b/target/arm/m_helper.c
14
+++ b/linux-user/aarch64/target_cpu.h
23
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
24
limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
16
25
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
26
if (val < limit) {
18
{
27
- CPUState *cs = env_cpu(env);
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
28
-
20
+ /*
29
- cpu_restore_state(cs, GETPC(), true);
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
30
- raise_exception(env, EXCP_STKOF, 0, 1);
22
* different from AArch32 Linux, which uses TPIDRRO.
31
+ raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC());
32
}
33
34
if (is_psp) {
35
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/op_helper.c
38
+++ b/target/arm/op_helper.c
39
@@ -XXX,XX +XXX,XX @@ void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
40
* raising an exception if the limit is breached.
41
*/
23
*/
42
if (newvalue < v7m_sp_limit(env)) {
24
env->cp15.tpidr_el[0] = newtls;
43
- CPUState *cs = env_cpu(env);
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
44
-
26
+ env->cp15.tpidr2_el0 = 0;
45
/*
46
* Stack limit exceptions are a rare case, so rather than syncing
47
- * PC/condbits before the call, we use cpu_restore_state() to
48
- * get them right before raising the exception.
49
+ * PC/condbits before the call, we use raise_exception_ra() so
50
+ * that cpu_restore_state() will sort them out.
51
*/
52
- cpu_restore_state(cs, GETPC(), true);
53
- raise_exception(env, EXCP_STKOF, 0, 1);
54
+ raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC());
55
}
56
}
27
}
57
28
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
58
--
30
--
59
2.20.1
31
2.25.1
60
61
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Until now, Hypervisor.framework has only been available on x86_64 systems.
4
With Apple Silicon shipping now, it extends its reach to aarch64. To
5
prepare for support for multiple architectures, let's start moving common
6
code out into its own accel directory.
7
8
This patch moves the vCPU thread loop over.
9
10
Signed-off-by: Alexander Graf <agraf@csgraf.de>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
12
Message-id: 20210519202253.76782-3-agraf@csgraf.de
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
7
---
16
{target/i386 => accel}/hvf/hvf-accel-ops.h | 0
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
17
{target/i386 => accel}/hvf/hvf-accel-ops.c | 0
9
1 file changed, 9 insertions(+)
18
target/i386/hvf/x86hvf.c | 2 +-
19
accel/hvf/meson.build | 1 +
20
target/i386/hvf/meson.build | 1 -
21
5 files changed, 2 insertions(+), 2 deletions(-)
22
rename {target/i386 => accel}/hvf/hvf-accel-ops.h (100%)
23
rename {target/i386 => accel}/hvf/hvf-accel-ops.c (100%)
24
10
25
diff --git a/target/i386/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
26
similarity index 100%
27
rename from target/i386/hvf/hvf-accel-ops.h
28
rename to accel/hvf/hvf-accel-ops.h
29
diff --git a/target/i386/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
30
similarity index 100%
31
rename from target/i386/hvf/hvf-accel-ops.c
32
rename to accel/hvf/hvf-accel-ops.c
33
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
34
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
35
--- a/target/i386/hvf/x86hvf.c
13
--- a/linux-user/aarch64/cpu_loop.c
36
+++ b/target/i386/hvf/x86hvf.c
14
+++ b/linux-user/aarch64/cpu_loop.c
37
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
38
#include <Hypervisor/hv.h>
16
39
#include <Hypervisor/hv_vmx.h>
17
switch (trapnr) {
40
18
case EXCP_SWI:
41
-#include "hvf-accel-ops.h"
19
+ /*
42
+#include "accel/hvf/hvf-accel-ops.h"
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
43
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
44
void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg,
22
+ */
45
SegmentCache *qseg, bool is_tr)
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
46
diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
47
index XXXXXXX..XXXXXXX 100644
25
+ arm_rebuild_hflags(env);
48
--- a/accel/hvf/meson.build
26
+ arm_reset_sve_state(env);
49
+++ b/accel/hvf/meson.build
27
+ }
50
@@ -XXX,XX +XXX,XX @@
28
ret = do_syscall(env,
51
hvf_ss = ss.source_set()
29
env->xregs[8],
52
hvf_ss.add(files(
30
env->xregs[0],
53
'hvf-all.c',
54
+ 'hvf-accel-ops.c',
55
))
56
57
specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss)
58
diff --git a/target/i386/hvf/meson.build b/target/i386/hvf/meson.build
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/i386/hvf/meson.build
61
+++ b/target/i386/hvf/meson.build
62
@@ -XXX,XX +XXX,XX @@
63
i386_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
64
'hvf.c',
65
- 'hvf-accel-ops.c',
66
'x86.c',
67
'x86_cpuid.c',
68
'x86_decode.c',
69
--
31
--
70
2.20.1
32
2.25.1
71
72
diff view generated by jsdifflib
1
From: Jamie Iles <jamie@nuviainc.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Now that raise_exception_ra restores the state before raising the
3
Make sure to zero the currently reserved fields.
4
exception we can use raise_exception_ra to perform the state restore +
5
exception raising without clobbering the syndrome.
6
4
7
Cc: Richard Henderson <richard.henderson@linaro.org>
8
Cc: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
10
[PMM: Keep the one line of the comment that is still relevant]
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
9
---
14
target/arm/mte_helper.c | 12 +++---------
10
linux-user/aarch64/signal.c | 9 ++++++++-
15
1 file changed, 3 insertions(+), 9 deletions(-)
11
1 file changed, 8 insertions(+), 1 deletion(-)
16
12
17
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
18
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/mte_helper.c
15
--- a/linux-user/aarch64/signal.c
20
+++ b/target/arm/mte_helper.c
16
+++ b/linux-user/aarch64/signal.c
21
@@ -XXX,XX +XXX,XX @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
22
18
struct target_sve_context {
23
switch (tcf) {
19
struct target_aarch64_ctx head;
24
case 1:
20
uint16_t vl;
25
- /*
21
- uint16_t reserved[3];
26
- * Tag check fail causes a synchronous exception.
22
+ uint16_t flags;
27
- *
23
+ uint16_t reserved[2];
28
- * In restore_state_to_opc, we set the exception syndrome
24
/* The actual SVE data immediately follows. It is laid out
29
- * for the load or store operation. Unwind first so we
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
30
- * may overwrite that with the syndrome for the tag check.
26
* the original struct pointer.
31
- */
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
32
- cpu_restore_state(env_cpu(env), ra, true);
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
33
+ /* Tag check fail causes a synchronous exception. */
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
34
env->exception.vaddress = dirty_ptr;
30
35
31
+#define TARGET_SVE_SIG_FLAG_SM 1
36
is_write = FIELD_EX32(desc, MTEDESC, WRITE);
32
+
37
syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
33
struct target_rt_sigframe {
38
is_write, 0x11);
34
struct target_siginfo info;
39
- raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
35
struct target_ucontext uc;
40
+ raise_exception_ra(env, EXCP_DATA_ABORT, syn,
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
41
+ exception_target_el(env), ra);
37
{
42
/* noreturn, but fall through to the assert anyway */
38
int i, j;
43
39
44
case 0:
40
+ memset(sve, 0, sizeof(*sve));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
42
__put_user(size, &sve->head.size);
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
46
+ }
47
48
/* Note that SVE regs are stored as a byte stream, with each byte element
49
* at a subsequent address. This corresponds to a little-endian store
45
--
50
--
46
2.20.1
51
2.25.1
47
48
diff view generated by jsdifflib
1
From: Jamie Iles <jamie@nuviainc.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Now that there are no other users of do_raise_exception, fold it into
3
Fold the return value setting into the goto, so each
4
raise_exception.
4
point of failure need not do both.
5
5
6
Cc: Richard Henderson <richard.henderson@linaro.org>
7
Cc: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/op_helper.c | 12 ++----------
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
13
1 file changed, 2 insertions(+), 10 deletions(-)
12
1 file changed, 11 insertions(+), 15 deletions(-)
14
13
15
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/op_helper.c
16
--- a/linux-user/aarch64/signal.c
18
+++ b/target/arm/op_helper.c
17
+++ b/linux-user/aarch64/signal.c
19
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
20
#define SIGNBIT (uint32_t)0x80000000
19
struct target_sve_context *sve = NULL;
21
#define SIGNBIT64 ((uint64_t)1 << 63)
20
uint64_t extra_datap = 0;
22
21
bool used_extra = false;
23
-static CPUState *do_raise_exception(CPUARMState *env, uint32_t excp,
22
- bool err = false;
24
- uint32_t syndrome, uint32_t target_el)
23
int vq = 0, sve_size = 0;
25
+void raise_exception(CPUARMState *env, uint32_t excp,
24
26
+ uint32_t syndrome, uint32_t target_el)
25
target_restore_general_frame(env, sf);
27
{
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
28
CPUState *cs = env_cpu(env);
27
switch (magic) {
29
28
case 0:
30
@@ -XXX,XX +XXX,XX @@ static CPUState *do_raise_exception(CPUARMState *env, uint32_t excp,
29
if (size != 0) {
31
cs->exception_index = excp;
30
- err = true;
32
env->exception.syndrome = syndrome;
31
- goto exit;
33
env->exception.target_el = target_el;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
34
-
84
-
35
- return cs;
85
- exit:
36
-}
86
unlock_user(extra, extra_datap, 0);
37
-
87
- return err;
38
-void raise_exception(CPUARMState *env, uint32_t excp,
88
+ return 0;
39
- uint32_t syndrome, uint32_t target_el)
89
+
40
-{
90
+ err:
41
- CPUState *cs = do_raise_exception(env, excp, syndrome, target_el);
91
+ unlock_user(extra, extra_datap, 0);
42
cpu_loop_exit(cs);
92
+ return 1;
43
}
93
}
44
94
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
45
--
96
--
46
2.20.1
97
2.25.1
47
48
diff view generated by jsdifflib
1
Coverity notes that we don't check for dup2() failing. Add some
1
From: Richard Henderson <richard.henderson@linaro.org>
2
assertions so that if it does ever happen we get some indication.
3
(This is similar to how we handle other "don't expect this syscall to
4
fail" checks in this test code.)
5
2
6
Fixes: Coverity CID 1432346
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
or records that are smaller than the header. We were silently
5
allowing these cases to pass, dropping the record.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
9
Message-id: 20210525134458.6675-2-peter.maydell@linaro.org
10
---
11
---
11
tests/qtest/bios-tables-test.c | 8 ++++++--
12
linux-user/aarch64/signal.c | 5 ++++-
12
1 file changed, 6 insertions(+), 2 deletions(-)
13
1 file changed, 4 insertions(+), 1 deletion(-)
13
14
14
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/qtest/bios-tables-test.c
17
--- a/linux-user/aarch64/signal.c
17
+++ b/tests/qtest/bios-tables-test.c
18
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@ static void test_acpi_asl(test_data *data)
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
19
exp_sdt->asl_file, sdt->asl_file);
20
break;
20
int out = dup(STDOUT_FILENO);
21
21
int ret G_GNUC_UNUSED;
22
case TARGET_SVE_MAGIC:
22
+ int dupret;
23
+ if (sve || size < sizeof(struct target_sve_context)) {
23
24
+ goto err;
24
- dup2(STDERR_FILENO, STDOUT_FILENO);
25
+ }
25
+ g_assert(out >= 0);
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
26
+ dupret = dup2(STDERR_FILENO, STDOUT_FILENO);
27
vq = sve_vq(env);
27
+ g_assert(dupret >= 0);
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
28
ret = system(diff) ;
29
- if (!sve && size == sve_size) {
29
- dup2(out, STDOUT_FILENO);
30
+ if (size == sve_size) {
30
+ dupret = dup2(out, STDOUT_FILENO);
31
sve = (struct target_sve_context *)ctx;
31
+ g_assert(dupret >= 0);
32
break;
32
close(out);
33
g_free(diff);
34
}
33
}
35
--
34
--
36
2.20.1
35
2.25.1
37
38
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20210525225817.400336-3-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
7
---
8
target/arm/translate-a64.c | 15 ++++++---------
8
linux-user/aarch64/signal.c | 3 +++
9
1 file changed, 6 insertions(+), 9 deletions(-)
9
1 file changed, 3 insertions(+)
10
10
11
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-a64.c
13
--- a/linux-user/aarch64/signal.c
14
+++ b/target/arm/translate-a64.c
14
+++ b/linux-user/aarch64/signal.c
15
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
16
int rd = extract32(insn, 0, 5);
16
__get_user(extra_size,
17
17
&((struct target_extra_context *)ctx)->size);
18
if (mos) {
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
19
- unallocated_encoding(s);
19
+ if (!extra) {
20
- return;
20
+ return 1;
21
+ goto do_unallocated;
21
+ }
22
}
23
24
switch (opcode) {
25
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
26
/* FCVT between half, single and double precision */
27
int dtype = extract32(opcode, 0, 2);
28
if (type == 2 || dtype == type) {
29
- unallocated_encoding(s);
30
- return;
31
+ goto do_unallocated;
32
}
33
if (!fp_access_check(s)) {
34
return;
35
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
36
37
case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
38
if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
39
- unallocated_encoding(s);
40
- return;
41
+ goto do_unallocated;
42
}
43
/* fall through */
44
case 0x0 ... 0x3:
45
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
46
break;
22
break;
47
case 3:
23
48
if (!dc_isar_feature(aa64_fp16, s)) {
49
- unallocated_encoding(s);
50
- return;
51
+ goto do_unallocated;
52
}
53
54
if (!fp_access_check(s)) {
55
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
56
handle_fp_1src_half(s, opcode, rd, rn);
57
break;
58
default:
24
default:
59
- unallocated_encoding(s);
60
+ goto do_unallocated;
61
}
62
break;
63
64
default:
65
+ do_unallocated:
66
unallocated_encoding(s);
67
break;
68
}
69
--
25
--
70
2.20.1
26
2.25.1
71
72
diff view generated by jsdifflib
1
From: Alexander Graf <agraf@csgraf.de>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The hooks we have that call us after reset, init and loadvm really all
3
Move the checks out of the parsing loop and into the
4
just want to say "The reference of all register state is in the QEMU
4
restore function. This more closely mirrors the code
5
vcpu struct, please push it".
5
structure in the kernel, and is slightly clearer.
6
6
7
We already have a working pushing mechanism though called cpu->vcpu_dirty,
7
Reject rather than silently skip incorrect VL and SVE record sizes,
8
so we can just reuse that for all of the above, syncing state properly the
8
bringing our checks into line with those the kernel does.
9
next time we actually execute a vCPU.
10
9
11
This fixes PSCI resets on ARM, as they modify CPU state even after the
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
post init call has completed, but before we execute the vCPU again.
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
14
To also make the scheme work for x86, we have to make sure we don't
15
move stale eflags into our env when the vcpu state is dirty.
16
17
Signed-off-by: Alexander Graf <agraf@csgraf.de>
18
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
19
Tested-by: Roman Bolshakov <r.bolshakov@yadro.com>
20
Reviewed-by: Sergio Lopez <slp@redhat.com>
21
Message-id: 20210519202253.76782-13-agraf@csgraf.de
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
---
14
---
24
accel/hvf/hvf-accel-ops.c | 27 +++++++--------------------
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
25
target/i386/hvf/x86hvf.c | 5 ++++-
16
1 file changed, 35 insertions(+), 16 deletions(-)
26
2 files changed, 11 insertions(+), 21 deletions(-)
27
17
28
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
29
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
30
--- a/accel/hvf/hvf-accel-ops.c
20
--- a/linux-user/aarch64/signal.c
31
+++ b/accel/hvf/hvf-accel-ops.c
21
+++ b/linux-user/aarch64/signal.c
32
@@ -XXX,XX +XXX,XX @@ static void hvf_cpu_synchronize_state(CPUState *cpu)
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
33
}
23
}
34
}
24
}
35
25
36
-static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
26
-static void target_restore_sve_record(CPUARMState *env,
37
- run_on_cpu_data arg)
27
- struct target_sve_context *sve, int vq)
38
+static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu,
28
+static bool target_restore_sve_record(CPUARMState *env,
39
+ run_on_cpu_data arg)
29
+ struct target_sve_context *sve,
30
+ int size)
40
{
31
{
41
- hvf_put_registers(cpu);
32
- int i, j;
42
- cpu->vcpu_dirty = false;
33
+ int i, j, vl, vq;
43
+ /* QEMU state is the reference, push it to HVF now and on next entry */
34
44
+ cpu->vcpu_dirty = true;
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
37
+ return false;
38
+ }
39
+
40
+ __get_user(vl, &sve->vl);
41
+ vq = sve_vq(env);
42
+
43
+ /* Reject mismatched VL. */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
45
+ return false;
46
+ }
47
+
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
49
+ if (size <= sizeof(*sve)) {
50
+ return true;
51
+ }
52
+
53
+ /* Reject non-empty but incomplete record. */
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
55
+ return false;
56
+ }
57
+
58
+ /*
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
60
* at a subsequent address. This corresponds to a little-endian load
61
* of our 64-bit hunks.
62
*/
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
64
}
65
}
66
}
67
+ return true;
45
}
68
}
46
69
47
static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
70
static int target_restore_sigframe(CPUARMState *env,
48
{
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
49
- run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
72
struct target_sve_context *sve = NULL;
50
-}
73
uint64_t extra_datap = 0;
51
-
74
bool used_extra = false;
52
-static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
75
- int vq = 0, sve_size = 0;
53
- run_on_cpu_data arg)
76
+ int sve_size = 0;
54
-{
77
55
- hvf_put_registers(cpu);
78
target_restore_general_frame(env, sf);
56
- cpu->vcpu_dirty = false;
79
57
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
58
}
81
if (sve || size < sizeof(struct target_sve_context)) {
59
82
goto err;
60
static void hvf_cpu_synchronize_post_init(CPUState *cpu)
83
}
61
{
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
62
- run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
85
- vq = sve_vq(env);
63
-}
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
64
-
87
- if (size == sve_size) {
65
-static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
88
- sve = (struct target_sve_context *)ctx;
66
- run_on_cpu_data arg)
89
- break;
67
-{
90
- }
68
- cpu->vcpu_dirty = true;
91
- }
69
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
92
- goto err;
70
}
93
+ sve = (struct target_sve_context *)ctx;
71
94
+ sve_size = size;
72
static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
95
+ break;
73
{
96
74
- run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
97
case TARGET_EXTRA_MAGIC:
75
+ run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
98
if (extra || size != sizeof(struct target_extra_context)) {
76
}
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
77
100
}
78
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
101
79
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
102
/* SVE data, if present, overwrites FPSIMD data. */
80
index XXXXXXX..XXXXXXX 100644
103
- if (sve) {
81
--- a/target/i386/hvf/x86hvf.c
104
- target_restore_sve_record(env, sve, vq);
82
+++ b/target/i386/hvf/x86hvf.c
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
83
@@ -XXX,XX +XXX,XX @@ int hvf_process_events(CPUState *cpu_state)
106
+ goto err;
84
X86CPU *cpu = X86_CPU(cpu_state);
107
}
85
CPUX86State *env = &cpu->env;
108
unlock_user(extra, extra_datap, 0);
86
109
return 0;
87
- env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
88
+ if (!cpu_state->vcpu_dirty) {
89
+ /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
90
+ env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
91
+ }
92
93
if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
94
cpu_synchronize_state(cpu_state);
95
--
110
--
96
2.20.1
111
2.25.1
97
98
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is the 64-bit BFCVT and the 32-bit VCVT{B,T}.BF16.F32.
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
4
Restore SM and ZA state according to the records present on return.
4
5
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20210525225817.400336-4-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
---
10
target/arm/helper.h | 1 +
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
11
target/arm/vfp.decode | 2 ++
12
1 file changed, 154 insertions(+), 13 deletions(-)
12
target/arm/translate-a64.c | 19 +++++++++++++++++++
13
target/arm/translate-vfp.c | 24 ++++++++++++++++++++++++
14
target/arm/vfp_helper.c | 5 +++++
15
5 files changed, 51 insertions(+)
16
13
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
16
--- a/linux-user/aarch64/signal.c
20
+++ b/target/arm/helper.h
17
+++ b/linux-user/aarch64/signal.c
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
22
19
23
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
20
#define TARGET_SVE_SIG_FLAG_SM 1
24
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
21
25
+DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr)
22
+#define TARGET_ZA_MAGIC 0x54366345
26
23
+
27
DEF_HELPER_2(vfp_uitoh, f16, i32, ptr)
24
+struct target_za_context {
28
DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
25
+ struct target_aarch64_ctx head;
29
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
26
+ uint16_t vl;
30
index XXXXXXX..XXXXXXX 100644
27
+ uint16_t reserved[3];
31
--- a/target/arm/vfp.decode
28
+ /* The actual ZA data immediately follows. */
32
+++ b/target/arm/vfp.decode
29
+};
33
@@ -XXX,XX +XXX,XX @@ VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
30
+
34
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
35
# VCVTB and VCVTT to f16: Vd format is always vd_sp;
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
36
# Vm format depends on size bit
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
37
+VCVT_b16_f32 ---- 1110 1.11 0011 .... 1001 t:1 1.0 .... \
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
38
+ vd=%vd_sp vm=%vm_sp
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
39
VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
40
vd=%vd_sp vm=%vm_sp
37
+
41
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
38
struct target_rt_sigframe {
42
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
39
struct target_siginfo info;
43
index XXXXXXX..XXXXXXX 100644
40
struct target_ucontext uc;
44
--- a/target/arm/translate-a64.c
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
45
+++ b/target/arm/translate-a64.c
42
}
46
@@ -XXX,XX +XXX,XX @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
43
47
case 0x3: /* FSQRT */
44
static void target_setup_sve_record(struct target_sve_context *sve,
48
gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
45
- CPUARMState *env, int vq, int size)
49
goto done;
46
+ CPUARMState *env, int size)
50
+ case 0x6: /* BFCVT */
47
{
51
+ gen_fpst = gen_helper_bfcvt;
48
- int i, j;
52
+ break;
49
+ int i, j, vq = sve_vq(env);
53
case 0x8: /* FRINTN */
50
54
case 0x9: /* FRINTP */
51
memset(sve, 0, sizeof(*sve));
55
case 0xa: /* FRINTM */
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
56
@@ -XXX,XX +XXX,XX @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
57
}
54
}
58
break;
55
}
59
56
60
+ case 0x6:
57
+static void target_setup_za_record(struct target_za_context *za,
61
+ switch (type) {
58
+ CPUARMState *env, int size)
62
+ case 1: /* BFCVT */
59
+{
63
+ if (!dc_isar_feature(aa64_bf16, s)) {
60
+ int vq = sme_vq(env);
64
+ goto do_unallocated;
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
65
+ }
62
+ int i, j;
66
+ if (!fp_access_check(s)) {
63
+
67
+ return;
64
+ memset(za, 0, sizeof(*za));
68
+ }
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
69
+ handle_fp_1src_single(s, opcode, rd, rn);
66
+ __put_user(size, &za->head.size);
70
+ break;
67
+ __put_user(vl, &za->vl);
71
+ default:
68
+
72
+ goto do_unallocated;
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
70
+ return;
71
+ }
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
73
+
74
+ /*
75
+ * Note that ZA vectors are stored as a byte stream,
76
+ * with each byte element at a subsequent address.
77
+ */
78
+ for (i = 0; i < vl; ++i) {
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
80
+ for (j = 0; j < vq * 2; ++j) {
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
73
+ }
82
+ }
74
+ break;
83
+ }
75
+
84
+}
76
default:
85
+
77
do_unallocated:
86
static void target_restore_general_frame(CPUARMState *env,
78
unallocated_encoding(s);
87
struct target_rt_sigframe *sf)
79
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
88
{
80
index XXXXXXX..XXXXXXX 100644
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
81
--- a/target/arm/translate-vfp.c
90
82
+++ b/target/arm/translate-vfp.c
91
static bool target_restore_sve_record(CPUARMState *env,
83
@@ -XXX,XX +XXX,XX @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
112
113
- __get_user(vl, &sve->vl);
114
- vq = sve_vq(env);
115
+ /*
116
+ * Note that we cannot use sve_vq() because that depends on the
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
84
return true;
133
return true;
85
}
134
}
86
135
87
+static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
136
+static bool target_restore_za_record(CPUARMState *env,
137
+ struct target_za_context *za,
138
+ int size, int *svcr)
88
+{
139
+{
89
+ TCGv_ptr fpst;
140
+ int i, j, vl, vq;
90
+ TCGv_i32 tmp;
141
+
91
+
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
92
+ if (!dc_isar_feature(aa32_bf16, s)) {
93
+ return false;
143
+ return false;
94
+ }
144
+ }
95
+
145
+
96
+ if (!vfp_access_check(s)) {
146
+ __get_user(vl, &za->vl);
147
+ vq = sme_vq(env);
148
+
149
+ /* Reject mismatched VL. */
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
151
+ return false;
152
+ }
153
+
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
97
+ return true;
156
+ return true;
98
+ }
157
+ }
99
+
158
+
100
+ fpst = fpstatus_ptr(FPST_FPCR);
159
+ /* Reject non-empty but incomplete record. */
101
+ tmp = tcg_temp_new_i32();
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
102
+
161
+ return false;
103
+ vfp_load_reg32(tmp, a->vm);
162
+ }
104
+ gen_helper_bfcvt(tmp, tmp, fpst);
163
+
105
+ tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
106
+ tcg_temp_free_ptr(fpst);
165
+
107
+ tcg_temp_free_i32(tmp);
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
108
+ return true;
172
+ return true;
109
+}
173
+}
110
+
174
+
111
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
175
static int target_restore_sigframe(CPUARMState *env,
112
{
176
struct target_rt_sigframe *sf)
113
TCGv_ptr fpst;
177
{
114
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
178
struct target_aarch64_ctx *ctx, *extra = NULL;
115
index XXXXXXX..XXXXXXX 100644
179
struct target_fpsimd_context *fpsimd = NULL;
116
--- a/target/arm/vfp_helper.c
180
struct target_sve_context *sve = NULL;
117
+++ b/target/arm/vfp_helper.c
181
+ struct target_za_context *za = NULL;
118
@@ -XXX,XX +XXX,XX @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
182
uint64_t extra_datap = 0;
119
return float64_to_float32(x, &env->vfp.fp_status);
183
bool used_extra = false;
120
}
184
int sve_size = 0;
121
185
+ int za_size = 0;
122
+uint32_t HELPER(bfcvt)(float32 x, void *status)
186
+ int svcr = 0;
123
+{
187
124
+ return float32_to_bfloat16(x, status);
188
target_restore_general_frame(env, sf);
125
+}
189
126
+
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
127
/*
191
sve_size = size;
128
* VFP3 fixed point conversion. The AArch32 versions of fix-to-float
192
break;
129
* must always round-to-nearest; the AArch64 ones honour the FPSCR
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
130
--
288
--
131
2.20.1
289
2.25.1
132
133
diff view generated by jsdifflib
1
From: Damien Goutte-Gattat <dgouttegattat@incenp.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The 4.x branch of Sphinx introduces a breaking change, as generated man
3
Add "sve" to the sve prctl functions, to distinguish
4
pages are now written to subdirectories corresponding to the manual
4
them from the coming "sme" prctls with similar names.
5
section they belong to. This results in `make install` erroring out when
6
attempting to install the man pages, because they are not where it
7
expects to find them.
8
5
9
This patch restores the behavior of Sphinx 3.x regarding man pages.
10
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/256
12
Signed-off-by: Damien Goutte-Gattat <dgouttegattat@incenp.org>
13
Message-id: 20210503161422.15028-1-dgouttegattat@incenp.org
14
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
10
---
17
docs/conf.py | 1 +
11
linux-user/aarch64/target_prctl.h | 8 ++++----
18
1 file changed, 1 insertion(+)
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
19
14
20
diff --git a/docs/conf.py b/docs/conf.py
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/docs/conf.py
17
--- a/linux-user/aarch64/target_prctl.h
23
+++ b/docs/conf.py
18
+++ b/linux-user/aarch64/target_prctl.h
24
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
25
['Stefan Hajnoczi <stefanha@redhat.com>',
20
#ifndef AARCH64_TARGET_PRCTL_H
26
'Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>'], 1),
21
#define AARCH64_TARGET_PRCTL_H
27
]
22
28
+man_make_section_directory = False
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
29
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
30
# -- Options for Texinfo output -------------------------------------------
25
{
31
26
ARMCPU *cpu = env_archcpu(env);
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
29
}
30
return -TARGET_EINVAL;
31
}
32
-#define do_prctl_get_vl do_prctl_get_vl
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/linux-user/syscall.c
52
+++ b/linux-user/syscall.c
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
54
#ifndef do_prctl_set_fp_mode
55
#define do_prctl_set_fp_mode do_prctl_inval1
56
#endif
57
-#ifndef do_prctl_get_vl
58
-#define do_prctl_get_vl do_prctl_inval0
59
+#ifndef do_prctl_sve_get_vl
60
+#define do_prctl_sve_get_vl do_prctl_inval0
61
#endif
62
-#ifndef do_prctl_set_vl
63
-#define do_prctl_set_vl do_prctl_inval1
64
+#ifndef do_prctl_sve_set_vl
65
+#define do_prctl_sve_set_vl do_prctl_inval1
66
#endif
67
#ifndef do_prctl_reset_keys
68
#define do_prctl_reset_keys do_prctl_inval1
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
case PR_SET_FP_MODE:
71
return do_prctl_set_fp_mode(env, arg2);
72
case PR_SVE_GET_VL:
73
- return do_prctl_get_vl(env);
74
+ return do_prctl_sve_get_vl(env);
75
case PR_SVE_SET_VL:
76
- return do_prctl_set_vl(env, arg2);
77
+ return do_prctl_sve_set_vl(env, arg2);
78
case PR_PAC_RESET_KEYS:
79
if (arg3 || arg4 || arg5) {
80
return -TARGET_EINVAL;
32
--
81
--
33
2.20.1
82
2.25.1
34
35
diff view generated by jsdifflib
1
The M-profile FPSCR has an LTPSIZE field, but if MVE is not
1
From: Richard Henderson <richard.henderson@linaro.org>
2
implemented it is read-only and always reads as 4; this is how QEMU
3
currently handles it.
4
2
5
Make the field writable when MVE is implemented.
3
These prctls set the Streaming SVE vector length, which may
4
be completely different from the Normal SVE vector length.
6
5
7
We can safely add the field to the MVE migration struct because
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
currently no CPUs enable MVE and so the migration struct is never
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
used.
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
12
linux-user/syscall.c | 16 +++++++++
13
2 files changed, 70 insertions(+)
10
14
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20210520152840.24453-8-peter.maydell@linaro.org
14
---
15
target/arm/cpu.h | 3 ++-
16
target/arm/machine.c | 1 +
17
target/arm/vfp_helper.c | 9 ++++++---
18
3 files changed, 9 insertions(+), 4 deletions(-)
19
20
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/cpu.h
17
--- a/linux-user/aarch64/target_prctl.h
23
+++ b/target/arm/cpu.h
18
+++ b/linux-user/aarch64/target_prctl.h
24
@@ -XXX,XX +XXX,XX @@ typedef struct CPUARMState {
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
uint32_t fpdscr[M_REG_NUM_BANKS];
20
{
26
uint32_t cpacr[M_REG_NUM_BANKS];
21
ARMCPU *cpu = env_archcpu(env);
27
uint32_t nsacr;
22
if (cpu_isar_feature(aa64_sve, cpu)) {
28
- int ltpsize;
23
+ /* PSTATE.SM is always unset on syscall entry. */
29
+ uint32_t ltpsize;
24
return sve_vq(env) * 16;
30
uint32_t vpr;
25
}
31
} v7m;
26
return -TARGET_EINVAL;
32
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
33
@@ -XXX,XX +XXX,XX @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
34
29
uint32_t vq, old_vq;
35
#define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */
30
36
#define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT)
31
+ /* PSTATE.SM is always unset on syscall entry. */
37
+#define FPCR_LTPSIZE_LENGTH 3
32
old_vq = sve_vq(env);
38
33
39
#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V)
34
/*
40
#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC)
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
41
diff --git a/target/arm/machine.c b/target/arm/machine.c
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
42
index XXXXXXX..XXXXXXX 100644
95
index XXXXXXX..XXXXXXX 100644
43
--- a/target/arm/machine.c
96
--- a/linux-user/syscall.c
44
+++ b/target/arm/machine.c
97
+++ b/linux-user/syscall.c
45
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_m_mve = {
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
46
.needed = mve_needed,
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
47
.fields = (VMStateField[]) {
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
48
VMSTATE_UINT32(env.v7m.vpr, ARMCPU),
101
#endif
49
+ VMSTATE_UINT32(env.v7m.ltpsize, ARMCPU),
102
+#ifndef PR_SME_SET_VL
50
VMSTATE_END_OF_LIST()
103
+# define PR_SME_SET_VL 63
51
},
104
+# define PR_SME_GET_VL 64
52
};
105
+# define PR_SME_VL_LEN_MASK 0xffff
53
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
106
+# define PR_SME_VL_INHERIT (1 << 17)
54
index XXXXXXX..XXXXXXX 100644
107
+#endif
55
--- a/target/arm/vfp_helper.c
108
56
+++ b/target/arm/vfp_helper.c
109
#include "target_prctl.h"
57
@@ -XXX,XX +XXX,XX @@ uint32_t vfp_get_fpscr(CPUARMState *env)
110
58
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
59
void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
112
#ifndef do_prctl_set_unalign
60
{
113
#define do_prctl_set_unalign do_prctl_inval1
61
+ ARMCPU *cpu = env_archcpu(env);
114
#endif
62
+
115
+#ifndef do_prctl_sme_get_vl
63
/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
116
+#define do_prctl_sme_get_vl do_prctl_inval0
64
- if (!cpu_isar_feature(any_fp16, env_archcpu(env))) {
117
+#endif
65
+ if (!cpu_isar_feature(any_fp16, cpu)) {
118
+#ifndef do_prctl_sme_set_vl
66
val &= ~FPCR_FZ16;
119
+#define do_prctl_sme_set_vl do_prctl_inval1
67
}
120
+#endif
68
121
69
@@ -XXX,XX +XXX,XX @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
* because in v7A no-short-vector-support cores still had to
123
abi_long arg3, abi_long arg4, abi_long arg5)
71
* allow Stride/Len to be written with the only effect that
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
72
* some insns are required to UNDEF if the guest sets them.
125
return do_prctl_sve_get_vl(env);
73
- *
126
case PR_SVE_SET_VL:
74
- * TODO: if M-profile MVE implemented, set LTPSIZE.
127
return do_prctl_sve_set_vl(env, arg2);
75
*/
128
+ case PR_SME_GET_VL:
76
env->vfp.vec_len = extract32(val, 16, 3);
129
+ return do_prctl_sme_get_vl(env);
77
env->vfp.vec_stride = extract32(val, 20, 2);
130
+ case PR_SME_SET_VL:
78
+ } else if (cpu_isar_feature(aa32_mve, cpu)) {
131
+ return do_prctl_sme_set_vl(env, arg2);
79
+ env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
132
case PR_PAC_RESET_KEYS:
80
+ FPCR_LTPSIZE_LENGTH);
133
if (arg3 || arg4 || arg5) {
81
}
134
return -TARGET_EINVAL;
82
83
if (arm_feature(env, ARM_FEATURE_NEON)) {
84
--
135
--
85
2.20.1
136
2.25.1
86
87
diff view generated by jsdifflib
1
The fp_sysreg_checks() function is supposed to be returning an
1
From: Richard Henderson <richard.henderson@linaro.org>
2
FPSysRegCheckResult, which is an enum with three possible values.
3
However, three places in the function "return false" (a hangover from
4
a previous iteration of the design where the function just returned a
5
bool). Make these return FPSysRegCheckFailed instead (for no
6
functional change, since both false and FPSysRegCheckFailed are
7
zero).
8
2
3
There's no reason to set CPACR_EL1.ZEN if SVE is disabled.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20210520152840.24453-6-peter.maydell@linaro.org
12
---
9
---
13
target/arm/translate-vfp.c | 6 +++---
10
target/arm/cpu.c | 7 +++----
14
1 file changed, 3 insertions(+), 3 deletions(-)
11
1 file changed, 3 insertions(+), 4 deletions(-)
15
12
16
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate-vfp.c
15
--- a/target/arm/cpu.c
19
+++ b/target/arm/translate-vfp.c
16
+++ b/target/arm/cpu.c
20
@@ -XXX,XX +XXX,XX @@ static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
21
break;
18
/* and to the FP/Neon instructions */
22
case ARM_VFP_FPSCR_NZCVQC:
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
20
CPACR_EL1, FPEN, 3);
24
- return false;
21
- /* and to the SVE instructions */
25
+ return FPSysRegCheckFailed;
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
- CPACR_EL1, ZEN, 3);
24
- /* with reasonable vector length */
25
+ /* and to the SVE instructions, with default vector length */
26
if (cpu_isar_feature(aa64_sve, cpu)) {
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
+ CPACR_EL1, ZEN, 3);
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
26
}
30
}
27
break;
31
/*
28
case ARM_VFP_FPCXT_S:
29
case ARM_VFP_FPCXT_NS:
30
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
31
- return false;
32
+ return FPSysRegCheckFailed;
33
}
34
if (!s->v8m_secure) {
35
- return false;
36
+ return FPSysRegCheckFailed;
37
}
38
break;
39
default:
40
--
32
--
41
2.20.1
33
2.25.1
42
43
diff view generated by jsdifflib
1
Currently we allow board models to specify the initial value of the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
Secure VTOR register, using an init-svtor property on the TYPE_ARMV7M
3
object which is plumbed through to the CPU. Allow board models to
4
also specify the initial value of the Non-secure VTOR via a similar
5
init-nsvtor property.
6
2
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20210520152840.24453-10-peter.maydell@linaro.org
10
---
9
---
11
include/hw/arm/armv7m.h | 2 ++
10
target/arm/cpu.c | 11 +++++++++++
12
target/arm/cpu.h | 2 ++
11
1 file changed, 11 insertions(+)
13
hw/arm/armv7m.c | 7 +++++++
14
target/arm/cpu.c | 10 ++++++++++
15
4 files changed, 21 insertions(+)
16
12
17
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/hw/arm/armv7m.h
20
+++ b/include/hw/arm/armv7m.h
21
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M)
22
* devices will be automatically layered on top of this view.)
23
* + Property "idau": IDAU interface (forwarded to CPU object)
24
* + Property "init-svtor": secure VTOR reset value (forwarded to CPU object)
25
+ * + Property "init-nsvtor": non-secure VTOR reset value (forwarded to CPU object)
26
* + Property "vfp": enable VFP (forwarded to CPU object)
27
* + Property "dsp": enable DSP (forwarded to CPU object)
28
* + Property "enable-bitband": expose bitbanded IO
29
@@ -XXX,XX +XXX,XX @@ struct ARMv7MState {
30
MemoryRegion *board_memory;
31
Object *idau;
32
uint32_t init_svtor;
33
+ uint32_t init_nsvtor;
34
bool enable_bitband;
35
bool start_powered_off;
36
bool vfp;
37
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/target/arm/cpu.h
40
+++ b/target/arm/cpu.h
41
@@ -XXX,XX +XXX,XX @@ struct ARMCPU {
42
43
/* For v8M, initial value of the Secure VTOR */
44
uint32_t init_svtor;
45
+ /* For v8M, initial value of the Non-secure VTOR */
46
+ uint32_t init_nsvtor;
47
48
/* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
49
* QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
50
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/hw/arm/armv7m.c
53
+++ b/hw/arm/armv7m.c
54
@@ -XXX,XX +XXX,XX @@ static void armv7m_realize(DeviceState *dev, Error **errp)
55
return;
56
}
57
}
58
+ if (object_property_find(OBJECT(s->cpu), "init-nsvtor")) {
59
+ if (!object_property_set_uint(OBJECT(s->cpu), "init-nsvtor",
60
+ s->init_nsvtor, errp)) {
61
+ return;
62
+ }
63
+ }
64
if (object_property_find(OBJECT(s->cpu), "start-powered-off")) {
65
if (!object_property_set_bool(OBJECT(s->cpu), "start-powered-off",
66
s->start_powered_off, errp)) {
67
@@ -XXX,XX +XXX,XX @@ static Property armv7m_properties[] = {
68
MemoryRegion *),
69
DEFINE_PROP_LINK("idau", ARMv7MState, idau, TYPE_IDAU_INTERFACE, Object *),
70
DEFINE_PROP_UINT32("init-svtor", ARMv7MState, init_svtor, 0),
71
+ DEFINE_PROP_UINT32("init-nsvtor", ARMv7MState, init_nsvtor, 0),
72
DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false),
73
DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off,
74
false),
75
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
76
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
77
--- a/target/arm/cpu.c
15
--- a/target/arm/cpu.c
78
+++ b/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
79
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
80
env->regs[14] = 0xffffffff;
18
CPACR_EL1, ZEN, 3);
81
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
82
env->v7m.vecbase[M_REG_S] = cpu->init_svtor & 0xffffff80;
20
}
83
+ env->v7m.vecbase[M_REG_NS] = cpu->init_nsvtor & 0xffffff80;
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
84
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
85
/* Load the initial SP and PC from offset 0 and 4 in the vector table */
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
86
vecbase = env->v7m.vecbase[env->v7m.secure];
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
87
@@ -XXX,XX +XXX,XX @@ void arm_cpu_post_init(Object *obj)
25
+ CPACR_EL1, SMEN, 3);
88
&cpu->init_svtor,
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
89
OBJ_PROP_FLAG_READWRITE);
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
90
}
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
91
+ if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
29
+ SMCR, FA64, 1);
92
+ /*
30
+ }
93
+ * Initial value of the NS VTOR (for cores without the Security
31
+ }
94
+ * extension, this is the only VTOR)
32
/*
95
+ */
33
* Enable 48-bit address space (TODO: take reserved_va into account).
96
+ object_property_add_uint32_ptr(obj, "init-nsvtor",
34
* Enable TBI0 but not TBI1.
97
+ &cpu->init_nsvtor,
98
+ OBJ_PROP_FLAG_READWRITE);
99
+ }
100
101
qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property);
102
103
--
35
--
104
2.20.1
36
2.25.1
105
106
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20210525225817.400336-12-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
7
---
8
linux-user/elfload.c | 2 ++
8
linux-user/elfload.c | 20 ++++++++++++++++++++
9
1 file changed, 2 insertions(+)
9
1 file changed, 20 insertions(+)
10
10
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/elfload.c
13
--- a/linux-user/elfload.c
14
+++ b/linux-user/elfload.c
14
+++ b/linux-user/elfload.c
15
@@ -XXX,XX +XXX,XX @@ enum {
16
ARM_HWCAP2_A64_RNG = 1 << 16,
17
ARM_HWCAP2_A64_BTI = 1 << 17,
18
ARM_HWCAP2_A64_MTE = 1 << 18,
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
31
};
32
33
#define ELF_HWCAP get_elf_hwcap()
15
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
16
GET_FEATURE_ID(aa64_sve_i8mm, ARM_HWCAP2_A64_SVEI8MM);
17
GET_FEATURE_ID(aa64_sve_f32mm, ARM_HWCAP2_A64_SVEF32MM);
18
GET_FEATURE_ID(aa64_sve_f64mm, ARM_HWCAP2_A64_SVEF64MM);
19
+ GET_FEATURE_ID(aa64_sve_bf16, ARM_HWCAP2_A64_SVEBF16);
20
GET_FEATURE_ID(aa64_i8mm, ARM_HWCAP2_A64_I8MM);
21
+ GET_FEATURE_ID(aa64_bf16, ARM_HWCAP2_A64_BF16);
22
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
23
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
24
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
39
+ ARM_HWCAP2_A64_SME_F32F32 |
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
25
--
49
--
26
2.20.1
50
2.25.1
27
28
diff view generated by jsdifflib