1
The big thing in here is RTH's caching-of-tb-flags patchset
1
I don't have anything else queued up at the moment, so this is just
2
which should improve TCG performance.
2
Richard's SME patches.
3
3
4
thanks
5
-- PMM
4
-- PMM
6
5
7
The following changes since commit 2152e740a8938b3bad73bfe1a01f8b94dab02d41:
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
8
7
9
Merge remote-tracking branch 'remotes/vivier2/tags/trivial-branch-pull-request' into staging (2019-10-22 12:03:03 +0100)
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
10
9
11
are available in the Git repository at:
10
are available in the Git repository at:
12
11
13
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20191022
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
14
13
15
for you to fetch changes up to 833043a060f7d0e95ded88e61e992466305c0345:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
16
15
17
hw/arm/digic4: Inline digic4_board_setup_ram() function (2019-10-22 14:21:57 +0100)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
18
17
19
----------------------------------------------------------------
18
----------------------------------------------------------------
20
target-arm queue:
19
target-arm:
21
* Fix sign-extension for SMLAL* instructions
20
* Implement SME emulation, for both system and linux-user
22
* aspeed: Add an AST2600 eval board
23
* Various ptimer device conversions to new transaction API
24
* Cache TB flags to avoid expensively recomputing them every time
25
* Add a dummy Samsung SDHCI controller model to exynos4 boards
26
* Minor refactorings of RAM creation for some arm boards
27
21
28
----------------------------------------------------------------
22
----------------------------------------------------------------
29
Cédric Le Goater (1):
23
Richard Henderson (45):
30
aspeed: Add an AST2600 eval board
24
target/arm: Handle SME in aarch64_cpu_dump_state
25
target/arm: Add infrastructure for disas_sme
26
target/arm: Trap non-streaming usage when Streaming SVE is active
27
target/arm: Mark ADR as non-streaming
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
31
69
32
Guenter Roeck (1):
70
docs/system/arm/emulation.rst | 4 +
33
hw/timer/exynos4210_mct: Initialize ptimer before starting it
71
linux-user/aarch64/target_cpu.h | 5 +-
34
72
linux-user/aarch64/target_prctl.h | 62 +-
35
Peter Maydell (7):
73
target/arm/cpu.h | 7 +
36
hw/timer/arm_mptimer.c: Undo accidental rename of arm_mptimer_init()
74
target/arm/helper-sme.h | 126 ++++
37
hw/timer/puv3_ost.c: Switch to transaction-based ptimer API
75
target/arm/helper-sve.h | 4 +
38
hw/timer/sh_timer: Switch to transaction-based ptimer API
76
target/arm/helper.h | 18 +
39
hw/timer/lm32_timer: Switch to transaction-based ptimer API
77
target/arm/translate-a64.h | 45 ++
40
hw/timer/altera_timer.c: Switch to transaction-based ptimer API
78
target/arm/translate.h | 16 +
41
hw/watchdog/etraxfs_timer.c: Switch to transaction-based ptimer API
79
target/arm/sme-fa64.decode | 60 ++
42
hw/m68k/mcf5208.c: Switch to transaction-based ptimer API
80
target/arm/sme.decode | 88 +++
43
81
target/arm/sve.decode | 41 +-
44
Philippe Mathieu-Daudé (9):
82
linux-user/aarch64/cpu_loop.c | 9 +
45
hw/sd/sdhci: Add a comment to distinct the i.MX eSDHC functions
83
linux-user/aarch64/signal.c | 243 ++++++--
46
hw/sd/sdhci: Add dummy Samsung SDHCI controller
84
linux-user/elfload.c | 20 +
47
hw/arm/exynos4210: Use the Samsung s3c SDHCI controller
85
linux-user/syscall.c | 28 +-
48
hw/arm/xilinx_zynq: Use the IEC binary prefix definitions
86
target/arm/cpu.c | 35 +-
49
hw/arm/mps2: Use the IEC binary prefix definitions
87
target/arm/cpu64.c | 11 +
50
hw/arm/collie: Create the RAM in the board
88
target/arm/helper.c | 56 +-
51
hw/arm/omap2: Create the RAM in the board
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
52
hw/arm/omap1: Create the RAM in the board
90
target/arm/sve_helper.c | 28 +
53
hw/arm/digic4: Inline digic4_board_setup_ram() function
91
target/arm/translate-a64.c | 103 +++-
54
92
target/arm/translate-sme.c | 373 ++++++++++++
55
Richard Henderson (23):
93
target/arm/translate-sve.c | 393 ++++++++++---
56
target/arm: Fix sign-extension for SMLAL*
94
target/arm/translate-vfp.c | 12 +
57
target/arm: Split out rebuild_hflags_common
95
target/arm/translate.c | 2 +
58
target/arm: Split out rebuild_hflags_a64
96
target/arm/vec_helper.c | 24 +
59
target/arm: Split out rebuild_hflags_common_32
97
target/arm/meson.build | 3 +
60
target/arm: Split arm_cpu_data_is_big_endian
98
28 files changed, 2821 insertions(+), 135 deletions(-)
61
target/arm: Split out rebuild_hflags_m32
99
create mode 100644 target/arm/sme-fa64.decode
62
target/arm: Reduce tests vs M-profile in cpu_get_tb_cpu_state
100
create mode 100644 target/arm/sme.decode
63
target/arm: Split out rebuild_hflags_a32
101
create mode 100644 target/arm/translate-sme.c
64
target/arm: Split out rebuild_hflags_aprofile
65
target/arm: Hoist XSCALE_CPAR, VECLEN, VECSTRIDE in cpu_get_tb_cpu_state
66
target/arm: Simplify set of PSTATE_SS in cpu_get_tb_cpu_state
67
target/arm: Hoist computation of TBFLAG_A32.VFPEN
68
target/arm: Add arm_rebuild_hflags
69
target/arm: Split out arm_mmu_idx_el
70
target/arm: Hoist store to cs_base in cpu_get_tb_cpu_state
71
target/arm: Add HELPER(rebuild_hflags_{a32, a64, m32})
72
target/arm: Rebuild hflags at EL changes
73
target/arm: Rebuild hflags at MSR writes
74
target/arm: Rebuild hflags at CPSR writes
75
target/arm: Rebuild hflags at Xscale SCTLR writes
76
target/arm: Rebuild hflags for M-profile
77
target/arm: Rebuild hflags for M-profile NVIC
78
target/arm: Rely on hflags correct in cpu_get_tb_cpu_state
79
80
hw/arm/strongarm.h | 4 +-
81
include/hw/arm/aspeed.h | 1 +
82
include/hw/arm/omap.h | 10 +-
83
include/hw/sd/sdhci.h | 2 +
84
target/arm/cpu.h | 84 ++++++----
85
target/arm/helper.h | 4 +
86
target/arm/internals.h | 9 ++
87
hw/arm/aspeed.c | 23 +++
88
hw/arm/collie.c | 8 +-
89
hw/arm/digic_boards.c | 9 +-
90
hw/arm/exynos4210.c | 2 +-
91
hw/arm/mps2-tz.c | 3 +-
92
hw/arm/mps2.c | 3 +-
93
hw/arm/nseries.c | 10 +-
94
hw/arm/omap1.c | 12 +-
95
hw/arm/omap2.c | 13 +-
96
hw/arm/omap_sx1.c | 8 +-
97
hw/arm/palm.c | 8 +-
98
hw/arm/strongarm.c | 7 +-
99
hw/arm/xilinx_zynq.c | 3 +-
100
hw/intc/armv7m_nvic.c | 22 +--
101
hw/m68k/mcf5208.c | 9 +-
102
hw/sd/sdhci.c | 68 +++++++-
103
hw/timer/altera_timer.c | 13 +-
104
hw/timer/arm_mptimer.c | 4 +-
105
hw/timer/etraxfs_timer.c | 23 +--
106
hw/timer/exynos4210_mct.c | 2 +-
107
hw/timer/lm32_timer.c | 13 +-
108
hw/timer/puv3_ost.c | 9 +-
109
hw/timer/sh_timer.c | 13 +-
110
linux-user/syscall.c | 1 +
111
target/arm/cpu.c | 1 +
112
target/arm/helper-a64.c | 3 +
113
target/arm/helper.c | 393 +++++++++++++++++++++++++++++----------------
114
target/arm/m_helper.c | 6 +
115
target/arm/machine.c | 1 +
116
target/arm/op_helper.c | 4 +
117
target/arm/translate-a64.c | 13 +-
118
target/arm/translate.c | 37 ++++-
119
39 files changed, 588 insertions(+), 270 deletions(-)
120
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/cpu.c | 17 ++++++++++++++++-
11
1 file changed, 16 insertions(+), 1 deletion(-)
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
18
int i;
19
int el = arm_current_el(env);
20
const char *ns_status;
21
+ bool sve;
22
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
24
for (i = 0; i < 32; i++) {
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
26
el,
27
psr & PSTATE_SP ? 'h' : 't');
28
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
31
+ env->svcr,
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
+ }
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
37
}
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
41
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
44
+ sve = sme_exception_el(env, el) == 0;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
49
+ }
50
+
51
+ if (sve) {
52
int j, zcr_len = sve_vqm1_for_el(env, el);
53
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
55
--
56
2.25.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Having the RAM creation code in a separate function is not
3
This includes the build rules for the decoder, and the
4
very helpful. Move this code directly inside the board_init()
4
new file for translation, but excludes any instructions.
5
function, this will later allow the board to have the QOM
6
ownership of the RAM.
7
5
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
11
Message-id: 20191021190653.9511-7-philmd@redhat.com
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
hw/arm/digic_boards.c | 9 ++-------
11
target/arm/translate-a64.h | 1 +
15
1 file changed, 2 insertions(+), 7 deletions(-)
12
target/arm/sme.decode | 20 ++++++++++++++++++++
13
target/arm/translate-a64.c | 7 ++++++-
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
15
target/arm/meson.build | 2 ++
16
5 files changed, 64 insertions(+), 1 deletion(-)
17
create mode 100644 target/arm/sme.decode
18
create mode 100644 target/arm/translate-sme.c
16
19
17
diff --git a/hw/arm/digic_boards.c b/hw/arm/digic_boards.c
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
18
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/arm/digic_boards.c
22
--- a/target/arm/translate-a64.h
20
+++ b/hw/arm/digic_boards.c
23
+++ b/target/arm/translate-a64.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct DigicBoard {
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
22
const char *rom1_def_filename;
25
}
23
} DigicBoard;
26
24
27
bool disas_sve(DisasContext *, uint32_t);
25
-static void digic4_board_setup_ram(DigicBoardState *s, hwaddr ram_size)
28
+bool disas_sme(DisasContext *, uint32_t);
26
-{
29
27
- memory_region_allocate_system_memory(&s->ram, NULL, "ram", ram_size);
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
28
- memory_region_add_subregion(get_system_memory(), 0, &s->ram);
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
29
-}
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
30
-
33
new file mode 100644
31
static void digic4_board_init(DigicBoard *board)
34
index XXXXXXX..XXXXXXX
32
{
35
--- /dev/null
33
Error *err = NULL;
36
+++ b/target/arm/sme.decode
34
@@ -XXX,XX +XXX,XX @@ static void digic4_board_init(DigicBoard *board)
37
@@ -XXX,XX +XXX,XX @@
35
exit(1);
38
+# AArch64 SME instruction descriptions
39
+#
40
+# Copyright (c) 2022 Linaro, Ltd
41
+#
42
+# This library is free software; you can redistribute it and/or
43
+# modify it under the terms of the GNU Lesser General Public
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
47
+# This library is distributed in the hope that it will be useful,
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
54
+
55
+#
56
+# This file is processed by scripts/decodetree.py
57
+#
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
36
}
63
}
37
64
38
- digic4_board_setup_ram(s, board->ram_size);
65
switch (extract32(insn, 25, 4)) {
39
+ memory_region_allocate_system_memory(&s->ram, NULL, "ram", board->ram_size);
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
40
+ memory_region_add_subregion(get_system_memory(), 0, &s->ram);
67
+ case 0x0:
41
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
42
if (board->add_rom0) {
69
+ unallocated_encoding(s);
43
board->add_rom0(s, DIGIC4_ROM0_BASE, board->rom0_def_filename);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/translate-sme.c
81
@@ -XXX,XX +XXX,XX @@
82
+/*
83
+ * AArch64 SME translation
84
+ *
85
+ * Copyright (c) 2022 Linaro, Ltd
86
+ *
87
+ * This library is free software; you can redistribute it and/or
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
91
+ *
92
+ * This library is distributed in the hope that it will be useful,
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
99
+ */
100
+
101
+#include "qemu/osdep.h"
102
+#include "cpu.h"
103
+#include "tcg/tcg-op.h"
104
+#include "tcg/tcg-op-gvec.h"
105
+#include "tcg/tcg-gvec-desc.h"
106
+#include "translate.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
110
+
111
+
112
+/*
113
+ * Include the generated decoder.
114
+ */
115
+
116
+#include "decode-sme.c.inc"
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
118
index XXXXXXX..XXXXXXX 100644
119
--- a/target/arm/meson.build
120
+++ b/target/arm/meson.build
121
@@ -XXX,XX +XXX,XX @@
122
gen = [
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
129
'sme_helper.c',
130
'translate-a64.c',
131
'translate-sve.c',
132
+ 'translate-sme.c',
133
))
134
135
arm_softmmu_ss = ss.source_set()
44
--
136
--
45
2.20.1
137
2.25.1
46
47
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create a function to compute the values of the TBFLAG_ANY bits
3
This new behaviour is in the ARM pseudocode function
4
that will be cached. For now, the env->hflags variable is not
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
used, and the results are fed back to cpu_get_tb_cpu_state.
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
6
the trap would be delivered is in AArch64 mode.
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
detection ought to be trivially true, but the pseudocode still contains
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
support for EL[12] when v9 features are present.
12
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20191018174431.1784-2-richard.henderson@linaro.org
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
24
---
12
target/arm/cpu.h | 29 ++++++++++++++++++-----------
25
target/arm/cpu.h | 7 +++
13
target/arm/helper.c | 26 +++++++++++++++++++-------
26
target/arm/translate.h | 4 ++
14
2 files changed, 37 insertions(+), 18 deletions(-)
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
28
target/arm/helper.c | 41 +++++++++++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
30
target/arm/translate-vfp.c | 12 +++++
31
target/arm/translate.c | 2 +
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
15
35
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu.h
38
--- a/target/arm/cpu.h
19
+++ b/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
20
@@ -XXX,XX +XXX,XX @@ typedef struct CPUARMState {
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
21
uint32_t pstate;
22
uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
23
24
+ /* Cached TBFLAGS state. See below for which bits are included. */
25
+ uint32_t hflags;
26
+
27
/* Frequently accessed CPSR bits are stored separately for efficiency.
28
This contains all the other bits. Use cpsr_{read,write} to access
29
the whole CPSR. */
30
@@ -XXX,XX +XXX,XX @@ typedef ARMCPU ArchCPU;
31
32
#include "exec/cpu-all.h"
33
34
-/* Bit usage in the TB flags field: bit 31 indicates whether we are
35
+/*
36
+ * Bit usage in the TB flags field: bit 31 indicates whether we are
37
* in 32 or 64 bit mode. The meaning of the other bits depends on that.
38
* We put flags which are shared between 32 and 64 bit mode at the top
39
* of the word, and flags which apply to only one mode at the bottom.
40
+ *
41
+ * Unless otherwise noted, these bits are cached in env->hflags.
42
*/
43
FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1)
44
FIELD(TBFLAG_ANY, MMUIDX, 28, 3)
45
FIELD(TBFLAG_ANY, SS_ACTIVE, 27, 1)
46
-FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1)
47
+FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1) /* Not cached. */
48
/* Target EL if we take a floating-point-disabled exception */
49
FIELD(TBFLAG_ANY, FPEXC_EL, 24, 2)
50
FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
51
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
52
FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 21, 2)
53
54
/* Bit usage when in AArch32 state: */
55
-FIELD(TBFLAG_A32, THUMB, 0, 1)
56
-FIELD(TBFLAG_A32, VECLEN, 1, 3)
57
-FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)
58
+FIELD(TBFLAG_A32, THUMB, 0, 1) /* Not cached. */
59
+FIELD(TBFLAG_A32, VECLEN, 1, 3) /* Not cached. */
60
+FIELD(TBFLAG_A32, VECSTRIDE, 4, 2) /* Not cached. */
61
/*
62
* We store the bottom two bits of the CPAR as TB flags and handle
63
* checks on the other bits at runtime. This shares the same bits as
64
* VECSTRIDE, which is OK as no XScale CPU has VFP.
65
+ * Not cached, because VECLEN+VECSTRIDE are not cached.
66
*/
67
FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
68
/*
69
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
70
* the same thing as the current security state of the processor!
41
* the same thing as the current security state of the processor!
71
*/
42
*/
72
FIELD(TBFLAG_A32, NS, 6, 1)
43
FIELD(TBFLAG_A32, NS, 10, 1)
73
-FIELD(TBFLAG_A32, VFPEN, 7, 1)
44
+/*
74
-FIELD(TBFLAG_A32, CONDEXEC, 8, 8)
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
75
+FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Not cached. */
46
+ * This requires an SME trap from AArch32 mode when using NEON.
76
+FIELD(TBFLAG_A32, CONDEXEC, 8, 8) /* Not cached. */
47
+ */
77
FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
78
/* For M profile only, set if FPCCR.LSPACT is set */
49
79
-FIELD(TBFLAG_A32, LSPACT, 18, 1)
50
/*
80
+FIELD(TBFLAG_A32, LSPACT, 18, 1) /* Not cached. */
51
* Bit usage when in AArch32 state, for M-profile only.
81
/* For M profile only, set if we must create a new FP context */
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
82
-FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1)
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
83
+FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1) /* Not cached. */
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
84
/* For M profile only, set if FPCCR.S does not match current security state */
55
FIELD(TBFLAG_A64, SVL, 24, 4)
85
-FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
86
+FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1) /* Not cached. */
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
87
/* For M profile only, Handler (ie not Thread) mode */
58
88
FIELD(TBFLAG_A32, HANDLER, 21, 1)
59
/*
89
/* For M profile only, whether we should generate stack-limit checks */
60
* Helpers for using the above.
90
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
91
FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
62
index XXXXXXX..XXXXXXX 100644
92
FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
63
--- a/target/arm/translate.h
93
FIELD(TBFLAG_A64, BT, 9, 1)
64
+++ b/target/arm/translate.h
94
-FIELD(TBFLAG_A64, BTYPE, 10, 2)
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
95
+FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
66
bool pstate_sm;
96
FIELD(TBFLAG_A64, TBID, 12, 2)
67
/* True if PSTATE.ZA is set. */
97
68
bool pstate_za;
98
static inline bool bswap_code(bool sctlr_b)
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
75
/*
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/sme-fa64.decode
81
@@ -XXX,XX +XXX,XX @@
82
+# AArch64 SME allowed instruction decoding
83
+#
84
+# Copyright (c) 2022 Linaro, Ltd
85
+#
86
+# This library is free software; you can redistribute it and/or
87
+# modify it under the terms of the GNU Lesser General Public
88
+# License as published by the Free Software Foundation; either
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
99
diff --git a/target/arm/helper.c b/target/arm/helper.c
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
100
index XXXXXXX..XXXXXXX 100644
173
index XXXXXXX..XXXXXXX 100644
101
--- a/target/arm/helper.c
174
--- a/target/arm/helper.c
102
+++ b/target/arm/helper.c
175
+++ b/target/arm/helper.c
103
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
104
}
178
}
105
#endif
179
106
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
107
+static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
181
+static bool sme_fa64(CPUARMState *env, int el)
108
+ ARMMMUIdx mmu_idx, uint32_t flags)
182
+{
109
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
110
+ flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
184
+ return false;
111
+ flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
185
+ }
112
+ arm_to_core_mmu_idx(mmu_idx));
186
+
113
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
114
+ if (arm_cpu_data_is_big_endian(env)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
115
+ flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
189
+ return false;
116
+ }
190
+ }
117
+ if (arm_singlestep_active(env)) {
191
+ }
118
+ flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
119
+ }
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
120
+ return flags;
194
+ return false;
121
+}
195
+ }
122
+
196
+ }
123
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
124
target_ulong *cs_base, uint32_t *pflags)
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
211
}
212
213
+ /*
214
+ * The SME exception we are testing for is raised via
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
217
+ */
218
+ if (el == 0
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
220
+ && (!arm_is_el2_enabled(env)
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
225
+ }
226
+
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
228
}
229
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
231
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
235
}
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
237
}
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/arm/translate-a64.c
241
+++ b/target/arm/translate-a64.c
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
243
* unallocated-encoding checks (otherwise the syndrome information
244
* for the resulting exception will be incorrect).
245
*/
246
-static bool fp_access_check(DisasContext *s)
247
+static bool fp_access_check_only(DisasContext *s)
125
{
248
{
126
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
249
if (s->fp_excp_el) {
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
253
}
254
255
+static bool fp_access_check(DisasContext *s)
256
+{
257
+ if (!fp_access_check_only(s)) {
258
+ return false;
259
+ }
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
262
+ syn_smetrap(SME_ET_Streaming, false));
263
+ return false;
264
+ }
265
+ return true;
266
+}
267
+
268
/* Check that SVE access is enabled. If it is, return true.
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
277
return;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
279
return;
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
282
}
283
284
+/*
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
309
dc->vec_len = 0;
310
dc->vec_stride = 0;
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
127
}
313
}
128
}
314
}
129
315
130
- flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
316
+ s->is_nonstreaming = false;
131
+ flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
317
+ if (s->sme_trap_nonstreaming) {
132
318
+ disas_sme_fa64(s, insn);
133
/* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
319
+ }
134
* states defined in the ARM ARM for software singlestep:
320
+
135
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
321
switch (extract32(insn, 25, 4)) {
136
* 0 x Inactive (the TB flag for SS is always 0)
322
case 0x0:
137
* 1 0 Active-pending
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
138
* 1 1 Active-not-pending
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
139
+ * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
325
index XXXXXXX..XXXXXXX 100644
140
*/
326
--- a/target/arm/translate-vfp.c
141
- if (arm_singlestep_active(env)) {
327
+++ b/target/arm/translate-vfp.c
142
- flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
143
+ if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE)) {
329
return false;
144
if (is_a64(env)) {
330
}
145
if (env->pstate & PSTATE_SS) {
331
146
flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
332
+ /*
147
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
148
}
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/arm/translate.c
350
+++ b/target/arm/translate.c
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
149
}
354
}
150
}
355
+ dc->sme_trap_nonstreaming =
151
- if (arm_cpu_data_is_big_endian(env)) {
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
152
- flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
357
}
153
- }
358
dc->cp_regs = cpu->cp_regs;
154
- flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
359
dc->features = env->features;
155
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
156
if (arm_v7m_is_handler_mode(env)) {
361
index XXXXXXX..XXXXXXX 100644
157
flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
362
--- a/target/arm/meson.build
363
+++ b/target/arm/meson.build
364
@@ -XXX,XX +XXX,XX @@
365
gen = [
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
158
--
372
--
159
2.20.1
373
2.25.1
160
161
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The SDRAM is incorrectly created in the OMAP310 SoC.
3
Mark ADR as a non-streaming instruction, which should trap
4
Move its creation in the board code, this will later allow the
4
if full a64 support is not enabled in streaming mode.
5
board to have the QOM ownership of the RAM.
6
5
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Removing entries from sme-fa64.decode is an easy way to see
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
what remains to be done.
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
10
Message-id: 20191021190653.9511-6-philmd@redhat.com
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
13
---
13
include/hw/arm/omap.h | 6 ++----
14
target/arm/translate.h | 7 +++++++
14
hw/arm/omap1.c | 12 +++++-------
15
target/arm/sme-fa64.decode | 1 -
15
hw/arm/omap_sx1.c | 8 ++++++--
16
target/arm/translate-sve.c | 8 ++++----
16
hw/arm/palm.c | 8 ++++++--
17
3 files changed, 11 insertions(+), 5 deletions(-)
17
4 files changed, 19 insertions(+), 15 deletions(-)
18
18
19
diff --git a/include/hw/arm/omap.h b/include/hw/arm/omap.h
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/hw/arm/omap.h
21
--- a/target/arm/translate.h
22
+++ b/include/hw/arm/omap.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s {
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
24
MemoryRegion mpui_io_iomem;
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
25
MemoryRegion tap_iomem;
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
26
MemoryRegion imif_ram;
26
27
- MemoryRegion emiff_ram;
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
28
MemoryRegion sram;
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
29
29
+ { \
30
struct omap_dma_port_if_s {
30
+ s->is_nonstreaming = true; \
31
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s {
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
hwaddr addr);
32
+ }
33
} port[__omap_dma_port_last];
33
+
34
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
- unsigned long sdram_size;
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
+ uint64_t sdram_size;
37
unsigned long sram_size;
38
39
/* MPUI-TIPB peripherals */
40
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s {
41
};
42
43
/* omap1.c */
44
-struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
45
- unsigned long sdram_size,
46
+struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *sdram,
47
const char *core);
48
49
/* omap2.c */
50
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
51
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
52
--- a/hw/arm/omap1.c
37
--- a/target/arm/sme-fa64.decode
53
+++ b/hw/arm/omap1.c
38
+++ b/target/arm/sme-fa64.decode
54
@@ -XXX,XX +XXX,XX @@
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
55
#include "qapi/error.h"
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
56
#include "qemu-common.h"
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
57
#include "cpu.h"
42
58
+#include "exec/address-spaces.h"
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
59
#include "hw/boards.h"
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
60
#include "hw/hw.h"
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
61
#include "hw/irq.h"
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
62
@@ -XXX,XX +XXX,XX @@ static int omap_validate_tipb_mpui_addr(struct omap_mpu_state_s *s,
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
63
return range_covers_byte(0xe1010000, 0xe1020004 - 0xe1010000, addr);
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
64
}
53
}
65
54
66
-struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
67
- unsigned long sdram_size,
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
68
+struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *dram,
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
69
const char *cpu_type)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
70
{
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
71
int i;
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
72
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
73
qemu_irq dma_irqs[6];
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
74
DriveInfo *dinfo;
63
75
SysBusDevice *busdev;
64
/*
76
+ MemoryRegion *system_memory = get_system_memory();
65
*** SVE Integer Misc - Unpredicated Group
77
78
/* Core */
79
s->mpu_model = omap310;
80
s->cpu = ARM_CPU(cpu_create(cpu_type));
81
- s->sdram_size = sdram_size;
82
+ s->sdram_size = memory_region_size(dram);
83
s->sram_size = OMAP15XX_SRAM_SIZE;
84
85
s->wakeup = qemu_allocate_irq(omap_mpu_wakeup, s, 0);
86
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
87
omap_clk_init(s);
88
89
/* Memory-mapped stuff */
90
- memory_region_allocate_system_memory(&s->emiff_ram, NULL, "omap1.dram",
91
- s->sdram_size);
92
- memory_region_add_subregion(system_memory, OMAP_EMIFF_BASE, &s->emiff_ram);
93
memory_region_init_ram(&s->imif_ram, NULL, "omap1.sram", s->sram_size,
94
&error_fatal);
95
memory_region_add_subregion(system_memory, OMAP_IMIF_BASE, &s->imif_ram);
96
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
97
s->port[tipb_mpui].addr_valid = omap_validate_tipb_mpui_addr;
98
99
/* Register SDRAM and SRAM DMA ports for fast transfers. */
100
- soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(&s->emiff_ram),
101
+ soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(dram),
102
OMAP_EMIFF_BASE, s->sdram_size);
103
soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(&s->imif_ram),
104
OMAP_IMIF_BASE, s->sram_size);
105
diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c
106
index XXXXXXX..XXXXXXX 100644
107
--- a/hw/arm/omap_sx1.c
108
+++ b/hw/arm/omap_sx1.c
109
@@ -XXX,XX +XXX,XX @@ static void sx1_init(MachineState *machine, const int version)
110
{
111
struct omap_mpu_state_s *mpu;
112
MemoryRegion *address_space = get_system_memory();
113
+ MemoryRegion *dram = g_new(MemoryRegion, 1);
114
MemoryRegion *flash = g_new(MemoryRegion, 1);
115
MemoryRegion *cs = g_new(MemoryRegion, 4);
116
static uint32_t cs0val = 0x00213090;
117
@@ -XXX,XX +XXX,XX @@ static void sx1_init(MachineState *machine, const int version)
118
flash_size = flash2_size;
119
}
120
121
- mpu = omap310_mpu_init(address_space, sx1_binfo.ram_size,
122
- machine->cpu_type);
123
+ memory_region_allocate_system_memory(dram, NULL, "omap1.dram",
124
+ sx1_binfo.ram_size);
125
+ memory_region_add_subregion(address_space, OMAP_EMIFF_BASE, dram);
126
+
127
+ mpu = omap310_mpu_init(dram, machine->cpu_type);
128
129
/* External Flash (EMIFS) */
130
memory_region_init_ram(flash, NULL, "omap_sx1.flash0-0", flash_size,
131
diff --git a/hw/arm/palm.c b/hw/arm/palm.c
132
index XXXXXXX..XXXXXXX 100644
133
--- a/hw/arm/palm.c
134
+++ b/hw/arm/palm.c
135
@@ -XXX,XX +XXX,XX @@ static void palmte_init(MachineState *machine)
136
MemoryRegion *address_space_mem = get_system_memory();
137
struct omap_mpu_state_s *mpu;
138
int flash_size = 0x00800000;
139
- int sdram_size = palmte_binfo.ram_size;
140
static uint32_t cs0val = 0xffffffff;
141
static uint32_t cs1val = 0x0000e1a0;
142
static uint32_t cs2val = 0x0000e1a0;
143
static uint32_t cs3val = 0xe1a0e1a0;
144
int rom_size, rom_loaded = 0;
145
+ MemoryRegion *dram = g_new(MemoryRegion, 1);
146
MemoryRegion *flash = g_new(MemoryRegion, 1);
147
MemoryRegion *cs = g_new(MemoryRegion, 4);
148
149
- mpu = omap310_mpu_init(address_space_mem, sdram_size, machine->cpu_type);
150
+ memory_region_allocate_system_memory(dram, NULL, "omap1.dram",
151
+ palmte_binfo.ram_size);
152
+ memory_region_add_subregion(address_space_mem, OMAP_EMIFF_BASE, dram);
153
+
154
+ mpu = omap310_mpu_init(dram, machine->cpu_type);
155
156
/* External Flash (EMIFS) */
157
memory_region_init_ram(flash, NULL, "palmte.flash", flash_size,
158
--
66
--
159
2.20.1
67
2.25.1
160
161
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Continue setting, but not relying upon, env->hflags.
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
4
5
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-22-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
hw/intc/armv7m_nvic.c | 22 +++++++++++++---------
11
target/arm/sme-fa64.decode | 2 --
12
1 file changed, 13 insertions(+), 9 deletions(-)
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
13
14
14
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/intc/armv7m_nvic.c
17
--- a/target/arm/sme-fa64.decode
17
+++ b/hw/intc/armv7m_nvic.c
18
+++ b/target/arm/sme-fa64.decode
18
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
19
}
20
20
}
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
21
nvic_irq_update(s);
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
22
- return MEMTX_OK;
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
23
+ goto exit_ok;
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
24
case 0x200 ... 0x23f: /* NVIC Set pend */
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
25
/* the special logic in armv7m_nvic_set_pending()
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
26
* is not needed since IRQs are never escalated
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
27
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
}
29
index XXXXXXX..XXXXXXX 100644
29
}
30
--- a/target/arm/translate-sve.c
30
nvic_irq_update(s);
31
+++ b/target/arm/translate-sve.c
31
- return MEMTX_OK;
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
32
+ goto exit_ok;
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
33
case 0x300 ... 0x33f: /* NVIC Active */
34
34
- return MEMTX_OK; /* R/O */
35
/* Note pat == 31 is #all, to set all elements. */
35
+ goto exit_ok; /* R/O */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
36
case 0x400 ... 0x5ef: /* NVIC Priority */
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
37
startvec = (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
38
39
39
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
40
/* Note pat == 32 is #unimp, to set no elements. */
40
}
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
41
}
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
42
nvic_irq_update(s);
43
.rd = a->rd, .pg = a->pg, .s = a->s,
43
- return MEMTX_OK;
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
44
+ goto exit_ok;
45
};
45
case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
46
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
47
- return MEMTX_OK;
48
+ goto exit_ok;
49
}
50
/* fall through */
51
case 0xd1c ... 0xd23: /* System Handler Priority (SHPR2, SHPR3) */
52
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
53
set_prio(s, hdlidx, sbank, newprio);
54
}
55
nvic_irq_update(s);
56
- return MEMTX_OK;
57
+ goto exit_ok;
58
case 0xd28 ... 0xd2b: /* Configurable Fault Status (CFSR) */
59
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
60
- return MEMTX_OK;
61
+ goto exit_ok;
62
}
63
/* All bits are W1C, so construct 32 bit value with 0s in
64
* the parts not written by the access size
65
@@ -XXX,XX +XXX,XX @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
66
*/
67
s->cpu->env.v7m.cfsr[M_REG_NS] &= ~(value & R_V7M_CFSR_BFSR_MASK);
68
}
69
- return MEMTX_OK;
70
+ goto exit_ok;
71
}
72
if (size == 4) {
73
nvic_writel(s, offset, value, attrs);
74
- return MEMTX_OK;
75
+ goto exit_ok;
76
}
77
qemu_log_mask(LOG_GUEST_ERROR,
78
"NVIC: Bad write of size %d at offset 0x%x\n", size, offset);
79
/* This is UNPREDICTABLE; treat as RAZ/WI */
80
+
46
+
81
+ exit_ok:
47
+ s->is_nonstreaming = true;
82
+ /* Ensure any changes made are reflected in the cached hflags. */
48
return trans_AND_pppp(s, &alt_a);
83
+ arm_rebuild_hflags(&s->cpu->env);
84
return MEMTX_OK;
85
}
49
}
86
50
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
87
--
58
--
88
2.20.1
59
2.25.1
89
90
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
34
NULL, gen_helper_sve_fexpa_h,
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
92
2.25.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
IEC binary prefixes ease code review: the unit is explicit.
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
4
5
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
8
Message-id: 20191021190653.9511-3-philmd@redhat.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
hw/arm/mps2-tz.c | 3 ++-
11
target/arm/sme-fa64.decode | 2 --
12
hw/arm/mps2.c | 3 ++-
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
13
2 files changed, 4 insertions(+), 2 deletions(-)
13
2 files changed, 15 insertions(+), 11 deletions(-)
14
14
15
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/arm/mps2-tz.c
17
--- a/target/arm/sme-fa64.decode
18
+++ b/hw/arm/mps2-tz.c
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
34
NULL, gen_helper_sve2_pmull_d,
35
};
36
- if (a->esz == 0
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
38
- : !dc_isar_feature(aa64_sve, s)) {
39
+
40
+ if (a->esz == 0) {
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
42
+ return false;
43
+ }
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
46
return false;
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
20
*/
51
*/
21
52
22
#include "qemu/osdep.h"
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
23
+#include "qemu/units.h"
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
24
#include "qapi/error.h"
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
25
#include "qemu/error-report.h"
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
26
#include "hw/arm/boot.h"
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
27
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
28
* call the 16MB our "system memory", as it's the largest lump.
59
+ 0, FPST_FPCR)
29
*/
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
30
memory_region_allocate_system_memory(&mms->psram,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
31
- NULL, "mps.ram", 0x01000000);
62
+ 0, FPST_FPCR)
32
+ NULL, "mps.ram", 16 * MiB);
63
33
memory_region_add_subregion(system_memory, 0x80000000, &mms->psram);
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
34
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
35
/* The overflow IRQs for all UARTs are ORed together.
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
36
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
37
index XXXXXXX..XXXXXXX 100644
68
gen_helper_gvec_bfdot_idx, a)
38
--- a/hw/arm/mps2.c
69
39
+++ b/hw/arm/mps2.c
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
40
@@ -XXX,XX +XXX,XX @@
71
- gen_helper_gvec_bfmmla, a, 0)
41
*/
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
42
73
+ gen_helper_gvec_bfmmla, a, 0)
43
#include "qemu/osdep.h"
74
44
+#include "qemu/units.h"
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
45
#include "qapi/error.h"
76
{
46
#include "qemu/error-report.h"
47
#include "hw/arm/boot.h"
48
@@ -XXX,XX +XXX,XX @@ static void mps2_common_init(MachineState *machine)
49
* zbt_boot_ctrl is always zero).
50
*/
51
memory_region_allocate_system_memory(&mms->psram,
52
- NULL, "mps.ram", 0x1000000);
53
+ NULL, "mps.ram", 16 * MiB);
54
memory_region_add_subregion(system_memory, 0x21000000, &mms->psram);
55
56
switch (mmc->fpga_type) {
57
--
77
--
58
2.20.1
78
2.25.1
59
60
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 15 +++++++++++----
13
2 files changed, 11 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
36
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
43
44
/*
45
*** SVE Floating Point Accumulating Reduction Group
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
48
return false;
49
}
50
+ s->is_nonstreaming = true;
51
if (!sve_access_check(s)) {
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
55
DO_FP3(FADD_zzz, fadd)
56
DO_FP3(FSUB_zzz, fsub)
57
DO_FP3(FMUL_zzz, fmul)
58
-DO_FP3(FTSMUL, ftsmul)
59
DO_FP3(FRECPS, recps)
60
DO_FP3(FRSQRTS, rsqrts)
61
62
#undef DO_FP3
63
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
65
+ NULL, gen_helper_gvec_ftsmul_h,
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
67
+};
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
69
+ ftsmul_fns[a->esz], a, 0)
70
+
71
/*
72
*** SVE Floating Point Arithmetic - Predicated Group
73
*/
74
--
75
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
50
--
51
2.25.1
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The SDRAM is incorrectly created in the OMAP2420 SoC.
3
Mark these as non-streaming instructions, which should trap
4
Move its creation in the board code, this will later allow the
4
if full a64 support is not enabled in streaming mode.
5
board to have the QOM ownership of the RAM.
6
5
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
10
Message-id: 20191021190653.9511-5-philmd@redhat.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
include/hw/arm/omap.h | 4 +---
11
target/arm/sme-fa64.decode | 1 -
14
hw/arm/nseries.c | 10 +++++++---
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
15
hw/arm/omap2.c | 13 +++++--------
13
2 files changed, 18 insertions(+), 18 deletions(-)
16
3 files changed, 13 insertions(+), 14 deletions(-)
17
14
18
diff --git a/include/hw/arm/omap.h b/include/hw/arm/omap.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/arm/omap.h
17
--- a/target/arm/sme-fa64.decode
21
+++ b/include/hw/arm/omap.h
18
+++ b/target/arm/sme-fa64.decode
22
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s {
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
23
MemoryRegion tap_iomem;
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
24
MemoryRegion imif_ram;
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
25
MemoryRegion emiff_ram;
22
26
- MemoryRegion sdram;
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
27
MemoryRegion sram;
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
28
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
29
struct omap_dma_port_if_s {
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
30
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
const char *core);
32
33
/* omap2.c */
34
-struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
35
- unsigned long sdram_size,
36
+struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sdram,
37
const char *core);
38
39
uint32_t omap_badwidth_read8(void *opaque, hwaddr addr);
40
diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c
41
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
42
--- a/hw/arm/nseries.c
29
--- a/target/arm/translate-sve.c
43
+++ b/hw/arm/nseries.c
30
+++ b/target/arm/translate-sve.c
44
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
45
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
46
/* Nokia N8x0 support */
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
47
struct n800_s {
48
+ MemoryRegion sdram;
49
struct omap_mpu_state_s *mpu;
50
51
struct rfbi_chip_s blizzard;
52
@@ -XXX,XX +XXX,XX @@ static int n810_atag_setup(const struct arm_boot_info *info, void *p)
53
static void n8x0_init(MachineState *machine,
54
struct arm_boot_info *binfo, int model)
55
{
56
- MemoryRegion *sysmem = get_system_memory();
57
struct n800_s *s = (struct n800_s *) g_malloc0(sizeof(*s));
58
- int sdram_size = binfo->ram_size;
59
+ uint64_t sdram_size = binfo->ram_size;
60
61
- s->mpu = omap2420_mpu_init(sysmem, sdram_size, machine->cpu_type);
62
+ memory_region_allocate_system_memory(&s->sdram, NULL, "omap2.dram",
63
+ sdram_size);
64
+ memory_region_add_subregion(get_system_memory(), OMAP2_Q2_BASE, &s->sdram);
65
+
66
+ s->mpu = omap2420_mpu_init(&s->sdram, machine->cpu_type);
67
68
/* Setup peripherals
69
*
70
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/hw/arm/omap2.c
73
+++ b/hw/arm/omap2.c
74
@@ -XXX,XX +XXX,XX @@
75
#include "qemu/error-report.h"
76
#include "qapi/error.h"
77
#include "cpu.h"
78
+#include "exec/address-spaces.h"
79
#include "sysemu/blockdev.h"
80
#include "sysemu/qtest.h"
81
#include "sysemu/reset.h"
82
@@ -XXX,XX +XXX,XX @@ static const struct dma_irq_map omap2_dma_irq_map[] = {
83
{ 0, OMAP_INT_24XX_SDMA_IRQ3 },
84
};
34
};
85
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
86
-struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
87
- unsigned long sdram_size,
37
88
+struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sdram,
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
89
const char *cpu_type)
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
90
{
40
};
91
struct omap_mpu_state_s *s = g_new0(struct omap_mpu_state_s, 1);
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
92
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
93
int i;
43
94
SysBusDevice *busdev;
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
95
struct omap_target_agent_s *ta;
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
96
+ MemoryRegion *sysmem = get_system_memory();
46
};
97
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
98
/* Core */
48
- histcnt_fns[a->esz], a, 0)
99
s->mpu_model = omap2420;
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
100
s->cpu = ARM_CPU(cpu_create(cpu_type));
50
+ histcnt_fns[a->esz], a, 0)
101
- s->sdram_size = sdram_size;
51
102
s->sram_size = OMAP242X_SRAM_SIZE;
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
103
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
104
s->wakeup = qemu_allocate_irq(omap_mpu_wakeup, s, 0);
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
105
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
106
omap_clk_init(s);
56
107
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
108
/* Memory-mapped stuff */
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
109
- memory_region_allocate_system_memory(&s->sdram, NULL, "omap2.dram",
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
110
- s->sdram_size);
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
111
- memory_region_add_subregion(sysmem, OMAP2_Q2_BASE, &s->sdram);
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
112
memory_region_init_ram(&s->sram, NULL, "omap2.sram", s->sram_size,
62
113
&error_fatal);
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
114
memory_region_add_subregion(sysmem, OMAP2_SRAM_BASE, &s->sram);
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
115
@@ -XXX,XX +XXX,XX @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
116
s->port->addr_valid = omap2_validate_addr;
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
117
67
118
/* Register SDRAM and SRAM ports for fast DMA transfers. */
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
119
- soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(&s->sdram),
69
- gen_helper_crypto_aese, a, false)
120
- OMAP2_Q2_BASE, s->sdram_size);
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
121
+ soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(sdram),
71
- gen_helper_crypto_aese, a, true)
122
+ OMAP2_Q2_BASE, memory_region_size(sdram));
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
123
soc_dma_port_add_mem(s->dma, memory_region_get_ram_ptr(&s->sram),
73
+ gen_helper_crypto_aese, a, false)
124
OMAP2_SRAM_BASE, s->sram_size);
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
125
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
126
--
92
--
127
2.20.1
93
2.25.1
128
129
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The SDRAM is incorrectly created in the SA1110 SoC.
3
Mark these as a non-streaming instructions, which should trap
4
Move its creation in the board code, this will later allow the
4
if full a64 support is not enabled in streaming mode.
5
board to have the QOM ownership of the RAM.
6
5
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
10
Message-id: 20191021190653.9511-4-philmd@redhat.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
hw/arm/strongarm.h | 4 +---
11
target/arm/sme-fa64.decode | 9 ---------
14
hw/arm/collie.c | 8 ++++++--
12
target/arm/translate-sve.c | 6 ++++++
15
hw/arm/strongarm.c | 7 +------
13
2 files changed, 6 insertions(+), 9 deletions(-)
16
3 files changed, 8 insertions(+), 11 deletions(-)
17
14
18
diff --git a/hw/arm/strongarm.h b/hw/arm/strongarm.h
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/arm/strongarm.h
17
--- a/target/arm/sme-fa64.decode
21
+++ b/hw/arm/strongarm.h
18
+++ b/target/arm/sme-fa64.decode
22
@@ -XXX,XX +XXX,XX @@ enum {
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
23
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
24
typedef struct {
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
25
ARMCPU *cpu;
22
26
- MemoryRegion sdram;
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
27
DeviceState *pic;
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
28
DeviceState *gpio;
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
29
DeviceState *ppc;
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
31
SSIBus *ssp_bus;
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
32
} StrongARMState;
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
33
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
34
-StrongARMState *sa1110_init(MemoryRegion *sysmem,
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
35
- unsigned int sdram_size, const char *rev);
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
36
+StrongARMState *sa1110_init(const char *cpu_type);
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
37
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
38
#endif
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
39
diff --git a/hw/arm/collie.c b/hw/arm/collie.c
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
41
--- a/hw/arm/collie.c
41
--- a/target/arm/translate-sve.c
42
+++ b/hw/arm/collie.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static void collie_init(MachineState *machine)
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
{
44
if (!dc_isar_feature(aa64_sve, s)) {
45
StrongARMState *s;
45
return false;
46
DriveInfo *dinfo;
46
}
47
- MemoryRegion *sysmem = get_system_memory();
47
+ s->is_nonstreaming = true;
48
+ MemoryRegion *sdram = g_new(MemoryRegion, 1);
48
if (!sve_access_check(s)) {
49
49
return true;
50
- s = sa1110_init(sysmem, collie_binfo.ram_size, machine->cpu_type);
50
}
51
+ s = sa1110_init(machine->cpu_type);
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
52
+
52
if (!dc_isar_feature(aa64_sve, s)) {
53
+ memory_region_allocate_system_memory(sdram, NULL, "strongarm.sdram",
53
return false;
54
+ collie_binfo.ram_size);
54
}
55
+ memory_region_add_subregion(get_system_memory(), SA_SDCS0, sdram);
55
+ s->is_nonstreaming = true;
56
56
if (!sve_access_check(s)) {
57
dinfo = drive_get(IF_PFLASH, 0, 0);
57
return true;
58
pflash_cfi01_register(SA_CS0, "collie.fl1", 0x02000000,
58
}
59
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
60
index XXXXXXX..XXXXXXX 100644
60
if (!dc_isar_feature(aa64_sve2, s)) {
61
--- a/hw/arm/strongarm.c
61
return false;
62
+++ b/hw/arm/strongarm.c
62
}
63
@@ -XXX,XX +XXX,XX @@ static const TypeInfo strongarm_ssp_info = {
63
+ s->is_nonstreaming = true;
64
};
64
if (!sve_access_check(s)) {
65
65
return true;
66
/* Main CPU functions */
66
}
67
-StrongARMState *sa1110_init(MemoryRegion *sysmem,
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
- unsigned int sdram_size, const char *cpu_type)
68
if (!dc_isar_feature(aa64_sve, s)) {
69
+StrongARMState *sa1110_init(const char *cpu_type)
69
return false;
70
{
70
}
71
StrongARMState *s;
71
+ s->is_nonstreaming = true;
72
int i;
72
if (!sve_access_check(s)) {
73
@@ -XXX,XX +XXX,XX @@ StrongARMState *sa1110_init(MemoryRegion *sysmem,
73
return true;
74
74
}
75
s->cpu = ARM_CPU(cpu_create(cpu_type));
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
76
if (!dc_isar_feature(aa64_sve, s)) {
77
- memory_region_allocate_system_memory(&s->sdram, NULL, "strongarm.sdram",
77
return false;
78
- sdram_size);
78
}
79
- memory_region_add_subregion(sysmem, SA_SDCS0, &s->sdram);
79
+ s->is_nonstreaming = true;
80
-
80
if (!sve_access_check(s)) {
81
s->pic = sysbus_create_varargs("strongarm_pic", 0x90050000,
81
return true;
82
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ),
82
}
83
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_FIQ),
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
90
}
84
--
91
--
85
2.20.1
92
2.25.1
86
87
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Currently a trivial wrapper for rebuild_hflags_common_32.
3
Mark these as a non-streaming instructions, which should trap if full
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
4
6
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-8-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
11
---
10
target/arm/helper.c | 8 +++++++-
12
target/arm/sme-fa64.decode | 3 ---
11
1 file changed, 7 insertions(+), 1 deletion(-)
13
target/arm/sve.decode | 10 +++++-----
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
12
16
13
diff --git a/target/arm/helper.c b/target/arm/helper.c
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.c
19
--- a/target/arm/sme-fa64.decode
16
+++ b/target/arm/helper.c
20
+++ b/target/arm/sme-fa64.decode
17
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
18
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
19
}
72
}
20
73
21
+static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
22
+ ARMMMUIdx mmu_idx)
23
+{
75
+{
24
+ return rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
76
+ if (!dc_isar_feature(aa64_sve, s)) {
77
+ return false;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
25
+}
83
+}
26
+
84
+
27
static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
85
/*
28
ARMMMUIdx mmu_idx)
86
* Move Prefix
29
{
87
*
30
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
31
flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
32
}
33
} else {
34
- flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
35
+ flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
36
}
37
38
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
39
--
88
--
40
2.20.1
89
2.25.1
41
42
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The Exynos SoC has specific SDHCI registers. Use the s3c SDHCI
3
Mark these as a non-streaming instructions, which should trap
4
model which handle these specific registers.
4
if full a64 support is not enabled in streaming mode.
5
5
6
This silents the following "SDHC ... not implemented" warnings so
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
we can focus on the important registers missing:
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
$ qemu-system-arm ... -d unimp \
10
-append "... root=/dev/mmcblk0 rootfstype=ext4 rw rootwait" \
11
-drive file=linux-build-test/rootfs/arm/rootfs-armv5.ext2,if=sd,format=raw
12
[...]
13
[ 25.744858] sdhci: Secure Digital Host Controller Interface driver
14
[ 25.745862] sdhci: Copyright(c) Pierre Ossman
15
[ 25.783188] s3c-sdhci 12530000.sdhci: clock source 2: mmc_busclk.2 (12000000 Hz)
16
SDHC rd_4b @0x80 not implemented
17
SDHC wr_4b @0x80 <- 0x00000020 not implemented
18
SDHC wr_4b @0x8c <- 0x00030000 not implemented
19
SDHC rd_4b @0x80 not implemented
20
SDHC wr_4b @0x80 <- 0xc0004100 not implemented
21
SDHC wr_4b @0x84 <- 0x80808080 not implemented
22
[ 26.013318] mmc0: SDHCI controller on samsung-hsmmc [12530000.sdhci] using ADMA
23
[ 26.032318] Synopsys Designware Multimedia Card Interface Driver
24
[ 42.024885] Waiting for root device /dev/mmcblk0...
25
26
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
27
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
28
Message-id: 20191005154748.21718-5-f4bug@amsat.org
29
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
30
---
10
---
31
hw/arm/exynos4210.c | 2 +-
11
target/arm/sme-fa64.decode | 2 --
32
1 file changed, 1 insertion(+), 1 deletion(-)
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 2 deletions(-)
33
14
34
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
35
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
36
--- a/hw/arm/exynos4210.c
17
--- a/target/arm/sme-fa64.decode
37
+++ b/hw/arm/exynos4210.c
18
+++ b/target/arm/sme-fa64.decode
38
@@ -XXX,XX +XXX,XX @@ static void exynos4210_realize(DeviceState *socdev, Error **errp)
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
39
* public datasheet which is very similar (implementing
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
40
* MMC Specification Version 4.0 being the only difference noted)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
41
*/
22
42
- dev = qdev_create(NULL, TYPE_SYSBUS_SDHCI);
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
43
+ dev = qdev_create(NULL, TYPE_S3C_SDHCI);
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
44
qdev_prop_set_uint64(dev, "capareg", EXYNOS4210_SDHCI_CAPABILITIES);
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
45
qdev_init_nofail(dev);
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
46
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
32
if (!dc_isar_feature(aa64_sve, s)) {
33
return false;
34
}
35
+ s->is_nonstreaming = true;
36
if (sve_access_check(s)) {
37
TCGv_i64 addr = new_tmp_a64(s);
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
47
--
48
2.20.1
48
2.25.1
49
50
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We do not need to compute any of these values for M-profile.
3
Mark these as a non-streaming instructions, which should trap
4
Further, XSCALE_CPAR overlaps VECSTRIDE so obviously the two
4
if full a64 support is not enabled in streaming mode.
5
sets must be mutually exclusive.
6
5
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20191018174431.1784-10-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
target/arm/helper.c | 21 ++++++++++++++-------
11
target/arm/sme-fa64.decode | 3 ---
13
1 file changed, 14 insertions(+), 7 deletions(-)
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
14
15
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper.c
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/helper.c
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
}
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
} else {
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
+
23
-
24
+ /*
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
+ * Note that XSCALE_CPAR shares bits with VECSTRIDE.
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
+ * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
+ */
27
index XXXXXXX..XXXXXXX 100644
28
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
28
--- a/target/arm/translate-sve.c
29
+ flags = FIELD_DP32(flags, TBFLAG_A32,
29
+++ b/target/arm/translate-sve.c
30
+ XSCALE_CPAR, env->cp15.c15_cpar);
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
+ } else {
31
if (a->rm == 31) {
32
+ flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN,
32
return false;
33
+ env->vfp.vec_len);
34
+ flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
35
+ env->vfp.vec_stride);
36
+ }
37
}
38
39
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
40
- flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
41
- flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
42
flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
43
if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
44
|| arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
45
flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
46
}
47
- /* Note that XSCALE_CPAR shares bits with VECSTRIDE */
48
- if (arm_feature(env, ARM_FEATURE_XSCALE)) {
49
- flags = FIELD_DP32(flags, TBFLAG_A32,
50
- XSCALE_CPAR, env->cp15.c15_cpar);
51
- }
52
}
33
}
53
34
+ s->is_nonstreaming = true;
54
/* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
55
--
46
--
56
2.20.1
47
2.25.1
57
58
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The Linux kernel access few S3C-specific registers [1] to set some
3
These functions will be used to verify that the cpu
4
clock. We don't care about this part for device emulation [2]. Add
4
is in the correct state for a given instruction.
5
a dummy device to properly ignore these accesses, so we can focus
6
on the important registers missing.
7
5
8
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/mmc/host/sdhci-s3c-regs.h?h=cc014f3
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/mmc/host/sdhci-s3c.c?h=v5.3#n263
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
11
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
12
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
13
Message-id: 20191005154748.21718-4-f4bug@amsat.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
10
---
16
include/hw/sd/sdhci.h | 2 ++
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
17
hw/sd/sdhci.c | 65 +++++++++++++++++++++++++++++++++++++++++++
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
18
2 files changed, 67 insertions(+)
13
2 files changed, 55 insertions(+)
19
14
20
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/sd/sdhci.h
17
--- a/target/arm/translate-a64.h
23
+++ b/include/hw/sd/sdhci.h
18
+++ b/target/arm/translate-a64.h
24
@@ -XXX,XX +XXX,XX @@ typedef struct SDHCIState {
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
25
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
26
#define TYPE_IMX_USDHC "imx-usdhc"
21
unsigned int imms, unsigned int immr);
27
22
bool sve_access_check(DisasContext *s);
28
+#define TYPE_S3C_SDHCI "s3c-sdhci"
23
+bool sme_enabled_check(DisasContext *s);
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
29
+
25
+
30
#endif /* SDHCI_H */
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
31
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/sd/sdhci.c
34
+++ b/hw/sd/sdhci.c
35
@@ -XXX,XX +XXX,XX @@ static const TypeInfo imx_usdhc_info = {
36
.instance_init = imx_usdhc_init,
37
};
38
39
+/* --- qdev Samsung s3c --- */
40
+
41
+#define S3C_SDHCI_CONTROL2 0x80
42
+#define S3C_SDHCI_CONTROL3 0x84
43
+#define S3C_SDHCI_CONTROL4 0x8c
44
+
45
+static uint64_t sdhci_s3c_read(void *opaque, hwaddr offset, unsigned size)
46
+{
28
+{
47
+ uint64_t ret;
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
48
+
49
+ switch (offset) {
50
+ case S3C_SDHCI_CONTROL2:
51
+ case S3C_SDHCI_CONTROL3:
52
+ case S3C_SDHCI_CONTROL4:
53
+ /* ignore */
54
+ ret = 0;
55
+ break;
56
+ default:
57
+ ret = sdhci_read(opaque, offset, size);
58
+ break;
59
+ }
60
+
61
+ return ret;
62
+}
30
+}
63
+
31
+
64
+static void sdhci_s3c_write(void *opaque, hwaddr offset, uint64_t val,
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
65
+ unsigned size)
33
+static inline bool sme_za_enabled_check(DisasContext *s)
66
+{
34
+{
67
+ switch (offset) {
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
68
+ case S3C_SDHCI_CONTROL2:
69
+ case S3C_SDHCI_CONTROL3:
70
+ case S3C_SDHCI_CONTROL4:
71
+ /* ignore */
72
+ break;
73
+ default:
74
+ sdhci_write(opaque, offset, val, size);
75
+ break;
76
+ }
77
+}
36
+}
78
+
37
+
79
+static const MemoryRegionOps sdhci_s3c_mmio_ops = {
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
80
+ .read = sdhci_s3c_read,
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
81
+ .write = sdhci_s3c_write,
82
+ .valid = {
83
+ .min_access_size = 1,
84
+ .max_access_size = 4,
85
+ .unaligned = false
86
+ },
87
+ .endianness = DEVICE_LITTLE_ENDIAN,
88
+};
89
+
90
+static void sdhci_s3c_init(Object *obj)
91
+{
40
+{
92
+ SDHCIState *s = SYSBUS_SDHCI(obj);
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
93
+
94
+ s->io_ops = &sdhci_s3c_mmio_ops;
95
+}
42
+}
96
+
43
+
97
+static const TypeInfo sdhci_s3c_info = {
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
98
+ .name = TYPE_S3C_SDHCI ,
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
99
+ .parent = TYPE_SYSBUS_SDHCI,
46
bool tag_checked, int log2_size);
100
+ .instance_init = sdhci_s3c_init,
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
101
+};
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
52
return true;
53
}
54
55
+/* This function corresponds to CheckSMEEnabled. */
56
+bool sme_enabled_check(DisasContext *s)
57
+{
58
+ /*
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
60
+ * to be zero when fp_excp_el has priority. This is because we need
61
+ * sme_excp_el by itself for cpregs access checks.
62
+ */
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
64
+ s->fp_access_checked = true;
65
+ return sme_access_check(s);
66
+ }
67
+ return fp_access_check_only(s);
68
+}
102
+
69
+
103
static void sdhci_register_types(void)
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
104
{
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
105
type_register_static(&sdhci_sysbus_info);
72
+{
106
type_register_static(&sdhci_bus_info);
73
+ if (!sme_enabled_check(s)) {
107
type_register_static(&imx_usdhc_info);
74
+ return false;
108
+ type_register_static(&sdhci_s3c_info);
75
+ }
109
}
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
110
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
111
type_init(sdhci_register_types)
78
+ syn_smetrap(SME_ET_NotStreaming, false));
79
+ return false;
80
+ }
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
83
+ syn_smetrap(SME_ET_InactiveZA, false));
84
+ return false;
85
+ }
86
+ return true;
87
+}
88
+
89
/*
90
* This utility function is for doing register extension with an
91
* optional shift. You will likely want to pass a temporary for the
112
--
92
--
113
2.20.1
93
2.25.1
114
115
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Continue setting, but not relying upon, env->hflags.
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
SVE mode, and for SME present but SVE absent.
4
5
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-18-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
---
10
target/arm/translate-a64.c | 13 +++++++++++--
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
11
target/arm/translate.c | 28 +++++++++++++++++++++++-----
12
1 file changed, 16 insertions(+), 6 deletions(-)
12
2 files changed, 34 insertions(+), 7 deletions(-)
13
13
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/translate-a64.c
16
--- a/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
18
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
19
if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
19
return true;
20
/* I/O operations must end the TB here (whether read or write) */
20
}
21
s->base.is_jmp = DISAS_UPDATE;
21
22
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
22
-/* Check that SVE access is enabled. If it is, return true.
23
- /* We default to ending the TB on a coprocessor register write,
23
+/*
24
+ }
24
+ * Check that SVE access is enabled. If it is, return true.
25
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
25
* If not, emit code to generate an appropriate exception and return false.
26
+ /*
26
+ * This function corresponds to CheckSVEEnabled().
27
+ * A write to any coprocessor regiser that ends a TB
27
*/
28
+ * must rebuild the hflags for the next TB.
28
bool sve_access_check(DisasContext *s)
29
+ */
29
{
30
+ TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
30
- if (s->sve_excp_el) {
31
+ gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
31
- assert(!s->sve_access_checked);
32
+ tcg_temp_free_i32(tcg_el);
32
- s->sve_access_checked = true;
33
+ /*
33
-
34
+ * We default to ending the TB on a coprocessor register write,
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
35
* but allow this to be suppressed by the register definition
35
+ assert(dc_isar_feature(aa64_sme, s));
36
* (usually only necessary to work around guest bugs).
36
+ if (!sme_sm_enabled_check(s)) {
37
*/
37
+ goto fail_exit;
38
diff --git a/target/arm/translate.c b/target/arm/translate.c
38
+ }
39
index XXXXXXX..XXXXXXX 100644
39
+ } else if (s->sve_excp_el) {
40
--- a/target/arm/translate.c
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
41
+++ b/target/arm/translate.c
41
syn_sve_access_trap(), s->sve_excp_el);
42
@@ -XXX,XX +XXX,XX @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
42
- return false;
43
ri = get_arm_cp_reginfo(s->cp_regs,
43
+ goto fail_exit;
44
ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
44
}
45
if (ri) {
45
s->sve_access_checked = true;
46
+ bool need_exit_tb;
46
return fp_access_check(s);
47
+
47
+
48
/* Check access permissions */
48
+ fail_exit:
49
if (!cp_access_ok(s->current_el, ri, isread)) {
49
+ /* Assert that we only raise one exception per instruction. */
50
return 1;
50
+ assert(!s->sve_access_checked);
51
@@ -XXX,XX +XXX,XX @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
51
+ s->sve_access_checked = true;
52
}
52
+ return false;
53
}
53
}
54
54
55
- if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
55
/*
56
- /* I/O operations must end the TB here (whether read or write) */
57
- gen_lookup_tb(s);
58
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
59
- /* We default to ending the TB on a coprocessor register write,
60
+ /* I/O operations must end the TB here (whether read or write) */
61
+ need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
62
+ (ri->type & ARM_CP_IO));
63
+
64
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
65
+ /*
66
+ * A write to any coprocessor regiser that ends a TB
67
+ * must rebuild the hflags for the next TB.
68
+ */
69
+ TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
70
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
71
+ gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
72
+ } else {
73
+ gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
74
+ }
75
+ tcg_temp_free_i32(tcg_el);
76
+ /*
77
+ * We default to ending the TB on a coprocessor register write,
78
* but allow this to be suppressed by the register definition
79
* (usually only necessary to work around guest bugs).
80
*/
81
+ need_exit_tb = true;
82
+ }
83
+ if (need_exit_tb) {
84
gen_lookup_tb(s);
85
}
86
87
--
56
--
88
2.20.1
57
2.25.1
89
90
diff view generated by jsdifflib
1
From: Cédric Le Goater <clg@kaod.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Define the board with 1 GiB of RAM but some boards can have up to 2
3
These SME instructions are nominally within the SVE decode space,
4
GiB.
4
so we add them to sve.decode and translate-sve.c.
5
5
6
Signed-off-by: Cédric Le Goater <clg@kaod.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Joel Stanley <joel@jms.id.au>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191016090745.15334-1-clg@kaod.org
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
include/hw/arm/aspeed.h | 1 +
11
target/arm/translate-a64.h | 12 ++++++++++++
12
hw/arm/aspeed.c | 23 +++++++++++++++++++++++
12
target/arm/sve.decode | 5 ++++-
13
2 files changed, 24 insertions(+)
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
14
3 files changed, 54 insertions(+), 1 deletion(-)
14
15
15
diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/include/hw/arm/aspeed.h
18
--- a/target/arm/translate-a64.h
18
+++ b/include/hw/arm/aspeed.h
19
+++ b/target/arm/translate-a64.h
19
@@ -XXX,XX +XXX,XX @@ typedef struct AspeedBoardConfig {
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
20
const char *desc;
21
return s->vl;
21
const char *soc_name;
22
}
22
uint32_t hw_strap1;
23
23
+ uint32_t hw_strap2;
24
+/* Return the byte size of the vector register, SVL / 8. */
24
const char *fmc_model;
25
+static inline int streaming_vec_reg_size(DisasContext *s)
25
const char *spi_model;
26
+{
26
uint32_t num_cs;
27
+ return s->svl;
27
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
28
+}
28
index XXXXXXX..XXXXXXX 100644
29
--- a/hw/arm/aspeed.c
30
+++ b/hw/arm/aspeed.c
31
@@ -XXX,XX +XXX,XX @@ struct AspeedBoardState {
32
/* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */
33
#define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1
34
35
+/* AST2600 evb hardware value */
36
+#define AST2600_EVB_HW_STRAP1 0x000000C0
37
+#define AST2600_EVB_HW_STRAP2 0x00000003
38
+
29
+
39
/*
30
/*
40
* The max ram region is for firmwares that scan the address space
31
* Return the offset info CPUARMState of the predicate vector register Pn.
41
* with load/store to guess how much RAM the SoC has.
32
* Note for this purpose, FFR is P16.
42
@@ -XXX,XX +XXX,XX @@ static void aspeed_board_init(MachineState *machine,
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
43
&error_abort);
34
return s->vl >> 3;
44
object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap1, "hw-strap1",
45
&error_abort);
46
+ object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap2, "hw-strap2",
47
+ &error_abort);
48
object_property_set_int(OBJECT(&bmc->soc), cfg->num_cs, "num-cs",
49
&error_abort);
50
object_property_set_int(OBJECT(&bmc->soc), machine->smp.cpus, "num-cpus",
51
@@ -XXX,XX +XXX,XX @@ static void ast2500_evb_i2c_init(AspeedBoardState *bmc)
52
i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 0x32);
53
}
35
}
54
36
55
+static void ast2600_evb_i2c_init(AspeedBoardState *bmc)
37
+/* Return the byte size of the predicate register, SVL / 64. */
38
+static inline int streaming_pred_reg_size(DisasContext *s)
56
+{
39
+{
57
+ /* Start with some devices on our I2C busses */
40
+ return s->svl >> 3;
58
+ ast2500_evb_i2c_init(bmc);
59
+}
41
+}
60
+
42
+
61
static void romulus_bmc_i2c_init(AspeedBoardState *bmc)
43
/*
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
51
# SVE index generation (register start, register increment)
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
53
54
-### SVE Stack Allocation Group
55
+### SVE / Streaming SVE Stack Allocation Group
56
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
74
return true;
75
}
76
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
78
+{
79
+ if (!dc_isar_feature(aa64_sme, s)) {
80
+ return false;
81
+ }
82
+ if (sme_enabled_check(s)) {
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
86
+ }
87
+ return true;
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
62
{
91
{
63
AspeedSoCState *soc = &bmc->soc;
92
if (!dc_isar_feature(aa64_sve, s)) {
64
@@ -XXX,XX +XXX,XX @@ static const AspeedBoardConfig aspeed_boards[] = {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
65
.num_cs = 2,
94
return true;
66
.i2c_init = witherspoon_bmc_i2c_init,
95
}
67
.ram = 512 * MiB,
96
68
+ }, {
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
69
+ .name = MACHINE_TYPE_NAME("ast2600-evb"),
98
+{
70
+ .desc = "Aspeed AST2600 EVB (Cortex A7)",
99
+ if (!dc_isar_feature(aa64_sme, s)) {
71
+ .soc_name = "ast2600-a0",
100
+ return false;
72
+ .hw_strap1 = AST2600_EVB_HW_STRAP1,
101
+ }
73
+ .hw_strap2 = AST2600_EVB_HW_STRAP2,
102
+ if (sme_enabled_check(s)) {
74
+ .fmc_model = "w25q512jv",
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
75
+ .spi_model = "mx66u51235f",
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
76
+ .num_cs = 1,
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
77
+ .i2c_init = ast2600_evb_i2c_init,
106
+ }
78
+ .ram = 1 * GiB,
107
+ return true;
79
},
108
+}
80
};
109
+
81
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
+{
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
127
+}
128
+
129
/*
130
*** SVE Compute Vector Address Group
131
*/
82
--
132
--
83
2.20.1
133
2.25.1
84
85
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
IEC binary prefixes ease code review: the unit is explicit.
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20191021190653.9511-2-philmd@redhat.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
7
---
11
hw/arm/xilinx_zynq.c | 3 ++-
8
target/arm/helper-sme.h | 2 ++
12
1 file changed, 2 insertions(+), 1 deletion(-)
9
target/arm/sme.decode | 4 ++++
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
12
4 files changed, 44 insertions(+)
13
13
14
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/arm/xilinx_zynq.c
16
--- a/target/arm/helper-sme.h
17
+++ b/hw/arm/xilinx_zynq.c
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@
19
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
22
+
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@
29
#
30
# This file is processed by scripts/decodetree.py
31
#
32
+
33
+### SME Misc
34
+
35
+ZERO 11000000 00 001 00000000000 imm:8
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
43
}
44
+
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
46
+{
47
+ uint32_t i;
48
+
49
+ /*
50
+ * Special case clearing the entire ZA space.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * parts of the ZA storage outside of SVL.
53
+ */
54
+ if (imm == 0xff) {
55
+ memset(env->zarray, 0, sizeof(env->zarray));
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sme.c
72
+++ b/target/arm/translate-sme.c
18
@@ -XXX,XX +XXX,XX @@
73
@@ -XXX,XX +XXX,XX @@
19
*/
74
*/
20
75
21
#include "qemu/osdep.h"
76
#include "decode-sme.c.inc"
22
+#include "qemu/units.h"
77
+
23
#include "qapi/error.h"
78
+
24
#include "cpu.h"
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
25
#include "hw/sysbus.h"
80
+{
26
@@ -XXX,XX +XXX,XX @@ static void zynq_init(MachineState *machine)
81
+ if (!dc_isar_feature(aa64_sme, s)) {
27
memory_region_add_subregion(address_space_mem, 0, ext_ram);
82
+ return false;
28
83
+ }
29
/* 256K of on-chip memory */
84
+ if (sme_za_enabled_check(s)) {
30
- memory_region_init_ram(ocm_ram, NULL, "zynq.ocm_ram", 256 << 10,
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
31
+ memory_region_init_ram(ocm_ram, NULL, "zynq.ocm_ram", 256 * KiB,
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
32
&error_fatal);
87
+ }
33
memory_region_add_subregion(address_space_mem, 0xFFFC0000, ocm_ram);
88
+ return true;
34
89
+}
35
--
90
--
36
2.20.1
91
2.25.1
37
38
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Continue setting, but not relying upon, env->hflags.
3
We can reuse the SVE functions for implementing moves to/from
4
horizontal tile slices, but we need new ones for moves to/from
5
vertical tile slices.
4
6
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-21-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
11
---
10
target/arm/m_helper.c | 6 ++++++
12
target/arm/helper-sme.h | 12 +++
11
target/arm/translate.c | 5 ++++-
13
target/arm/helper-sve.h | 2 +
12
2 files changed, 10 insertions(+), 1 deletion(-)
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
13
21
14
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/m_helper.c
24
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/m_helper.c
25
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
19
switch_v7m_security_state(env, dest & 1);
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
20
env->thumb = 1;
28
21
env->regs[15] = dest & ~1;
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
22
+ arm_rebuild_hflags(env);
30
+
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
23
}
61
}
24
62
25
void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
63
+/* Return a newly allocated pointer to the predicate register. */
26
@@ -XXX,XX +XXX,XX @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
27
switch_v7m_security_state(env, 0);
65
+{
28
env->thumb = 1;
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
29
env->regs[15] = dest;
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
30
+ arm_rebuild_hflags(env);
68
+ return ret;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
31
}
80
}
32
81
33
static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode,
82
+static inline int plus_12(DisasContext *s, int x)
34
@@ -XXX,XX +XXX,XX @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
83
+{
35
env->regs[14] = lr;
84
+ return x + 12;
36
env->regs[15] = addr & 0xfffffffe;
85
+}
37
env->thumb = addr & 1;
86
+
38
+ arm_rebuild_hflags(env);
87
static inline int times_2(DisasContext *s, int x)
88
{
89
return x * 2;
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
39
}
132
}
40
133
+
41
static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
134
+
42
@@ -XXX,XX +XXX,XX @@ static void do_v7m_exception_exit(ARMCPU *cpu)
135
+/*
43
136
+ * When considering the ZA storage as an array of elements of
44
/* Otherwise, we have a successful exception exit. */
137
+ * type T, the index within that array of the Nth element of
45
arm_clear_exclusive(env);
138
+ * a vertical slice of a tile can be calculated like this,
46
+ arm_rebuild_hflags(env);
139
+ * regardless of the size of type T. This is because the tiles
47
qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
210
+
211
+ /*
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
213
+ * the address arithmetic.
214
+ */
215
+ for (i = 0; i < oprsz; i++) {
216
+ if (pg[H2(i)] & 1) {
217
+ a[tile_vslice_index(i)] = n[i];
218
+ }
219
+ }
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
48
}
286
}
49
287
50
@@ -XXX,XX +XXX,XX @@ static bool do_v7m_function_return(ARMCPU *cpu)
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
51
xpsr_write(env, 0, XPSR_IT);
289
+ void *vg, uint32_t desc)
52
env->thumb = newpc & 1;
290
+{
53
env->regs[15] = newpc & ~1;
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
54
+ arm_rebuild_hflags(env);
292
+ Int128 *d = vd, *n = vn, *m = vm;
55
293
+ uint16_t *pg = vg;
56
qemu_log_mask(CPU_LOG_INT, "...function return successful\n");
294
+
57
return true;
295
+ for (i = 0; i < opr_sz; i += 1) {
58
@@ -XXX,XX +XXX,XX @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu)
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
59
switch_v7m_security_state(env, true);
297
+ }
60
xpsr_write(env, 0, XPSR_IT);
298
+}
61
env->regs[15] += 4;
299
+
62
+ arm_rebuild_hflags(env);
300
/* Two operand comparison controlled by a predicate.
63
return true;
301
* ??? It is very tempting to want to be able to expand this inline
64
302
* with x86 instructions, e.g.
65
gen_invep:
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
66
diff --git a/target/arm/translate.c b/target/arm/translate.c
304
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
305
--- a/target/arm/translate-sme.c
68
--- a/target/arm/translate.c
306
+++ b/target/arm/translate-sme.c
69
+++ b/target/arm/translate.c
307
@@ -XXX,XX +XXX,XX @@
70
@@ -XXX,XX +XXX,XX @@ static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
308
#include "decode-sme.c.inc"
71
309
72
static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
310
311
+/*
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
313
+ * are always decoded together, dependent on the element size.
314
+ */
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
316
+ int tile_index, bool vertical)
317
+{
318
+ int tile = tile_index >> (4 - esz);
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
320
+ int pos, len, offset;
321
+ TCGv_i32 tmp;
322
+ TCGv_ptr addr;
323
+
324
+ /* Compute the final index, which is Rs+imm. */
325
+ tmp = tcg_temp_new_i32();
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
327
+ tcg_gen_addi_i32(tmp, tmp, index);
328
+
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
331
+
332
+ if (vertical) {
333
+ /*
334
+ * Compute the byte offset of the index within the tile:
335
+ * (index % (svl / size)) * size
336
+ * = (index % (svl >> esz)) << esz
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
338
+ * Perform the multiply by shifting left by @pos bits.
339
+ * Perform these operations simultaneously via deposit into zero.
340
+ */
341
+ pos = esz;
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
343
+
344
+ /*
345
+ * For big-endian, adjust the indexed column byte offset within
346
+ * the uint64_t host words that make up env->zarray[].
347
+ */
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
350
+ }
351
+ } else {
352
+ /*
353
+ * Compute the byte offset of the index within the tile:
354
+ * (index % (svl / size)) * (size * sizeof(row))
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
356
+ */
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
73
{
380
{
74
- TCGv_i32 addr, reg;
381
if (!dc_isar_feature(aa64_sme, s)) {
75
+ TCGv_i32 addr, reg, el;
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
76
383
}
77
if (!arm_dc_feature(s, ARM_FEATURE_M)) {
78
return false;
79
@@ -XXX,XX +XXX,XX @@ static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
80
gen_helper_v7m_msr(cpu_env, addr, reg);
81
tcg_temp_free_i32(addr);
82
tcg_temp_free_i32(reg);
83
+ el = tcg_const_i32(s->current_el);
84
+ gen_helper_rebuild_hflags_m32(cpu_env, el);
85
+ tcg_temp_free_i32(el);
86
gen_lookup_tb(s);
87
return true;
384
return true;
88
}
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
89
--
445
--
90
2.20.1
446
2.25.1
91
92
diff view generated by jsdifflib
1
Switch the altera_timer code away from bottom-half based ptimers to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
the new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the timer.
5
2
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
4
because those functions accept only a Zreg register number.
5
For SME, we want to pass a pointer into ZA storage.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-id: 20191017132905.5604-6-peter.maydell@linaro.org
10
---
11
---
11
hw/timer/altera_timer.c | 13 +++++++++----
12
target/arm/helper-sme.h | 82 +++++
12
1 file changed, 9 insertions(+), 4 deletions(-)
13
target/arm/sme.decode | 9 +
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
15
target/arm/translate-sme.c | 70 +++++
16
4 files changed, 756 insertions(+)
13
17
14
diff --git a/hw/timer/altera_timer.c b/hw/timer/altera_timer.c
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/timer/altera_timer.c
20
--- a/target/arm/helper-sme.h
17
+++ b/hw/timer/altera_timer.c
21
+++ b/target/arm/helper-sme.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
18
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@
19
*/
20
130
21
#include "qemu/osdep.h"
131
#include "qemu/osdep.h"
22
-#include "qemu/main-loop.h"
132
#include "cpu.h"
23
#include "qemu/module.h"
133
+#include "internals.h"
24
#include "qapi/error.h"
134
#include "tcg/tcg-gvec-desc.h"
25
135
#include "exec/helper-proto.h"
26
@@ -XXX,XX +XXX,XX @@ typedef struct AlteraTimer {
136
+#include "exec/cpu_ldst.h"
27
MemoryRegion mmio;
137
+#include "exec/exec-all.h"
28
qemu_irq irq;
138
#include "qemu/int128.h"
29
uint32_t freq_hz;
139
#include "vec_internal.h"
30
- QEMUBH *bh;
140
+#include "sve_ldst_internal.h"
31
ptimer_state *ptimer;
141
32
uint32_t regs[R_MAX];
142
/* ResetSVEState */
33
} AlteraTimer;
143
void arm_reset_sve_state(CPUARMState *env)
34
@@ -XXX,XX +XXX,XX @@ static void timer_write(void *opaque, hwaddr addr,
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
35
break;
36
37
case R_CONTROL:
38
+ ptimer_transaction_begin(t->ptimer);
39
t->regs[R_CONTROL] = value & (CONTROL_ITO | CONTROL_CONT);
40
if ((value & CONTROL_START) &&
41
!(t->regs[R_STATUS] & STATUS_RUN)) {
42
@@ -XXX,XX +XXX,XX @@ static void timer_write(void *opaque, hwaddr addr,
43
ptimer_stop(t->ptimer);
44
t->regs[R_STATUS] &= ~STATUS_RUN;
45
}
46
+ ptimer_transaction_commit(t->ptimer);
47
break;
48
49
case R_PERIODL:
50
case R_PERIODH:
51
+ ptimer_transaction_begin(t->ptimer);
52
t->regs[addr] = value & 0xFFFF;
53
if (t->regs[R_STATUS] & STATUS_RUN) {
54
ptimer_stop(t->ptimer);
55
@@ -XXX,XX +XXX,XX @@ static void timer_write(void *opaque, hwaddr addr,
56
}
57
tvalue = (t->regs[R_PERIODH] << 16) | t->regs[R_PERIODL];
58
ptimer_set_limit(t->ptimer, tvalue + 1, 1);
59
+ ptimer_transaction_commit(t->ptimer);
60
break;
61
62
case R_SNAPL:
63
@@ -XXX,XX +XXX,XX @@ static void altera_timer_realize(DeviceState *dev, Error **errp)
64
return;
65
}
66
67
- t->bh = qemu_bh_new(timer_hit, t);
68
- t->ptimer = ptimer_init_with_bh(t->bh, PTIMER_POLICY_DEFAULT);
69
+ t->ptimer = ptimer_init(timer_hit, t, PTIMER_POLICY_DEFAULT);
70
+ ptimer_transaction_begin(t->ptimer);
71
ptimer_set_freq(t->ptimer, t->freq_hz);
72
+ ptimer_transaction_commit(t->ptimer);
73
74
memory_region_init_io(&t->mmio, OBJECT(t), &timer_ops, t,
75
TYPE_ALTERA_TIMER, R_MAX * sizeof(uint32_t));
76
@@ -XXX,XX +XXX,XX @@ static void altera_timer_reset(DeviceState *dev)
77
{
78
AlteraTimer *t = ALTERA_TIMER(dev);
79
80
+ ptimer_transaction_begin(t->ptimer);
81
ptimer_stop(t->ptimer);
82
ptimer_set_limit(t->ptimer, 0xffffffff, 1);
83
+ ptimer_transaction_commit(t->ptimer);
84
memset(t->regs, 0, sizeof(t->regs));
85
}
145
}
86
146
147
#undef DO_MOVA_Z
148
+
149
+/*
150
+ * Clear elements in a tile slice comprising len bytes.
151
+ */
152
+
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
154
+
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
156
+{
157
+ memset(ptr + off, 0, len);
158
+}
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
197
+ */
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 1, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
392
+ return;
393
+ }
394
+
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
397
+
398
+ /* Handle watchpoints for all active elements. */
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
400
+ BP_MEM_READ, ra);
401
+
402
+ /*
403
+ * Handle mte checks for all active elements.
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
405
+ */
406
+ if (mtedesc) {
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
408
+ mtedesc, ra);
409
+ }
410
+
411
+ flags = info.page[0].flags | info.page[1].flags;
412
+ if (unlikely(flags != 0)) {
413
+#ifdef CONFIG_USER_ONLY
414
+ g_assert_not_reached();
415
+#else
416
+ /*
417
+ * At least one page includes MMIO.
418
+ * Any bus operation can fail with cpu_transaction_failed,
419
+ * which for ARM will raise SyncExternal. Perform the load
420
+ * into scratch memory to preserve register state until the end.
421
+ */
422
+ ARMVectorReg scratch = { };
423
+
424
+ reg_off = info.reg_off_first[0];
425
+ reg_last = info.reg_off_last[1];
426
+ if (reg_last < 0) {
427
+ reg_last = info.reg_off_split;
428
+ if (reg_last < 0) {
429
+ reg_last = info.reg_off_last[0];
430
+ }
431
+ }
432
+
433
+ do {
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
87
--
817
--
88
2.20.1
818
2.25.1
89
90
diff view generated by jsdifflib
1
Switch the etraxfs_timer code away from bottom-half based ptimers to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
the new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the timer.
5
2
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
We will reuse this for SME save and restore array insns.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-id: 20191017132905.5604-7-peter.maydell@linaro.org
10
---
10
---
11
hw/timer/etraxfs_timer.c | 23 +++++++++++++----------
11
target/arm/translate-a64.h | 3 +++
12
1 file changed, 13 insertions(+), 10 deletions(-)
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
13
2 files changed, 39 insertions(+), 12 deletions(-)
13
14
14
diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/timer/etraxfs_timer.c
17
--- a/target/arm/translate-a64.h
17
+++ b/hw/timer/etraxfs_timer.c
18
+++ b/target/arm/translate-a64.h
18
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
19
#include "hw/sysbus.h"
20
uint32_t rm_ofs, int64_t shift,
20
#include "sysemu/reset.h"
21
uint32_t opr_sz, uint32_t max_sz);
21
#include "sysemu/runstate.h"
22
22
-#include "qemu/main-loop.h"
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
23
#include "qemu/module.h"
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
24
#include "qemu/timer.h"
25
+
25
#include "hw/irq.h"
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
26
@@ -XXX,XX +XXX,XX @@ typedef struct ETRAXTimerState {
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
qemu_irq irq;
28
index XXXXXXX..XXXXXXX 100644
28
qemu_irq nmi;
29
--- a/target/arm/translate-sve.c
29
30
+++ b/target/arm/translate-sve.c
30
- QEMUBH *bh_t0;
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
31
- QEMUBH *bh_t1;
32
* The load should begin at the address Rn + IMM.
32
- QEMUBH *bh_wd;
33
*/
33
ptimer_state *ptimer_t0;
34
34
ptimer_state *ptimer_t1;
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
35
ptimer_state *ptimer_wd;
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
36
@@ -XXX,XX +XXX,XX @@ static void update_ctrl(ETRAXTimerState *t, int tnum)
37
+ int len, int rn, int imm)
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
37
}
81
}
38
82
39
D(printf ("freq_hz=%d div=%d\n", freq_hz, div));
83
/*
40
+ ptimer_transaction_begin(timer);
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
41
ptimer_set_freq(timer, freq_hz);
85
default:
42
ptimer_set_limit(timer, div, 0);
86
g_assert_not_reached();
43
87
}
44
@@ -XXX,XX +XXX,XX @@ static void update_ctrl(ETRAXTimerState *t, int tnum)
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
45
abort();
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
46
break;
90
tcg_temp_free_i64(t0);
47
}
91
}
48
+ ptimer_transaction_commit(timer);
49
}
92
}
50
93
51
static void timer_update_irq(ETRAXTimerState *t)
94
/* Similarly for stores. */
52
@@ -XXX,XX +XXX,XX @@ static inline void timer_watchdog_update(ETRAXTimerState *t, uint32_t value)
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
53
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
54
t->wd_hits = 0;
97
+ int len, int rn, int imm)
55
98
{
56
+ ptimer_transaction_begin(t->ptimer_wd);
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
57
ptimer_set_freq(t->ptimer_wd, 760);
100
int len_remain = len % 8;
58
if (wd_cnt == 0)
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
59
wd_cnt = 256;
102
60
@@ -XXX,XX +XXX,XX @@ static inline void timer_watchdog_update(ETRAXTimerState *t, uint32_t value)
103
t0 = tcg_temp_new_i64();
61
ptimer_stop(t->ptimer_wd);
104
for (i = 0; i < len_align; i += 8) {
62
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
63
t->rw_wd_ctrl = value;
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
64
+ ptimer_transaction_commit(t->ptimer_wd);
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
109
}
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
111
clean_addr = new_tmp_a64_local(s);
112
tcg_gen_mov_i64(clean_addr, t0);
113
114
+ if (base != cpu_env) {
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
118
+ }
119
+
120
gen_set_label(loop);
121
122
t0 = tcg_temp_new_i64();
123
tp = tcg_temp_new_ptr();
124
- tcg_gen_add_ptr(tp, cpu_env, i);
125
+ tcg_gen_add_ptr(tp, base, i);
126
tcg_gen_ld_i64(t0, tp, vofs);
127
tcg_gen_addi_ptr(i, i, 8);
128
tcg_temp_free_ptr(tp);
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
130
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
132
tcg_temp_free_ptr(i);
133
+
134
+ if (base != cpu_env) {
135
+ tcg_temp_free_ptr(base);
136
+ assert(len_remain == 0);
137
+ }
138
}
139
140
/* Predicate register stores can be any multiple of 2. */
141
if (len_remain) {
142
t0 = tcg_temp_new_i64();
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
145
146
switch (len_remain) {
147
case 2:
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
149
if (sve_access_check(s)) {
150
int size = vec_full_reg_size(s);
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
154
}
155
return true;
65
}
156
}
66
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
67
static void
158
if (sve_access_check(s)) {
68
@@ -XXX,XX +XXX,XX @@ static void etraxfs_timer_reset(void *opaque)
159
int size = pred_full_reg_size(s);
69
{
160
int off = pred_full_reg_offset(s, a->rd);
70
ETRAXTimerState *t = opaque;
161
- do_ldr(s, off, size, a->rn, a->imm * size);
71
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
72
+ ptimer_transaction_begin(t->ptimer_t0);
163
}
73
ptimer_stop(t->ptimer_t0);
164
return true;
74
+ ptimer_transaction_commit(t->ptimer_t0);
165
}
75
+ ptimer_transaction_begin(t->ptimer_t1);
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
76
ptimer_stop(t->ptimer_t1);
167
if (sve_access_check(s)) {
77
+ ptimer_transaction_commit(t->ptimer_t1);
168
int size = vec_full_reg_size(s);
78
+ ptimer_transaction_begin(t->ptimer_wd);
169
int off = vec_full_reg_offset(s, a->rd);
79
ptimer_stop(t->ptimer_wd);
170
- do_str(s, off, size, a->rn, a->imm * size);
80
+ ptimer_transaction_commit(t->ptimer_wd);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
81
t->rw_wd_ctrl = 0;
172
}
82
t->r_intr = 0;
173
return true;
83
t->rw_intr_mask = 0;
174
}
84
@@ -XXX,XX +XXX,XX @@ static void etraxfs_timer_realize(DeviceState *dev, Error **errp)
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
85
ETRAXTimerState *t = ETRAX_TIMER(dev);
176
if (sve_access_check(s)) {
86
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
177
int size = pred_full_reg_size(s);
87
178
int off = pred_full_reg_offset(s, a->rd);
88
- t->bh_t0 = qemu_bh_new(timer0_hit, t);
179
- do_str(s, off, size, a->rn, a->imm * size);
89
- t->bh_t1 = qemu_bh_new(timer1_hit, t);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
90
- t->bh_wd = qemu_bh_new(watchdog_hit, t);
181
}
91
- t->ptimer_t0 = ptimer_init_with_bh(t->bh_t0, PTIMER_POLICY_DEFAULT);
182
return true;
92
- t->ptimer_t1 = ptimer_init_with_bh(t->bh_t1, PTIMER_POLICY_DEFAULT);
183
}
93
- t->ptimer_wd = ptimer_init_with_bh(t->bh_wd, PTIMER_POLICY_DEFAULT);
94
+ t->ptimer_t0 = ptimer_init(timer0_hit, t, PTIMER_POLICY_DEFAULT);
95
+ t->ptimer_t1 = ptimer_init(timer1_hit, t, PTIMER_POLICY_DEFAULT);
96
+ t->ptimer_wd = ptimer_init(watchdog_hit, t, PTIMER_POLICY_DEFAULT);
97
98
sysbus_init_irq(sbd, &t->irq);
99
sysbus_init_irq(sbd, &t->nmi);
100
--
184
--
101
2.20.1
185
2.25.1
102
103
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create a function to compute the values of the TBFLAG_A32 bits
3
We can reuse the SVE functions for LDR and STR, passing in the
4
that will be cached, and are used by all profiles.
4
base of the ZA vector and a zero offset.
5
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-4-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.c | 16 +++++++++++-----
11
target/arm/sme.decode | 7 +++++++
12
1 file changed, 11 insertions(+), 5 deletions(-)
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
13
2 files changed, 31 insertions(+)
13
14
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
17
--- a/target/arm/sme.decode
17
+++ b/target/arm/helper.c
18
+++ b/target/arm/sme.decode
18
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
19
return flags;
20
&ldst rs=%mova_rs
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
22
&ldst esz=4 rs=%mova_rs
23
+
24
+&ldstr rv rn imm
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
26
+ &ldstr rv=%mova_rs
27
+
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-sme.c
33
+++ b/target/arm/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
tcg_temp_free_i64(addr);
36
return true;
20
}
37
}
21
38
+
22
+static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
23
+ ARMMMUIdx mmu_idx, uint32_t flags)
40
+
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
24
+{
42
+{
25
+ flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
43
+ int svl = streaming_vec_reg_size(s);
26
+ flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
27
+
46
+
28
+ return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
47
+ if (!sme_za_enabled_check(s)) {
48
+ return true;
49
+ }
50
+
51
+ /* ZA[n] equates to ZA0H.B[n]. */
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
53
+
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
55
+
56
+ tcg_temp_free_ptr(base);
57
+ return true;
29
+}
58
+}
30
+
59
+
31
static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
32
ARMMMUIdx mmu_idx)
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
33
{
34
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
35
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
36
int current_el = arm_current_el(env);
37
int fp_el = fp_exception_el(env, current_el);
38
- uint32_t flags = 0;
39
+ uint32_t flags;
40
41
if (is_a64(env)) {
42
*pc = env->pc;
43
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
44
}
45
} else {
46
*pc = env->regs[15];
47
+ flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
48
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
49
flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
50
flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
51
flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
52
- flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
53
- flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
54
if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
55
|| arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
56
flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
57
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
58
flags = FIELD_DP32(flags, TBFLAG_A32,
59
XSCALE_CPAR, env->cp15.c15_cpar);
60
}
61
-
62
- flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
63
}
64
65
/* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
66
--
62
--
67
2.20.1
63
2.25.1
68
69
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This function assumes nothing about the current state of the cpu,
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
and writes the computed value to env->hflags.
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-13-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
7
---
11
target/arm/cpu.h | 6 ++++++
8
target/arm/helper-sme.h | 5 +++
12
target/arm/helper.c | 30 ++++++++++++++++++++++--------
9
target/arm/sme.decode | 11 +++++
13
2 files changed, 28 insertions(+), 8 deletions(-)
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 31 +++++++++++++
12
4 files changed, 137 insertions(+)
14
13
15
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/cpu.h
16
--- a/target/arm/helper-sme.h
18
+++ b/target/arm/cpu.h
17
+++ b/target/arm/helper-sme.h
19
@@ -XXX,XX +XXX,XX @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
20
void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
21
*opaque);
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
22
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
23
+/**
24
+ * arm_rebuild_hflags:
25
+ * Rebuild the cached TBFLAGS for arbitrary changed processor state.
26
+ */
27
+void arm_rebuild_hflags(CPUARMState *env);
28
+
22
+
29
/**
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
30
* aa32_vfp_dreg:
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
31
* Return a pointer to the Dn register within env in 32-bit mode.
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
32
diff --git a/target/arm/helper.c b/target/arm/helper.c
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
33
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/helper.c
29
--- a/target/arm/sme.decode
35
+++ b/target/arm/helper.c
30
+++ b/target/arm/sme.decode
36
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
37
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
32
38
}
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
39
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
40
+static uint32_t rebuild_hflags_internal(CPUARMState *env)
35
+
36
+### SME Add Vector to Array
37
+
38
+&adda zad zn pm pn
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
41
+
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sme_helper.c
49
+++ b/target/arm/sme_helper.c
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
51
DO_ST(q, _le, MO_128)
52
53
#undef DO_ST
54
+
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
56
+ void *vpm, uint32_t desc)
41
+{
57
+{
42
+ int el = arm_current_el(env);
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
43
+ int fp_el = fp_exception_el(env, el);
59
+ uint64_t *pn = vpn, *pm = vpm;
44
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
60
+ uint32_t *zda = vzda, *zn = vzn;
45
+
61
+
46
+ if (is_a64(env)) {
62
+ for (row = 0; row < oprsz; ) {
47
+ return rebuild_hflags_a64(env, el, fp_el, mmu_idx);
63
+ uint64_t pa = pn[row >> 4];
48
+ } else if (arm_feature(env, ARM_FEATURE_M)) {
64
+ do {
49
+ return rebuild_hflags_m32(env, fp_el, mmu_idx);
65
+ if (pa & 1) {
50
+ } else {
66
+ for (col = 0; col < oprsz; ) {
51
+ return rebuild_hflags_a32(env, fp_el, mmu_idx);
67
+ uint64_t pb = pm[col >> 4];
68
+ do {
69
+ if (pb & 1) {
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
71
+ }
72
+ pb >>= 4;
73
+ } while (++col & 15);
74
+ }
75
+ }
76
+ pa >>= 4;
77
+ } while (++row & 15);
52
+ }
78
+ }
53
+}
79
+}
54
+
80
+
55
+void arm_rebuild_hflags(CPUARMState *env)
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
56
+{
83
+{
57
+ env->hflags = rebuild_hflags_internal(env);
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
85
+ uint8_t *pn = vpn, *pm = vpm;
86
+ uint64_t *zda = vzda, *zn = vzn;
87
+
88
+ for (row = 0; row < oprsz; ++row) {
89
+ if (pn[H1(row)] & 1) {
90
+ for (col = 0; col < oprsz; ++col) {
91
+ if (pm[H1(col)] & 1) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
93
+ }
94
+ }
95
+ }
96
+ }
58
+}
97
+}
59
+
98
+
60
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
61
target_ulong *cs_base, uint32_t *pflags)
100
+ void *vpm, uint32_t desc)
62
{
101
+{
63
- ARMMMUIdx mmu_idx = arm_mmu_idx(env);
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
64
- int current_el = arm_current_el(env);
103
+ uint64_t *pn = vpn, *pm = vpm;
65
- int fp_el = fp_exception_el(env, current_el);
104
+ uint32_t *zda = vzda, *zn = vzn;
66
uint32_t flags, pstate_for_ss;
67
68
+ flags = rebuild_hflags_internal(env);
69
+
105
+
70
if (is_a64(env)) {
106
+ for (row = 0; row < oprsz; ) {
71
*pc = env->pc;
107
+ uint64_t pa = pn[row >> 4];
72
- flags = rebuild_hflags_a64(env, current_el, fp_el, mmu_idx);
108
+ do {
73
if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
109
+ if (pa & 1) {
74
flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
110
+ uint32_t zn_row = zn[H4(row)];
75
}
111
+ for (col = 0; col < oprsz; ) {
76
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
112
+ uint64_t pb = pm[col >> 4];
77
*pc = env->regs[15];
113
+ do {
78
114
+ if (pb & 1) {
79
if (arm_feature(env, ARM_FEATURE_M)) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
80
- flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
116
+ }
81
-
117
+ pb >>= 4;
82
if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
118
+ } while (++col & 15);
83
FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
119
+ }
84
!= env->v7m.secure) {
120
+ }
85
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
121
+ pa >>= 4;
86
flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
122
+ } while (++row & 15);
87
}
123
+ }
88
} else {
124
+}
89
- flags = rebuild_hflags_a32(env, fp_el, mmu_idx);
125
+
90
-
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
91
/*
127
+ void *vpm, uint32_t desc)
92
* Note that XSCALE_CPAR shares bits with VECSTRIDE.
128
+{
93
* Note that VECLEN+VECSTRIDE are RES0 for M-profile.
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
132
+
133
+ for (row = 0; row < oprsz; ++row) {
134
+ if (pn[H1(row)] & 1) {
135
+ uint64_t zn_row = zn[row];
136
+ for (col = 0; col < oprsz; ++col) {
137
+ if (pm[H1(col)] & 1) {
138
+ zda[tile_vslice_index(row) + col] += zn_row;
139
+ }
140
+ }
141
+ }
142
+ }
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
152
+
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
155
+{
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
159
+
160
+ if (!sme_smza_enabled_check(s)) {
161
+ return true;
162
+ }
163
+
164
+ /* Sum XZR+zad to find ZAd. */
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
169
+
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
177
+}
178
+
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
94
--
183
--
95
2.20.1
184
2.25.1
96
97
diff view generated by jsdifflib
1
In commit b01422622b we did an automated rename of the ptimer_init()
1
From: Richard Henderson <richard.henderson@linaro.org>
2
function to ptimer_init_with_bh(). Unfortunately this caught the
3
unrelated arm_mptimer_init() function. Undo that accidental
4
renaming.
5
2
6
Fixes: b01422622b7c7293196fdaf1dbb4f495af44ecf9
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
7
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20191017133331.5901-1-peter.maydell@linaro.org
11
---
7
---
12
hw/timer/arm_mptimer.c | 4 ++--
8
target/arm/helper-sme.h | 5 +++
13
1 file changed, 2 insertions(+), 2 deletions(-)
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
14
13
15
diff --git a/hw/timer/arm_mptimer.c b/hw/timer/arm_mptimer.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/timer/arm_mptimer.c
16
--- a/target/arm/helper-sme.h
18
+++ b/hw/timer/arm_mptimer.c
17
+++ b/target/arm/helper-sme.h
19
@@ -XXX,XX +XXX,XX @@ static void arm_mptimer_reset(DeviceState *dev)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
22
+
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
36
+### SME Outer Product
37
+
38
+&op zad zn zm pm pn sub:bool
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
+
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/arm/sme_helper.c
47
+++ b/target/arm/sme_helper.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "exec/cpu_ldst.h"
50
#include "exec/exec-all.h"
51
#include "qemu/int128.h"
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
20
}
58
}
21
}
59
}
22
60
+
23
-static void arm_mptimer_init_with_bh(Object *obj)
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
24
+static void arm_mptimer_init(Object *obj)
62
+ void *vpm, void *vst, uint32_t desc)
25
{
63
+{
26
ARMMPTimerState *s = ARM_MPTIMER(obj);
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
27
65
+ uint32_t neg = simd_data(desc) << 31;
28
@@ -XXX,XX +XXX,XX @@ static const TypeInfo arm_mptimer_info = {
66
+ uint16_t *pn = vpn, *pm = vpm;
29
.name = TYPE_ARM_MPTIMER,
67
+ float_status fpst;
30
.parent = TYPE_SYS_BUS_DEVICE,
68
+
31
.instance_size = sizeof(ARMMPTimerState),
69
+ /*
32
- .instance_init = arm_mptimer_init_with_bh,
70
+ * Make a copy of float_status because this operation does not
33
+ .instance_init = arm_mptimer_init,
71
+ * update the cumulative fp exception status. It also produces
34
.class_init = arm_mptimer_class_init,
72
+ * default nans.
35
};
73
+ */
36
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
76
+
77
+ for (row = 0; row < oprsz; ) {
78
+ uint16_t pa = pn[H2(row >> 4)];
79
+ do {
80
+ if (pa & 1) {
81
+ void *vza_row = vza + tile_vslice_offset(row);
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
83
+
84
+ for (col = 0; col < oprsz; ) {
85
+ uint16_t pb = pm[H2(col >> 4)];
86
+ do {
87
+ if (pb & 1) {
88
+ uint32_t *a = vza_row + H1_4(col);
89
+ uint32_t *m = vzm + H1_4(col);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
91
+ }
92
+ col += 4;
93
+ pb >>= 4;
94
+ } while (col & 15);
95
+ }
96
+ }
97
+ row += 4;
98
+ pa >>= 4;
99
+ } while (row & 15);
100
+ }
101
+}
102
+
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
104
+ void *vpm, void *vst, uint32_t desc)
105
+{
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
111
+
112
+ set_default_nan_mode(true, &fpst);
113
+
114
+ for (row = 0; row < oprsz; ++row) {
115
+ if (pn[H1(row)] & 1) {
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
117
+ uint64_t n = zn[row] ^ neg;
118
+
119
+ for (col = 0; col < oprsz; ++col) {
120
+ if (pm[H1(col)] & 1) {
121
+ uint64_t *a = &za_row[col];
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
123
+ }
124
+ }
125
+ }
126
+ }
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
164
+}
165
+
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
37
--
168
--
38
2.20.1
169
2.25.1
39
40
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Set TBFLAG_ANY.BE_DATA in rebuild_hflags_common_32 and
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
rebuild_hflags_a64 instead of rebuild_hflags_common, where we do
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
5
not need to re-test is_a64() nor re-compute the various inputs.
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
6
13
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20191018174431.1784-5-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/cpu.h | 49 +++++++++++++++++++++++++++------------------
13
target/arm/helper.c | 16 +++++++++++----
14
2 files changed, 42 insertions(+), 23 deletions(-)
15
16
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/cpu.h
16
--- a/target/arm/helper-sme.h
19
+++ b/target/arm/cpu.h
17
+++ b/target/arm/helper-sme.h
20
@@ -XXX,XX +XXX,XX @@ static inline uint64_t arm_sctlr(CPUARMState *env, int el)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
21
}
40
}
22
}
41
}
23
42
+
24
+static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
43
+/*
25
+ bool sctlr_b)
44
+ * Alter PAIR as needed for controlling predicates being false,
45
+ * and for NEG on an enabled row element.
46
+ */
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
26
+{
48
+{
27
+#ifdef CONFIG_USER_ONLY
28
+ /*
49
+ /*
29
+ * In system mode, BE32 is modelled in line with the
50
+ * The pseudocode uses a conditional negate after the conditional zero.
30
+ * architecture (as word-invariant big-endianness), where loads
51
+ * It is simpler here to unconditionally negate before conditional zero.
31
+ * and stores are done little endian but from addresses which
32
+ * are adjusted by XORing with the appropriate constant. So the
33
+ * endianness to use for the raw data access is not affected by
34
+ * SCTLR.B.
35
+ * In user mode, however, we model BE32 as byte-invariant
36
+ * big-endianness (because user-only code cannot tell the
37
+ * difference), and so we need to use a data access endianness
38
+ * that depends on SCTLR.B.
39
+ */
52
+ */
40
+ if (sctlr_b) {
53
+ pair ^= neg;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
61
+}
62
+
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
64
+ void *vpm, uint32_t desc)
65
+{
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
68
+ uint16_t *pn = vpn, *pm = vpm;
69
+
70
+ for (row = 0; row < oprsz; ) {
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
41
+ return true;
114
+ return true;
42
+ }
115
+ }
43
+#endif
116
+
44
+ /* In 32bit endianness is determined by looking at CPSR's E bit */
117
+ /* Sum XZR+zad to find ZAd. */
45
+ return env->uncached_cpsr & CPSR_E;
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
46
+}
131
+}
47
+
132
+
48
+static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
49
+{
134
gen_helper_gvec_5_ptr *fn)
50
+ return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
51
+}
52
53
/* Return true if the processor is in big-endian mode. */
54
static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
55
{
135
{
56
- /* In 32bit endianness is determined by looking at CPSR's E bit */
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
57
if (!is_a64(env)) {
137
58
- return
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
59
-#ifdef CONFIG_USER_ONLY
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
60
- /* In system mode, BE32 is modelled in line with the
61
- * architecture (as word-invariant big-endianness), where loads
62
- * and stores are done little endian but from addresses which
63
- * are adjusted by XORing with the appropriate constant. So the
64
- * endianness to use for the raw data access is not affected by
65
- * SCTLR.B.
66
- * In user mode, however, we model BE32 as byte-invariant
67
- * big-endianness (because user-only code cannot tell the
68
- * difference), and so we need to use a data access endianness
69
- * that depends on SCTLR.B.
70
- */
71
- arm_sctlr_b(env) ||
72
-#endif
73
- ((env->uncached_cpsr & CPSR_E) ? 1 : 0);
74
+ return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
75
} else {
76
int cur_el = arm_current_el(env);
77
uint64_t sctlr = arm_sctlr(env, cur_el);
78
-
79
- return (sctlr & (cur_el ? SCTLR_EE : SCTLR_E0E)) != 0;
80
+ return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
81
}
82
}
83
84
diff --git a/target/arm/helper.c b/target/arm/helper.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/target/arm/helper.c
87
+++ b/target/arm/helper.c
88
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
89
flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
90
arm_to_core_mmu_idx(mmu_idx));
91
92
- if (arm_cpu_data_is_big_endian(env)) {
93
- flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
94
- }
95
if (arm_singlestep_active(env)) {
96
flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
97
}
98
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
99
static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
100
ARMMMUIdx mmu_idx, uint32_t flags)
101
{
102
- flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
103
+ bool sctlr_b = arm_sctlr_b(env);
104
+
140
+
105
+ if (sctlr_b) {
141
+/* TODO: FEAT_EBF16 */
106
+ flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, 1);
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
107
+ }
108
+ if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) {
109
+ flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
110
+ }
111
flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
112
113
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
114
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
115
116
sctlr = arm_sctlr(env, el);
117
118
+ if (arm_cpu_data_is_big_endian_a64(el, sctlr)) {
119
+ flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
120
+ }
121
+
122
if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
123
/*
124
* In order to save space in flags, we record only whether
125
--
143
--
126
2.20.1
144
2.25.1
127
128
diff view generated by jsdifflib
1
Switch the lm32_timer code away from bottom-half based ptimers to the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the ytimer.
5
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-id: 20191017132905.5604-4-peter.maydell@linaro.org
10
---
7
---
11
hw/timer/lm32_timer.c | 13 +++++++++----
8
target/arm/helper-sme.h | 2 ++
12
1 file changed, 9 insertions(+), 4 deletions(-)
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
13
13
14
diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/timer/lm32_timer.c
16
--- a/target/arm/helper-sme.h
17
+++ b/hw/timer/lm32_timer.c
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
19
#include "hw/ptimer.h"
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
20
#include "hw/qdev-properties.h"
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
21
#include "qemu/error-report.h"
21
22
-#include "qemu/main-loop.h"
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
23
#include "qemu/module.h"
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
24
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
25
#define DEFAULT_FREQUENCY (50*1000000)
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
26
@@ -XXX,XX +XXX,XX @@ struct LM32TimerState {
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
27
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
MemoryRegion iomem;
28
index XXXXXXX..XXXXXXX 100644
29
29
--- a/target/arm/sme.decode
30
- QEMUBH *bh;
30
+++ b/target/arm/sme.decode
31
ptimer_state *ptimer;
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
32
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
33
qemu_irq irq;
33
34
@@ -XXX,XX +XXX,XX @@ static void timer_write(void *opaque, hwaddr addr,
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
35
s->regs[R_SR] &= ~SR_TO;
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
36
break;
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
case R_CR:
37
index XXXXXXX..XXXXXXX 100644
38
+ ptimer_transaction_begin(s->ptimer);
38
--- a/target/arm/sme_helper.c
39
s->regs[R_CR] = value;
39
+++ b/target/arm/sme_helper.c
40
if (s->regs[R_CR] & CR_START) {
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
41
ptimer_run(s->ptimer, 1);
41
return pair;
42
@@ -XXX,XX +XXX,XX @@ static void timer_write(void *opaque, hwaddr addr,
43
if (s->regs[R_CR] & CR_STOP) {
44
ptimer_stop(s->ptimer);
45
}
46
+ ptimer_transaction_commit(s->ptimer);
47
break;
48
case R_PERIOD:
49
s->regs[R_PERIOD] = value;
50
+ ptimer_transaction_begin(s->ptimer);
51
ptimer_set_count(s->ptimer, value);
52
+ ptimer_transaction_commit(s->ptimer);
53
break;
54
case R_SNAPSHOT:
55
error_report("lm32_timer: write access to read only register 0x"
56
@@ -XXX,XX +XXX,XX @@ static void timer_reset(DeviceState *d)
57
for (i = 0; i < R_MAX; i++) {
58
s->regs[i] = 0;
59
}
60
+ ptimer_transaction_begin(s->ptimer);
61
ptimer_stop(s->ptimer);
62
+ ptimer_transaction_commit(s->ptimer);
63
}
42
}
64
43
65
static void lm32_timer_init(Object *obj)
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
66
@@ -XXX,XX +XXX,XX @@ static void lm32_timer_realize(DeviceState *dev, Error **errp)
45
+ float_status *s_std, float_status *s_odd)
46
+{
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
53
+
54
+ /*
55
+ * The ARM pseudocode function FPDot performs both multiplies
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
63
+
64
+ /* This conversion is exact, because we've already rounded. */
65
+ t32 = float64_to_float32(t64, s_std);
66
+
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
69
+}
70
+
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
67
{
120
{
68
LM32TimerState *s = LM32_TIMER(dev);
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
69
122
index XXXXXXX..XXXXXXX 100644
70
- s->bh = qemu_bh_new(timer_hit, s);
123
--- a/target/arm/translate-sme.c
71
- s->ptimer = ptimer_init_with_bh(s->bh, PTIMER_POLICY_DEFAULT);
124
+++ b/target/arm/translate-sme.c
72
+ s->ptimer = ptimer_init(timer_hit, s, PTIMER_POLICY_DEFAULT);
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
73
126
return true;
74
+ ptimer_transaction_begin(s->ptimer);
75
ptimer_set_freq(s->ptimer, s->freq_hz);
76
+ ptimer_transaction_commit(s->ptimer);
77
}
127
}
78
128
79
static const VMStateDescription vmstate_lm32_timer = {
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
132
80
--
133
--
81
2.20.1
134
2.25.1
82
83
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Avoid calling arm_current_el() twice.
3
This is SMOPA, SUMOPA, USMOPA_s, UMOPA, for both Int8 and Int16.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-14-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
9
---
11
target/arm/internals.h | 9 +++++++++
10
target/arm/helper-sme.h | 16 ++++++++
12
target/arm/helper.c | 12 +++++++-----
11
target/arm/sme.decode | 10 +++++
13
2 files changed, 16 insertions(+), 5 deletions(-)
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
14
4 files changed, 118 insertions(+)
14
15
15
diff --git a/target/arm/internals.h b/target/arm/internals.h
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/internals.h
18
--- a/target/arm/helper-sme.h
18
+++ b/target/arm/internals.h
19
+++ b/target/arm/helper-sme.h
19
@@ -XXX,XX +XXX,XX @@ void arm_cpu_update_virq(ARMCPU *cpu);
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
20
*/
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
21
void arm_cpu_update_vfiq(ARMCPU *cpu);
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
22
23
void, ptr, ptr, ptr, ptr, ptr, i32)
23
+/**
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
24
+ * arm_mmu_idx_el:
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
25
+ * @env: The cpu environment
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
26
+ * @el: The EL to use.
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
27
+ *
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
28
+ * Return the full ARMMMUIdx for the translation regime for EL.
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
29
+ */
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
30
+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el);
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
31
+
48
+
32
/**
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
33
* arm_mmu_idx:
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
34
* @env: The cpu environment
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
35
diff --git a/target/arm/helper.c b/target/arm/helper.c
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
53
+
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
36
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/helper.c
60
--- a/target/arm/sme_helper.c
38
+++ b/target/arm/helper.c
61
+++ b/target/arm/sme_helper.c
39
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
40
}
63
} while (row & 15);
41
#endif
42
43
-ARMMMUIdx arm_mmu_idx(CPUARMState *env)
44
+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
45
{
46
- int el;
47
-
48
if (arm_feature(env, ARM_FEATURE_M)) {
49
return arm_v7m_mmu_idx_for_secstate(env, env->v7m.secure);
50
}
51
52
- el = arm_current_el(env);
53
if (el < 2 && arm_is_secure_below_el3(env)) {
54
return ARMMMUIdx_S1SE0 + el;
55
} else {
56
@@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env)
57
}
64
}
58
}
65
}
59
66
+
60
+ARMMMUIdx arm_mmu_idx(CPUARMState *env)
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
68
+
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
61
+{
72
+{
62
+ return arm_mmu_idx_el(env, arm_current_el(env));
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
75
+
76
+ for (row = 0; row < oprsz; ++row) {
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
80
+
81
+ for (col = 0; col < oprsz; ++col) {
82
+ uint8_t pb = pm[H1(col)];
83
+ uint64_t *a = &za_row[col];
84
+
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
86
+ }
87
+ }
63
+}
88
+}
64
+
89
+
65
int cpu_mmu_index(CPUARMState *env, bool ifetch)
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
66
{
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
67
return arm_to_core_mmu_idx(arm_mmu_idx(env));
92
+{ \
68
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_internal(CPUARMState *env)
93
+ uint32_t sum0 = 0, sum1 = 0; \
69
{
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
70
int el = arm_current_el(env);
95
+ n &= expand_pred_b(p); \
71
int fp_el = fp_exception_el(env, el);
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
72
- ARMMMUIdx mmu_idx = arm_mmu_idx(env);
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
73
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
74
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
75
if (is_a64(env)) {
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
76
return rebuild_hflags_a64(env, el, fp_el, mmu_idx);
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
104
+ if (neg) { \
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
110
+}
111
+
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
114
+{ \
115
+ uint64_t sum = 0; \
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
117
+ n &= expand_pred_h(p); \
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
122
+ return neg ? a - sum : a + sum; \
123
+}
124
+
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
129
+
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
134
+
135
+#define DEF_IMOPH(NAME) \
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
137
+ void *vpm, uint32_t desc) \
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
139
+
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
77
--
166
--
78
2.20.1
167
2.25.1
79
80
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create a function to compute the values of the TBFLAG_A32 bits
3
This is an SVE instruction that operates using the SVE vector
4
that will be cached, and are used by M-profile.
4
length but that it is present only if SME is implemented.
5
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-6-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.c | 45 ++++++++++++++++++++++++++++++---------------
11
target/arm/sve.decode | 20 +++++++++++++
12
1 file changed, 30 insertions(+), 15 deletions(-)
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
13
2 files changed, 77 insertions(+)
13
14
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
17
--- a/target/arm/sve.decode
17
+++ b/target/arm/helper.c
18
+++ b/target/arm/sve.decode
18
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
19
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
20
20
}
21
### SVE2 floating-point bfloat16 dot-product (indexed)
21
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
22
+static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
23
+
23
+ ARMMMUIdx mmu_idx)
24
+### SVE broadcast predicate element
25
+
26
+&psel esz pd pn pm rv imm
27
+%psel_rv 16:2 !function=plus_12
28
+%psel_imm_b 22:2 19:2
29
+%psel_imm_h 22:2 20:1
30
+%psel_imm_s 22:2
31
+%psel_imm_d 23:1
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
33
+ &psel rv=%psel_rv
34
+
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
36
+ @psel esz=0 imm=%psel_imm_b
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
51
+
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
24
+{
53
+{
25
+ uint32_t flags = 0;
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
26
+
59
+
27
+ if (arm_v7m_is_handler_mode(env)) {
60
+ if (!dc_isar_feature(aa64_sme, s)) {
28
+ flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
29
+ }
65
+ }
30
+
66
+
31
+ /*
67
+ tmp = tcg_temp_new_i64();
32
+ * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN
68
+ dbit = tcg_temp_new_i64();
33
+ * is suppressing them because the requested execution priority
69
+ didx = tcg_temp_new_i64();
34
+ * is less than 0.
70
+ ptr = tcg_temp_new_ptr();
35
+ */
71
+
36
+ if (arm_feature(env, ARM_FEATURE_V8) &&
72
+ /* Compute the predicate element. */
37
+ !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
38
+ (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
74
+ if (is_power_of_2(elements)) {
39
+ flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
40
+ }
78
+ }
41
+
79
+
42
+ return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
80
+ /* Extract the predicate byte and bit indices. */
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
86
+ }
87
+
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
106
+ return true;
43
+}
107
+}
44
+
45
static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
46
ARMMMUIdx mmu_idx)
47
{
48
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
49
}
50
} else {
51
*pc = env->regs[15];
52
- flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
53
+
54
+ if (arm_feature(env, ARM_FEATURE_M)) {
55
+ flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
56
+ } else {
57
+ flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
58
+ }
59
+
60
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
61
flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
62
flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
63
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
64
}
65
}
66
67
- if (arm_v7m_is_handler_mode(env)) {
68
- flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
69
- }
70
-
71
- /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
72
- * suppressing them because the requested execution priority is less than 0.
73
- */
74
- if (arm_feature(env, ARM_FEATURE_V8) &&
75
- arm_feature(env, ARM_FEATURE_M) &&
76
- !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
77
- (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
78
- flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
79
- }
80
-
81
if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
82
FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
83
flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
84
--
108
--
85
2.20.1
109
2.25.1
86
87
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
There are 3 conditions that each enable this flag. M-profile always
3
This is an SVE instruction that operates using the SVE vector
4
enables; A-profile with EL1 as AA64 always enables. Both of these
4
length but that it is present only if SME is implemented.
5
conditions can easily be cached. The final condition relies on the
6
FPEXC register which we are not prepared to cache.
7
5
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20191018174431.1784-12-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
target/arm/cpu.h | 2 +-
11
target/arm/helper-sve.h | 2 ++
14
target/arm/helper.c | 14 ++++++++++----
12
target/arm/sve.decode | 1 +
15
2 files changed, 11 insertions(+), 5 deletions(-)
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
16
16
17
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/cpu.h
19
--- a/target/arm/helper-sve.h
20
+++ b/target/arm/cpu.h
20
+++ b/target/arm/helper-sve.h
21
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
* the same thing as the current security state of the processor!
22
23
*/
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
FIELD(TBFLAG_A32, NS, 6, 1)
24
25
-FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Not cached. */
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Partially cached, minus FPEXC. */
26
+
27
FIELD(TBFLAG_A32, CONDEXEC, 8, 8) /* Not cached. */
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
/* For M profile only, set if FPCCR.LSPACT is set */
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
diff --git a/target/arm/helper.c b/target/arm/helper.c
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
31
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/helper.c
32
--- a/target/arm/sve.decode
33
+++ b/target/arm/helper.c
33
+++ b/target/arm/sve.decode
34
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
35
{
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
36
uint32_t flags = 0;
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
37
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
38
+ /* v8M always enables the fpu. */
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
39
+ flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
39
40
# SVE vector splice (predicated, destructive)
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/sve_helper.c
45
+++ b/target/arm/sve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
47
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
49
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
51
+{
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
53
+ uint64_t *d = vd, *n = vn;
54
+ uint8_t *pg = vg;
40
+
55
+
41
if (arm_v7m_is_handler_mode(env)) {
56
+ for (i = 0; i < opr_sz; i += 2) {
42
flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
57
+ if (pg[H1(i)] & 1) {
43
}
58
+ uint64_t n0 = n[i + 0];
44
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
59
+ uint64_t n1 = n[i + 1];
45
ARMMMUIdx mmu_idx)
60
+ d[i + 0] = n1;
46
{
61
+ d[i + 1] = n0;
47
uint32_t flags = rebuild_hflags_aprofile(env);
62
+ }
63
+ }
64
+}
48
+
65
+
49
+ if (arm_el_is_aa64(env, 1)) {
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
50
+ flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
51
+ }
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
52
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
53
}
70
index XXXXXXX..XXXXXXX 100644
54
71
--- a/target/arm/translate-sve.c
55
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
72
+++ b/target/arm/translate-sve.c
56
flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
57
env->vfp.vec_stride);
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
58
}
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
59
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
76
60
+ flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
61
+ }
78
+
62
}
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
63
80
gen_helper_sve_splice, a, a->esz)
64
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
65
flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
66
- if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
67
- || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
68
- flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
69
- }
70
pstate_for_ss = env->uncached_cpsr;
71
}
72
81
73
--
82
--
74
2.20.1
83
2.25.1
75
76
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This functions are given the mode and el state of the cpu
3
This is an SVE instruction that operates using the SVE vector
4
and writes the computed value to env->hflags.
4
length but that it is present only if SME is implemented.
5
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-16-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.h | 4 ++++
11
target/arm/helper.h | 18 +++++++
12
target/arm/helper.c | 24 ++++++++++++++++++++++++
12
target/arm/sve.decode | 5 ++
13
2 files changed, 28 insertions(+)
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
14
16
15
diff --git a/target/arm/helper.h b/target/arm/helper.h
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/helper.h
19
--- a/target/arm/helper.h
18
+++ b/target/arm/helper.h
20
+++ b/target/arm/helper.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
20
DEF_HELPER_2(get_user_reg, i32, env, i32)
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
21
DEF_HELPER_3(set_user_reg, void, env, i32, i32)
23
void, ptr, ptr, ptr, ptr, ptr, i32)
22
24
23
+DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int)
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
24
+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
26
+ void, ptr, ptr, ptr, ptr, i32)
25
+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
26
+
28
+ void, ptr, ptr, ptr, ptr, i32)
27
DEF_HELPER_1(vfp_get_fpscr, i32, env)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
28
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
30
+ void, ptr, ptr, ptr, ptr, i32)
29
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
30
diff --git a/target/arm/helper.c b/target/arm/helper.c
32
+ void, ptr, ptr, ptr, ptr, i32)
31
index XXXXXXX..XXXXXXX 100644
33
+
32
--- a/target/arm/helper.c
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
33
+++ b/target/arm/helper.c
35
+ void, ptr, ptr, ptr, ptr, i32)
34
@@ -XXX,XX +XXX,XX @@ void arm_rebuild_hflags(CPUARMState *env)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
35
env->hflags = rebuild_hflags_internal(env);
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
36
}
66
}
37
67
+
38
+void HELPER(rebuild_hflags_m32)(CPUARMState *env, int el)
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
39
+{
69
+{
40
+ int fp_el = fp_exception_el(env, el);
70
+ tcg_gen_smax_i32(d, a, n);
41
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
71
+ tcg_gen_smin_i32(d, d, m);
42
+
72
+}
43
+ env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx);
73
+
44
+}
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
45
+
75
+{
46
+void HELPER(rebuild_hflags_a32)(CPUARMState *env, int el)
76
+ tcg_gen_smax_i64(d, a, n);
47
+{
77
+ tcg_gen_smin_i64(d, d, m);
48
+ int fp_el = fp_exception_el(env, el);
78
+}
49
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
79
+
50
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
51
+ env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx);
81
+ TCGv_vec m, TCGv_vec a)
52
+}
82
+{
53
+
83
+ tcg_gen_smax_vec(vece, d, a, n);
54
+void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
84
+ tcg_gen_smin_vec(vece, d, d, m);
55
+{
85
+}
56
+ int fp_el = fp_exception_el(env, el);
86
+
57
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
58
+
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
59
+ env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx);
89
+{
60
+}
90
+ static const TCGOpcode vecop[] = {
61
+
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
62
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
92
+ };
63
target_ulong *cs_base, uint32_t *pflags)
93
+ static const GVecGen4 ops[4] = {
64
{
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
65
--
201
--
66
2.20.1
202
2.25.1
67
68
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
By performing this store early, we avoid having to save and restore
3
We can handle both exception entry and exception return by
4
the register holding the address around any function calls.
4
hooking into aarch64_sve_change_el.
5
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-15-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.c | 2 +-
11
target/arm/helper.c | 15 +++++++++++++--
12
1 file changed, 1 insertion(+), 1 deletion(-)
12
1 file changed, 13 insertions(+), 2 deletions(-)
13
13
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
16
--- a/target/arm/helper.c
17
+++ b/target/arm/helper.c
17
+++ b/target/arm/helper.c
18
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
19
{
19
return;
20
uint32_t flags, pstate_for_ss;
21
22
+ *cs_base = 0;
23
flags = rebuild_hflags_internal(env);
24
25
if (is_a64(env)) {
26
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
27
}
20
}
28
21
29
*pflags = flags;
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
30
- *cs_base = 0;
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
31
}
24
+
32
25
+ /*
33
#ifdef TARGET_AARCH64
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
27
+ * invoke ResetSVEState when taking an exception from, or
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
29
+ */
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
31
+ arm_reset_sve_state(env);
32
+ return;
33
+ }
34
+
35
/*
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
37
* at ELx, or not available because the EL is in AArch32 state, then
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
39
* we already have the correct register contents when encountering the
40
* vq0->vq0 transition between EL0->EL1.
41
*/
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
44
? sve_vqm1_for_el(env, old_el) : 0);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
47
? sve_vqm1_for_el(env, new_el) : 0);
48
34
--
49
--
35
2.20.1
50
2.25.1
36
37
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Hoist the computation of some TBFLAG_A32 bits that only apply to
3
Note that SME remains effectively disabled for user-only,
4
M-profile under a single test for ARM_FEATURE_M.
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
5
6
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-7-richard.henderson@linaro.org
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
---
11
target/arm/helper.c | 49 +++++++++++++++++++++------------------------
12
docs/system/arm/emulation.rst | 4 ++++
12
1 file changed, 23 insertions(+), 26 deletions(-)
13
target/arm/cpu64.c | 11 +++++++++++
14
2 files changed, 15 insertions(+)
13
15
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
18
--- a/docs/system/arm/emulation.rst
17
+++ b/target/arm/helper.c
19
+++ b/docs/system/arm/emulation.rst
18
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
19
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
20
if (arm_feature(env, ARM_FEATURE_M)) {
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
21
flags = rebuild_hflags_m32(env, fp_el, mmu_idx);
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
24
+- FEAT_SME (Scalable Matrix Extension)
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
28
- FEAT_SPECRES (Speculation restriction instructions)
29
- FEAT_SSBS (Speculative Store Bypass Safe)
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/cpu64.c
34
+++ b/target/arm/cpu64.c
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
36
*/
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
41
cpu->isar.id_aa64pfr1 = t;
42
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
45
cpu->isar.id_aa64dfr0 = t;
46
47
+ t = cpu->isar.id_aa64smfr0;
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
55
+ cpu->isar.id_aa64smfr0 = t;
22
+
56
+
23
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
57
/* Replicate the same data to the 32-bit id registers. */
24
+ FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
58
aa32_max_features(cpu);
25
+ != env->v7m.secure) {
26
+ flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
27
+ }
28
+
29
+ if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
30
+ (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
31
+ (env->v7m.secure &&
32
+ !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
33
+ /*
34
+ * ASPEN is set, but FPCA/SFPA indicate that there is no
35
+ * active FP context; we must create a new FP context before
36
+ * executing any FP insn.
37
+ */
38
+ flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
39
+ }
40
+
41
+ bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
42
+ if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
43
+ flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
44
+ }
45
} else {
46
flags = rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
47
}
48
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
49
}
50
}
51
52
- if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
53
- FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
54
- flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
55
- }
56
-
57
- if (arm_feature(env, ARM_FEATURE_M) &&
58
- (env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
59
- (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
60
- (env->v7m.secure &&
61
- !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
62
- /*
63
- * ASPEN is set, but FPCA/SFPA indicate that there is no active
64
- * FP context; we must create a new FP context before executing
65
- * any FP insn.
66
- */
67
- flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
68
- }
69
-
70
- if (arm_feature(env, ARM_FEATURE_M)) {
71
- bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
72
-
73
- if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
74
- flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
75
- }
76
- }
77
-
78
if (!arm_feature(env, ARM_FEATURE_M)) {
79
int target_el = arm_debug_target_el(env);
80
59
81
--
60
--
82
2.20.1
61
2.25.1
83
84
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Continue setting, but not relying upon, env->hflags.
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-id: 20191018174431.1784-20-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
7
---
10
target/arm/helper.c | 10 ++++++++++
8
linux-user/aarch64/target_cpu.h | 5 ++++-
11
1 file changed, 10 insertions(+)
9
1 file changed, 4 insertions(+), 1 deletion(-)
12
10
13
diff --git a/target/arm/helper.c b/target/arm/helper.c
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.c
13
--- a/linux-user/aarch64/target_cpu.h
16
+++ b/target/arm/helper.c
14
+++ b/linux-user/aarch64/target_cpu.h
17
@@ -XXX,XX +XXX,XX @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
18
/* ??? Lots of these bits are not implemented. */
16
19
/* This may enable/disable the MMU, so do a TLB flush. */
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
20
tlb_flush(CPU(cpu));
18
{
21
+
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
+ if (ri->type & ARM_CP_SUPPRESS_TB_END) {
20
+ /*
23
+ /*
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
24
+ * Normally we would always end the TB on an SCTLR write; see the
22
* different from AArch32 Linux, which uses TPIDRRO.
25
+ * comment in ARMCPRegInfo sctlr initialization below for why Xscale
23
*/
26
+ * is special. Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild
24
env->cp15.tpidr_el[0] = newtls;
27
+ * of hflags from the translator, so do it here.
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
28
+ */
26
+ env->cp15.tpidr2_el0 = 0;
29
+ arm_rebuild_hflags(env);
30
+ }
31
}
27
}
32
28
33
static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri,
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
34
--
30
--
35
2.20.1
31
2.25.1
36
37
diff view generated by jsdifflib
1
Switch the mcf5208 code away from bottom-half based ptimers to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
the new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the timer.
5
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Tested-by: Thomas Huth <huth@tuxfamily.org>
10
Message-id: 20191017132905.5604-9-peter.maydell@linaro.org
11
---
7
---
12
hw/m68k/mcf5208.c | 9 +++++----
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
13
1 file changed, 5 insertions(+), 4 deletions(-)
9
1 file changed, 9 insertions(+)
14
10
15
diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/m68k/mcf5208.c
13
--- a/linux-user/aarch64/cpu_loop.c
18
+++ b/hw/m68k/mcf5208.c
14
+++ b/linux-user/aarch64/cpu_loop.c
19
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
20
#include "qemu/osdep.h"
16
21
#include "qemu/units.h"
17
switch (trapnr) {
22
#include "qemu/error-report.h"
18
case EXCP_SWI:
23
-#include "qemu/main-loop.h"
19
+ /*
24
#include "qapi/error.h"
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
25
#include "qemu-common.h"
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
26
#include "cpu.h"
22
+ */
27
@@ -XXX,XX +XXX,XX @@ static void m5208_timer_write(void *opaque, hwaddr offset,
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
28
return;
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
29
}
25
+ arm_rebuild_hflags(env);
30
26
+ arm_reset_sve_state(env);
31
+ ptimer_transaction_begin(s->timer);
27
+ }
32
if (s->pcsr & PCSR_EN)
28
ret = do_syscall(env,
33
ptimer_stop(s->timer);
29
env->xregs[8],
34
30
env->xregs[0],
35
@@ -XXX,XX +XXX,XX @@ static void m5208_timer_write(void *opaque, hwaddr offset,
36
37
if (s->pcsr & PCSR_EN)
38
ptimer_run(s->timer, 0);
39
+ ptimer_transaction_commit(s->timer);
40
break;
41
case 2:
42
+ ptimer_transaction_begin(s->timer);
43
s->pmr = value;
44
s->pcsr &= ~PCSR_PIF;
45
if ((s->pcsr & PCSR_RLD) == 0) {
46
@@ -XXX,XX +XXX,XX @@ static void m5208_timer_write(void *opaque, hwaddr offset,
47
} else {
48
ptimer_set_limit(s->timer, value, s->pcsr & PCSR_OVW);
49
}
50
+ ptimer_transaction_commit(s->timer);
51
break;
52
case 4:
53
break;
54
@@ -XXX,XX +XXX,XX @@ static void mcf5208_sys_init(MemoryRegion *address_space, qemu_irq *pic)
55
{
56
MemoryRegion *iomem = g_new(MemoryRegion, 1);
57
m5208_timer_state *s;
58
- QEMUBH *bh;
59
int i;
60
61
/* SDRAMC. */
62
@@ -XXX,XX +XXX,XX @@ static void mcf5208_sys_init(MemoryRegion *address_space, qemu_irq *pic)
63
/* Timers. */
64
for (i = 0; i < 2; i++) {
65
s = g_new0(m5208_timer_state, 1);
66
- bh = qemu_bh_new(m5208_timer_trigger, s);
67
- s->timer = ptimer_init_with_bh(bh, PTIMER_POLICY_DEFAULT);
68
+ s->timer = ptimer_init(m5208_timer_trigger, s, PTIMER_POLICY_DEFAULT);
69
memory_region_init_io(&s->iomem, NULL, &m5208_timer_ops, s,
70
"m5208-timer", 0x00004000);
71
memory_region_add_subregion(address_space, 0xfc080000 + 0x4000 * i,
72
--
31
--
73
2.20.1
32
2.25.1
74
75
diff view generated by jsdifflib
1
Switch the puv3_ost code away from bottom-half based ptimers to the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the timer.
5
2
3
Make sure to zero the currently reserved fields.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-id: 20191017132905.5604-2-peter.maydell@linaro.org
10
---
9
---
11
hw/timer/puv3_ost.c | 9 +++++----
10
linux-user/aarch64/signal.c | 9 ++++++++-
12
1 file changed, 5 insertions(+), 4 deletions(-)
11
1 file changed, 8 insertions(+), 1 deletion(-)
13
12
14
diff --git a/hw/timer/puv3_ost.c b/hw/timer/puv3_ost.c
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/timer/puv3_ost.c
15
--- a/linux-user/aarch64/signal.c
17
+++ b/hw/timer/puv3_ost.c
16
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
19
#include "hw/sysbus.h"
18
struct target_sve_context {
20
#include "hw/irq.h"
19
struct target_aarch64_ctx head;
21
#include "hw/ptimer.h"
20
uint16_t vl;
22
-#include "qemu/main-loop.h"
21
- uint16_t reserved[3];
23
#include "qemu/module.h"
22
+ uint16_t flags;
24
23
+ uint16_t reserved[2];
25
#undef DEBUG_PUV3
24
/* The actual SVE data immediately follows. It is laid out
26
@@ -XXX,XX +XXX,XX @@ typedef struct PUV3OSTState {
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
27
SysBusDevice parent_obj;
26
* the original struct pointer.
28
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
29
MemoryRegion iomem;
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
30
- QEMUBH *bh;
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
31
qemu_irq irq;
30
32
ptimer_state *ptimer;
31
+#define TARGET_SVE_SIG_FLAG_SM 1
33
32
+
34
@@ -XXX,XX +XXX,XX @@ static void puv3_ost_write(void *opaque, hwaddr offset,
33
struct target_rt_sigframe {
35
DPRINTF("offset 0x%x, value 0x%x\n", offset, value);
34
struct target_siginfo info;
36
switch (offset) {
35
struct target_ucontext uc;
37
case 0x00: /* Match Register 0 */
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
38
+ ptimer_transaction_begin(s->ptimer);
37
{
39
s->reg_OSMR0 = value;
38
int i, j;
40
if (s->reg_OSMR0 > s->reg_OSCR) {
39
41
ptimer_set_count(s->ptimer, s->reg_OSMR0 - s->reg_OSCR);
40
+ memset(sve, 0, sizeof(*sve));
42
@@ -XXX,XX +XXX,XX @@ static void puv3_ost_write(void *opaque, hwaddr offset,
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
43
(0xffffffff - s->reg_OSCR));
42
__put_user(size, &sve->head.size);
44
}
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
45
ptimer_run(s->ptimer, 2);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
46
+ ptimer_transaction_commit(s->ptimer);
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
47
break;
46
+ }
48
case 0x14: /* Status Register */
47
49
assert(value == 0);
48
/* Note that SVE regs are stored as a byte stream, with each byte element
50
@@ -XXX,XX +XXX,XX @@ static void puv3_ost_realize(DeviceState *dev, Error **errp)
49
* at a subsequent address. This corresponds to a little-endian store
51
52
sysbus_init_irq(sbd, &s->irq);
53
54
- s->bh = qemu_bh_new(puv3_ost_tick, s);
55
- s->ptimer = ptimer_init_with_bh(s->bh, PTIMER_POLICY_DEFAULT);
56
+ s->ptimer = ptimer_init(puv3_ost_tick, s, PTIMER_POLICY_DEFAULT);
57
+ ptimer_transaction_begin(s->ptimer);
58
ptimer_set_freq(s->ptimer, 50 * 1000 * 1000);
59
+ ptimer_transaction_commit(s->ptimer);
60
61
memory_region_init_io(&s->iomem, OBJECT(s), &puv3_ost_ops, s, "puv3_ost",
62
PUV3_REGS_OFFSET);
63
--
50
--
64
2.20.1
51
2.25.1
65
66
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Hoist the variable load for PSTATE into the existing test vs is_a64.
3
Fold the return value setting into the goto, so each
4
point of failure need not do both.
4
5
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-11-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
---
10
target/arm/helper.c | 20 ++++++++------------
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
11
1 file changed, 8 insertions(+), 12 deletions(-)
12
1 file changed, 11 insertions(+), 15 deletions(-)
12
13
13
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/helper.c
16
--- a/linux-user/aarch64/signal.c
16
+++ b/target/arm/helper.c
17
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
18
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
19
struct target_sve_context *sve = NULL;
19
int current_el = arm_current_el(env);
20
uint64_t extra_datap = 0;
20
int fp_el = fp_exception_el(env, current_el);
21
bool used_extra = false;
21
- uint32_t flags;
22
- bool err = false;
22
+ uint32_t flags, pstate_for_ss;
23
int vq = 0, sve_size = 0;
23
24
24
if (is_a64(env)) {
25
target_restore_general_frame(env, sf);
25
*pc = env->pc;
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
26
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
27
switch (magic) {
27
if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
28
case 0:
28
flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
29
}
69
}
30
+ pstate_for_ss = env->pstate;
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
31
} else {
75
} else {
32
*pc = env->regs[15];
76
- err = true;
33
77
+ goto err;
34
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
35
|| arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
36
flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
37
}
38
+ pstate_for_ss = env->uncached_cpsr;
39
}
78
}
40
79
41
- /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
80
/* SVE data, if present, overwrites FPSIMD data. */
42
+ /*
81
if (sve) {
43
+ * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
82
target_restore_sve_record(env, sve, vq);
44
* states defined in the ARM ARM for software singlestep:
45
* SS_ACTIVE PSTATE.SS State
46
* 0 x Inactive (the TB flag for SS is always 0)
47
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
48
* 1 1 Active-not-pending
49
* SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
50
*/
51
- if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE)) {
52
- if (is_a64(env)) {
53
- if (env->pstate & PSTATE_SS) {
54
- flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
55
- }
56
- } else {
57
- if (env->uncached_cpsr & PSTATE_SS) {
58
- flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
59
- }
60
- }
61
+ if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE) &&
62
+ (pstate_for_ss & PSTATE_SS)) {
63
+ flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
64
}
83
}
65
84
-
66
*pflags = flags;
85
- exit:
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
89
+
90
+ err:
91
+ unlock_user(extra, extra_datap, 0);
92
+ return 1;
93
}
94
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
67
--
96
--
68
2.20.1
97
2.25.1
69
70
diff view generated by jsdifflib
1
Switch the sh_timer code away from bottom-half based ptimers to the
1
From: Richard Henderson <richard.henderson@linaro.org>
2
new transaction-based ptimer API. This just requires adding
3
begin/commit calls around the various places that modify the ptimer
4
state, and using the new ptimer_init() function to create the timer.
5
2
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
or records that are smaller than the header. We were silently
5
allowing these cases to pass, dropping the record.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-id: 20191017132905.5604-3-peter.maydell@linaro.org
10
---
11
---
11
hw/timer/sh_timer.c | 13 +++++++++----
12
linux-user/aarch64/signal.c | 5 ++++-
12
1 file changed, 9 insertions(+), 4 deletions(-)
13
1 file changed, 4 insertions(+), 1 deletion(-)
13
14
14
diff --git a/hw/timer/sh_timer.c b/hw/timer/sh_timer.c
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/timer/sh_timer.c
17
--- a/linux-user/aarch64/signal.c
17
+++ b/hw/timer/sh_timer.c
18
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
19
#include "hw/irq.h"
20
break;
20
#include "hw/sh4/sh.h"
21
21
#include "qemu/timer.h"
22
case TARGET_SVE_MAGIC:
22
-#include "qemu/main-loop.h"
23
+ if (sve || size < sizeof(struct target_sve_context)) {
23
#include "hw/ptimer.h"
24
+ goto err;
24
25
+ }
25
//#define DEBUG_TIMER
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
26
@@ -XXX,XX +XXX,XX @@ static void sh_timer_write(void *opaque, hwaddr offset,
27
vq = sve_vq(env);
27
switch (offset >> 2) {
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
28
case OFFSET_TCOR:
29
- if (!sve && size == sve_size) {
29
s->tcor = value;
30
+ if (size == sve_size) {
30
+ ptimer_transaction_begin(s->timer);
31
sve = (struct target_sve_context *)ctx;
31
ptimer_set_limit(s->timer, s->tcor, 0);
32
break;
32
+ ptimer_transaction_commit(s->timer);
33
}
33
break;
34
case OFFSET_TCNT:
35
s->tcnt = value;
36
+ ptimer_transaction_begin(s->timer);
37
ptimer_set_count(s->timer, s->tcnt);
38
+ ptimer_transaction_commit(s->timer);
39
break;
40
case OFFSET_TCR:
41
+ ptimer_transaction_begin(s->timer);
42
if (s->enabled) {
43
/* Pause the timer if it is running. This may cause some
44
inaccuracy dure to rounding, but avoids a whole lot of other
45
@@ -XXX,XX +XXX,XX @@ static void sh_timer_write(void *opaque, hwaddr offset,
46
/* Restart the timer if still enabled. */
47
ptimer_run(s->timer, 0);
48
}
49
+ ptimer_transaction_commit(s->timer);
50
break;
51
case OFFSET_TCPR:
52
if (s->feat & TIMER_FEAT_CAPT) {
53
@@ -XXX,XX +XXX,XX @@ static void sh_timer_start_stop(void *opaque, int enable)
54
printf("sh_timer_start_stop %d (%d)\n", enable, s->enabled);
55
#endif
56
57
+ ptimer_transaction_begin(s->timer);
58
if (s->enabled && !enable) {
59
ptimer_stop(s->timer);
60
}
61
if (!s->enabled && enable) {
62
ptimer_run(s->timer, 0);
63
}
64
+ ptimer_transaction_commit(s->timer);
65
s->enabled = !!enable;
66
67
#ifdef DEBUG_TIMER
68
@@ -XXX,XX +XXX,XX @@ static void sh_timer_tick(void *opaque)
69
static void *sh_timer_init(uint32_t freq, int feat, qemu_irq irq)
70
{
71
sh_timer_state *s;
72
- QEMUBH *bh;
73
74
s = (sh_timer_state *)g_malloc0(sizeof(sh_timer_state));
75
s->freq = freq;
76
@@ -XXX,XX +XXX,XX @@ static void *sh_timer_init(uint32_t freq, int feat, qemu_irq irq)
77
s->enabled = 0;
78
s->irq = irq;
79
80
- bh = qemu_bh_new(sh_timer_tick, s);
81
- s->timer = ptimer_init_with_bh(bh, PTIMER_POLICY_DEFAULT);
82
+ s->timer = ptimer_init(sh_timer_tick, s, PTIMER_POLICY_DEFAULT);
83
84
sh_timer_write(s, OFFSET_TCOR >> 2, s->tcor);
85
sh_timer_write(s, OFFSET_TCNT >> 2, s->tcnt);
86
--
34
--
87
2.20.1
35
2.25.1
88
89
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Continue setting, but not relying upon, env->hflags.
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-19-richard.henderson@linaro.org
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
7
---
10
target/arm/op_helper.c | 3 +++
8
linux-user/aarch64/signal.c | 3 +++
11
1 file changed, 3 insertions(+)
9
1 file changed, 3 insertions(+)
12
10
13
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/op_helper.c
13
--- a/linux-user/aarch64/signal.c
16
+++ b/target/arm/op_helper.c
14
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
18
void HELPER(setend)(CPUARMState *env)
16
__get_user(extra_size,
19
{
17
&((struct target_extra_context *)ctx)->size);
20
env->uncached_cpsr ^= CPSR_E;
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
21
+ arm_rebuild_hflags(env);
19
+ if (!extra) {
22
}
20
+ return 1;
23
21
+ }
24
/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
22
break;
25
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(cpsr_read)(CPUARMState *env)
23
26
void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
24
default:
27
{
28
cpsr_write(env, val, mask, CPSRWriteByInstr);
29
+ /* TODO: Not all cpsr bits are relevant to hflags. */
30
+ arm_rebuild_hflags(env);
31
}
32
33
/* Write the CPSR for a 32-bit exception return */
34
--
25
--
35
2.20.1
26
2.25.1
36
37
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create a function to compute the values of the TBFLAG_A64 bits
3
Move the checks out of the parsing loop and into the
4
that will be cached. For now, the env->hflags variable is not
4
restore function. This more closely mirrors the code
5
used, and the results are fed back to cpu_get_tb_cpu_state.
5
structure in the kernel, and is slightly clearer.
6
6
7
Note that not all BTI related flags are cached, so we have to
7
Reject rather than silently skip incorrect VL and SVE record sizes,
8
test the BTI feature twice -- once for those bits moved out to
8
bringing our checks in to line with those the kernel does.
9
rebuild_hflags_a64 and once for those bits that remain in
10
cpu_get_tb_cpu_state.
11
9
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Message-id: 20191018174431.1784-3-richard.henderson@linaro.org
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
14
---
17
target/arm/helper.c | 131 +++++++++++++++++++++++---------------------
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
18
1 file changed, 69 insertions(+), 62 deletions(-)
16
1 file changed, 35 insertions(+), 16 deletions(-)
19
17
20
diff --git a/target/arm/helper.c b/target/arm/helper.c
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
21
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/helper.c
20
--- a/linux-user/aarch64/signal.c
23
+++ b/target/arm/helper.c
21
+++ b/linux-user/aarch64/signal.c
24
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
25
return flags;
23
}
26
}
24
}
27
25
28
+static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
26
-static void target_restore_sve_record(CPUARMState *env,
29
+ ARMMMUIdx mmu_idx)
27
- struct target_sve_context *sve, int vq)
30
+{
28
+static bool target_restore_sve_record(CPUARMState *env,
31
+ ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
29
+ struct target_sve_context *sve,
32
+ ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
30
+ int size)
33
+ uint32_t flags = 0;
31
{
34
+ uint64_t sctlr;
32
- int i, j;
35
+ int tbii, tbid;
33
+ int i, j, vl, vq;
36
+
34
37
+ flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
38
+
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
39
+ /* FIXME: ARMv8.1-VHE S2 translation regime. */
37
+ return false;
40
+ if (regime_el(env, stage1) < 2) {
41
+ ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
42
+ tbid = (p1.tbi << 1) | p0.tbi;
43
+ tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
44
+ } else {
45
+ tbid = p0.tbi;
46
+ tbii = tbid & !p0.tbid;
47
+ }
38
+ }
48
+
39
+
49
+ flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
40
+ __get_user(vl, &sve->vl);
50
+ flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
41
+ vq = sve_vq(env);
51
+
42
+
52
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
43
+ /* Reject mismatched VL. */
53
+ int sve_el = sve_exception_el(env, el);
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
54
+ uint32_t zcr_len;
45
+ return false;
55
+
56
+ /*
57
+ * If SVE is disabled, but FP is enabled,
58
+ * then the effective len is 0.
59
+ */
60
+ if (sve_el != 0 && fp_el == 0) {
61
+ zcr_len = 0;
62
+ } else {
63
+ zcr_len = sve_zcr_len_for_el(env, el);
64
+ }
65
+ flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
66
+ flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
67
+ }
46
+ }
68
+
47
+
69
+ sctlr = arm_sctlr(env, el);
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
70
+
49
+ if (size <= sizeof(*sve)) {
71
+ if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
50
+ return true;
72
+ /*
73
+ * In order to save space in flags, we record only whether
74
+ * pauth is "inactive", meaning all insns are implemented as
75
+ * a nop, or "active" when some action must be performed.
76
+ * The decision of which action to take is left to a helper.
77
+ */
78
+ if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
79
+ flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
80
+ }
81
+ }
51
+ }
82
+
52
+
83
+ if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
53
+ /* Reject non-empty but incomplete record. */
84
+ /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
85
+ if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
55
+ return false;
86
+ flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
87
+ }
88
+ }
56
+ }
89
+
57
+
90
+ return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
58
+ /*
91
+}
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
92
+
60
* at a subsequent address. This corresponds to a little-endian load
93
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
61
* of our 64-bit hunks.
94
target_ulong *cs_base, uint32_t *pflags)
62
*/
95
{
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
96
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
64
}
97
uint32_t flags = 0;
65
}
98
66
}
99
if (is_a64(env)) {
67
+ return true;
100
- ARMCPU *cpu = env_archcpu(env);
68
}
101
- uint64_t sctlr;
69
102
-
70
static int target_restore_sigframe(CPUARMState *env,
103
*pc = env->pc;
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
104
- flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
72
struct target_sve_context *sve = NULL;
105
-
73
uint64_t extra_datap = 0;
106
- /* Get control bits for tagged addresses. */
74
bool used_extra = false;
107
- {
75
- int vq = 0, sve_size = 0;
108
- ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
76
+ int sve_size = 0;
109
- ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
77
110
- int tbii, tbid;
78
target_restore_general_frame(env, sf);
111
-
79
112
- /* FIXME: ARMv8.1-VHE S2 translation regime. */
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
113
- if (regime_el(env, stage1) < 2) {
81
if (sve || size < sizeof(struct target_sve_context)) {
114
- ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
82
goto err;
115
- tbid = (p1.tbi << 1) | p0.tbi;
83
}
116
- tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
117
- } else {
85
- vq = sve_vq(env);
118
- tbid = p0.tbi;
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
119
- tbii = tbid & !p0.tbid;
87
- if (size == sve_size) {
88
- sve = (struct target_sve_context *)ctx;
89
- break;
90
- }
120
- }
91
- }
121
-
92
- goto err;
122
- flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
93
+ sve = (struct target_sve_context *)ctx;
123
- flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
94
+ sve_size = size;
124
- }
95
+ break;
125
-
96
126
- if (cpu_isar_feature(aa64_sve, cpu)) {
97
case TARGET_EXTRA_MAGIC:
127
- int sve_el = sve_exception_el(env, current_el);
98
if (extra || size != sizeof(struct target_extra_context)) {
128
- uint32_t zcr_len;
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
129
-
100
}
130
- /* If SVE is disabled, but FP is enabled,
101
131
- * then the effective len is 0.
102
/* SVE data, if present, overwrites FPSIMD data. */
132
- */
103
- if (sve) {
133
- if (sve_el != 0 && fp_el == 0) {
104
- target_restore_sve_record(env, sve, vq);
134
- zcr_len = 0;
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
135
- } else {
106
+ goto err;
136
- zcr_len = sve_zcr_len_for_el(env, current_el);
107
}
137
- }
108
unlock_user(extra, extra_datap, 0);
138
- flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
109
return 0;
139
- flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
140
- }
141
-
142
- sctlr = arm_sctlr(env, current_el);
143
-
144
- if (cpu_isar_feature(aa64_pauth, cpu)) {
145
- /*
146
- * In order to save space in flags, we record only whether
147
- * pauth is "inactive", meaning all insns are implemented as
148
- * a nop, or "active" when some action must be performed.
149
- * The decision of which action to take is left to a helper.
150
- */
151
- if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
152
- flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
153
- }
154
- }
155
-
156
- if (cpu_isar_feature(aa64_bti, cpu)) {
157
- /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */
158
- if (sctlr & (current_el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
159
- flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
160
- }
161
+ flags = rebuild_hflags_a64(env, current_el, fp_el, mmu_idx);
162
+ if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
163
flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
164
}
165
} else {
166
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
167
flags = FIELD_DP32(flags, TBFLAG_A32,
168
XSCALE_CPAR, env->cp15.c15_cpar);
169
}
170
- }
171
172
- flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
173
+ flags = rebuild_hflags_common(env, fp_el, mmu_idx, flags);
174
+ }
175
176
/* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
177
* states defined in the ARM ARM for software singlestep:
178
--
110
--
179
2.20.1
111
2.25.1
180
181
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Create a function to compute the values of the TBFLAG_ANY bits
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
4
that will be cached, and are used by A-profile.
4
Restore SM and ZA state according to the records present on return.
5
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20191018174431.1784-9-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
target/arm/helper.c | 20 ++++++++++++--------
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
12
1 file changed, 12 insertions(+), 8 deletions(-)
12
1 file changed, 154 insertions(+), 13 deletions(-)
13
13
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper.c
16
--- a/linux-user/aarch64/signal.c
17
+++ b/target/arm/helper.c
17
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
19
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
19
20
#define TARGET_SVE_SIG_FLAG_SM 1
21
22
+#define TARGET_ZA_MAGIC 0x54366345
23
+
24
+struct target_za_context {
25
+ struct target_aarch64_ctx head;
26
+ uint16_t vl;
27
+ uint16_t reserved[3];
28
+ /* The actual ZA data immediately follows. */
29
+};
30
+
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
37
+
38
struct target_rt_sigframe {
39
struct target_siginfo info;
40
struct target_ucontext uc;
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
20
}
42
}
21
43
22
+static uint32_t rebuild_hflags_aprofile(CPUARMState *env)
44
static void target_setup_sve_record(struct target_sve_context *sve,
45
- CPUARMState *env, int vq, int size)
46
+ CPUARMState *env, int size)
47
{
48
- int i, j;
49
+ int i, j, vq = sve_vq(env);
50
51
memset(sve, 0, sizeof(*sve));
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
54
}
55
}
56
57
+static void target_setup_za_record(struct target_za_context *za,
58
+ CPUARMState *env, int size)
23
+{
59
+{
24
+ int flags = 0;
60
+ int vq = sme_vq(env);
25
+
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
26
+ flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL,
62
+ int i, j;
27
+ arm_debug_target_el(env));
63
+
28
+ return flags;
64
+ memset(za, 0, sizeof(*za));
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
66
+ __put_user(size, &za->head.size);
67
+ __put_user(vl, &za->vl);
68
+
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
70
+ return;
71
+ }
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
73
+
74
+ /*
75
+ * Note that ZA vectors are stored as a byte stream,
76
+ * with each byte element at a subsequent address.
77
+ */
78
+ for (i = 0; i < vl; ++i) {
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
80
+ for (j = 0; j < vq * 2; ++j) {
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
82
+ }
83
+ }
29
+}
84
+}
30
+
85
+
31
static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
86
static void target_restore_general_frame(CPUARMState *env,
32
ARMMMUIdx mmu_idx)
87
struct target_rt_sigframe *sf)
33
{
88
{
34
- return rebuild_hflags_common_32(env, fp_el, mmu_idx, 0);
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
35
+ uint32_t flags = rebuild_hflags_aprofile(env);
90
36
+ return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
91
static bool target_restore_sve_record(CPUARMState *env,
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
112
113
- __get_user(vl, &sve->vl);
114
- vq = sve_vq(env);
115
+ /*
116
+ * Note that we cannot use sve_vq() because that depends on the
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
133
return true;
37
}
134
}
38
135
39
static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
136
+static bool target_restore_za_record(CPUARMState *env,
40
ARMMMUIdx mmu_idx)
137
+ struct target_za_context *za,
41
{
138
+ int size, int *svcr)
42
+ uint32_t flags = rebuild_hflags_aprofile(env);
139
+{
43
ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
140
+ int i, j, vl, vq;
44
ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
141
+
45
- uint32_t flags = 0;
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
46
uint64_t sctlr;
143
+ return false;
47
int tbii, tbid;
144
+ }
48
145
+
49
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
146
+ __get_user(vl, &za->vl);
50
}
147
+ vq = sme_vq(env);
51
}
148
+
52
149
+ /* Reject mismatched VL. */
53
- if (!arm_feature(env, ARM_FEATURE_M)) {
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
54
- int target_el = arm_debug_target_el(env);
151
+ return false;
55
-
152
+ }
56
- flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL, target_el);
153
+
57
- }
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
58
-
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
59
*pflags = flags;
156
+ return true;
60
*cs_base = 0;
157
+ }
61
}
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
161
+ return false;
162
+ }
163
+
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
165
+
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
172
+ return true;
173
+}
174
+
175
static int target_restore_sigframe(CPUARMState *env,
176
struct target_rt_sigframe *sf)
177
{
178
struct target_aarch64_ctx *ctx, *extra = NULL;
179
struct target_fpsimd_context *fpsimd = NULL;
180
struct target_sve_context *sve = NULL;
181
+ struct target_za_context *za = NULL;
182
uint64_t extra_datap = 0;
183
bool used_extra = false;
184
int sve_size = 0;
185
+ int za_size = 0;
186
+ int svcr = 0;
187
188
target_restore_general_frame(env, sf);
189
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
191
sve_size = size;
192
break;
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
62
--
288
--
63
2.20.1
289
2.25.1
64
65
diff view generated by jsdifflib
1
From: Guenter Roeck <linux@roeck-us.net>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
When booting a recent Linux kernel, the qemu message "Timer with delta
3
Add "sve" to the sve prctl functions, to distinguish
4
zero, disabling" is seen, apparently because a ptimer is started before
4
them from the coming "sme" prctls with similar names.
5
being initialized. Fix the problem by initializing the offending ptimer
6
before starting it.
7
5
8
The bug is effectively harmless in the old QEMUBH setup
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
because the sequence of events is:
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
* the delta zero means the timer expires immediately
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
11
* ptimer_reload() arranges for exynos4210_gfrc_event() to be called
12
* ptimer_reload() notices the zero delta and disables the timer
13
* later, the QEMUBH runs, and exynos4210_gfrc_event() correctly
14
configures the timer and restarts it
15
16
In the new transaction based API the bug is still harmless,
17
but differences of when the callback function runs mean the
18
message is not printed any more:
19
* ptimer_run() does nothing as it's inside a transaction block
20
* ptimer_transaction_commit() sees it has work to do and
21
calls ptimer_reload()
22
* the zero delta means the timer expires immediately
23
* ptimer_reload() calls exynos4210_gfrc_event() directly
24
* exynos4210_gfrc_event() configures the timer
25
* the delta is no longer zero so ptimer_reload() doesn't complain
26
(the zero-delta test is after the trigger-callback in
27
the ptimer_reload() function)
28
29
Regardless, the behaviour here was not intentional, and we should
30
just program the ptimer correctly to start with.
31
32
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
33
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
34
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
35
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
36
Message-id: 20191018143149.9216-1-peter.maydell@linaro.org
37
[PMM: Expansion/clarification of the commit message:
38
the message is about a zero delta, not a zero period;
39
added detail to the commit message of the analysis of what
40
is happening and why the kernel boots even with the message;
41
added note that the message goes away with the new ptimer API]
42
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
43
---
10
---
44
hw/timer/exynos4210_mct.c | 2 +-
11
linux-user/aarch64/target_prctl.h | 8 ++++----
45
1 file changed, 1 insertion(+), 1 deletion(-)
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
46
14
47
diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
48
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/timer/exynos4210_mct.c
17
--- a/linux-user/aarch64/target_prctl.h
50
+++ b/hw/timer/exynos4210_mct.c
18
+++ b/linux-user/aarch64/target_prctl.h
51
@@ -XXX,XX +XXX,XX @@ static void exynos4210_mct_write(void *opaque, hwaddr offset,
19
@@ -XXX,XX +XXX,XX @@
52
/* Start FRC if transition from disabled to enabled */
20
#ifndef AARCH64_TARGET_PRCTL_H
53
if ((value & G_TCON_TIMER_ENABLE) > (old_val &
21
#define AARCH64_TARGET_PRCTL_H
54
G_TCON_TIMER_ENABLE)) {
22
55
- exynos4210_gfrc_start(&s->g_timer);
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
56
+ exynos4210_gfrc_restart(s);
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
57
}
25
{
58
if ((value & G_TCON_TIMER_ENABLE) < (old_val &
26
ARMCPU *cpu = env_archcpu(env);
59
G_TCON_TIMER_ENABLE)) {
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
29
}
30
return -TARGET_EINVAL;
31
}
32
-#define do_prctl_get_vl do_prctl_get_vl
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
34
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
37
{
38
/*
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
41
}
42
return -TARGET_EINVAL;
43
}
44
-#define do_prctl_set_vl do_prctl_set_vl
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
48
{
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/linux-user/syscall.c
52
+++ b/linux-user/syscall.c
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
54
#ifndef do_prctl_set_fp_mode
55
#define do_prctl_set_fp_mode do_prctl_inval1
56
#endif
57
-#ifndef do_prctl_get_vl
58
-#define do_prctl_get_vl do_prctl_inval0
59
+#ifndef do_prctl_sve_get_vl
60
+#define do_prctl_sve_get_vl do_prctl_inval0
61
#endif
62
-#ifndef do_prctl_set_vl
63
-#define do_prctl_set_vl do_prctl_inval1
64
+#ifndef do_prctl_sve_set_vl
65
+#define do_prctl_sve_set_vl do_prctl_inval1
66
#endif
67
#ifndef do_prctl_reset_keys
68
#define do_prctl_reset_keys do_prctl_inval1
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
70
case PR_SET_FP_MODE:
71
return do_prctl_set_fp_mode(env, arg2);
72
case PR_SVE_GET_VL:
73
- return do_prctl_get_vl(env);
74
+ return do_prctl_sve_get_vl(env);
75
case PR_SVE_SET_VL:
76
- return do_prctl_set_vl(env, arg2);
77
+ return do_prctl_sve_set_vl(env, arg2);
78
case PR_PAC_RESET_KEYS:
79
if (arg3 || arg4 || arg5) {
80
return -TARGET_EINVAL;
60
--
81
--
61
2.20.1
82
2.25.1
62
63
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This is the payoff.
3
These prctl set the Streaming SVE vector length, which may
4
be completely different from the Normal SVE vector length.
4
5
5
From perf record -g data of ubuntu 18 boot and shutdown:
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
7
BEFORE:
8
9
- 23.02% 2.82% qemu-system-aar [.] helper_lookup_tb_ptr
10
- 20.22% helper_lookup_tb_ptr
11
+ 10.05% tb_htable_lookup
12
- 9.13% cpu_get_tb_cpu_state
13
3.20% aa64_va_parameters_both
14
0.55% fp_exception_el
15
16
- 11.66% 4.74% qemu-system-aar [.] cpu_get_tb_cpu_state
17
- 6.96% cpu_get_tb_cpu_state
18
3.63% aa64_va_parameters_both
19
0.60% fp_exception_el
20
0.53% sve_exception_el
21
22
AFTER:
23
24
- 16.40% 3.40% qemu-system-aar [.] helper_lookup_tb_ptr
25
- 13.03% helper_lookup_tb_ptr
26
+ 11.19% tb_htable_lookup
27
0.55% cpu_get_tb_cpu_state
28
29
0.98% 0.71% qemu-system-aar [.] cpu_get_tb_cpu_state
30
31
0.87% 0.24% qemu-system-aar [.] rebuild_hflags_a64
32
33
Before, helper_lookup_tb_ptr is the second hottest function in the
34
application, consuming almost a quarter of the runtime. Within the
35
entire execution, cpu_get_tb_cpu_state consumes about 12%.
36
37
After, helper_lookup_tb_ptr has dropped to the fourth hottest function,
38
with consumption dropping to a sixth of the runtime. Within the
39
entire execution, cpu_get_tb_cpu_state has dropped below 1%, and the
40
supporting function to rebuild hflags also consumes about 1%.
41
42
Assertions are retained for --enable-debug-tcg.
43
44
Tested-by: Alex Bennée <alex.bennee@linaro.org>
45
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
46
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
47
Message-id: 20191018174431.1784-23-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
48
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
49
---
10
---
50
target/arm/helper.c | 9 ++++++---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
51
1 file changed, 6 insertions(+), 3 deletions(-)
12
linux-user/syscall.c | 16 +++++++++
13
2 files changed, 70 insertions(+)
52
14
53
diff --git a/target/arm/helper.c b/target/arm/helper.c
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
54
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
55
--- a/target/arm/helper.c
17
--- a/linux-user/aarch64/target_prctl.h
56
+++ b/target/arm/helper.c
18
+++ b/linux-user/aarch64/target_prctl.h
57
@@ -XXX,XX +XXX,XX @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
58
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
59
target_ulong *cs_base, uint32_t *pflags)
60
{
20
{
61
- uint32_t flags, pstate_for_ss;
21
ARMCPU *cpu = env_archcpu(env);
62
+ uint32_t flags = env->hflags;
22
if (cpu_isar_feature(aa64_sve, cpu)) {
63
+ uint32_t pstate_for_ss;
23
+ /* PSTATE.SM is always unset on syscall entry. */
64
24
return sve_vq(env) * 16;
65
*cs_base = 0;
25
}
66
- flags = rebuild_hflags_internal(env);
26
return -TARGET_EINVAL;
67
+#ifdef CONFIG_DEBUG_TCG
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
68
+ assert(flags == rebuild_hflags_internal(env));
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
69
+#endif
107
+#endif
70
108
71
- if (is_a64(env)) {
109
#include "target_prctl.h"
72
+ if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) {
110
73
*pc = env->pc;
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
74
if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
112
#ifndef do_prctl_set_unalign
75
flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
76
--
135
--
77
2.20.1
136
2.25.1
78
79
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
The 32-bit product should be sign-extended, not zero-extended.
3
There's no reason to set CPACR_EL1.ZEN if SVE disabled.
4
4
5
Fixes: ea96b374641b
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
9
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
10
Message-id: 20190912183058.17947-1-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
9
---
13
target/arm/translate.c | 4 +++-
10
target/arm/cpu.c | 7 +++----
14
1 file changed, 3 insertions(+), 1 deletion(-)
11
1 file changed, 3 insertions(+), 4 deletions(-)
15
12
16
diff --git a/target/arm/translate.c b/target/arm/translate.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
17
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
18
--- a/target/arm/translate.c
15
--- a/target/arm/cpu.c
19
+++ b/target/arm/translate.c
16
+++ b/target/arm/cpu.c
20
@@ -XXX,XX +XXX,XX @@ static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
21
case 2:
18
/* and to the FP/Neon instructions */
22
tl = load_reg(s, a->ra);
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
th = load_reg(s, a->rd);
20
CPACR_EL1, FPEN, 3);
24
- t1 = tcg_const_i32(0);
21
- /* and to the SVE instructions */
25
+ /* Sign-extend the 32-bit product to 64 bits. */
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
26
+ t1 = tcg_temp_new_i32();
23
- CPACR_EL1, ZEN, 3);
27
+ tcg_gen_sari_i32(t1, t0, 31);
24
- /* with reasonable vector length */
28
tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
25
+ /* and to the SVE instructions, with default vector length */
29
tcg_temp_free_i32(t0);
26
if (cpu_isar_feature(aa64_sve, cpu)) {
30
tcg_temp_free_i32(t1);
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
+ CPACR_EL1, ZEN, 3);
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
30
}
31
/*
31
--
32
--
32
2.20.1
33
2.25.1
33
34
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Begin setting, but not relying upon, env->hflags.
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20191018174431.1784-17-richard.henderson@linaro.org
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
9
---
10
linux-user/syscall.c | 1 +
10
target/arm/cpu.c | 11 +++++++++++
11
target/arm/cpu.c | 1 +
11
1 file changed, 11 insertions(+)
12
target/arm/helper-a64.c | 3 +++
13
target/arm/helper.c | 2 ++
14
target/arm/machine.c | 1 +
15
target/arm/op_helper.c | 1 +
16
6 files changed, 9 insertions(+)
17
12
18
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/linux-user/syscall.c
21
+++ b/linux-user/syscall.c
22
@@ -XXX,XX +XXX,XX @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
23
aarch64_sve_narrow_vq(env, vq);
24
}
25
env->vfp.zcr_el[1] = vq - 1;
26
+ arm_rebuild_hflags(env);
27
ret = vq * 16;
28
}
29
return ret;
30
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
31
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/cpu.c
15
--- a/target/arm/cpu.c
33
+++ b/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
34
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(CPUState *s)
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
35
18
CPACR_EL1, ZEN, 3);
36
hw_breakpoint_update_all(cpu);
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
37
hw_watchpoint_update_all(cpu);
38
+ arm_rebuild_hflags(env);
39
}
40
41
bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
42
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-a64.c
45
+++ b/target/arm/helper-a64.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
47
} else {
48
env->regs[15] = new_pc & ~0x3;
49
}
20
}
50
+ helper_rebuild_hflags_a32(env, new_el);
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
51
qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
52
"AArch32 EL%d PC 0x%" PRIx32 "\n",
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
53
cur_el, new_el, env->regs[15]);
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
54
@@ -XXX,XX +XXX,XX @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
25
+ CPACR_EL1, SMEN, 3);
55
}
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
56
aarch64_restore_sp(env, new_el);
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
57
env->pc = new_pc;
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
58
+ helper_rebuild_hflags_a64(env, new_el);
29
+ SMCR, FA64, 1);
59
qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
30
+ }
60
"AArch64 EL%d PC 0x%" PRIx64 "\n",
31
+ }
61
cur_el, new_el, env->pc);
32
/*
62
}
33
* Enable 48-bit address space (TODO: take reserved_va into account).
63
+
34
* Enable TBI0 but not TBI1.
64
/*
65
* Note that cur_el can never be 0. If new_el is 0, then
66
* el0_a64 is return_to_aa64, else el0_a64 is ignored.
67
diff --git a/target/arm/helper.c b/target/arm/helper.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/target/arm/helper.c
70
+++ b/target/arm/helper.c
71
@@ -XXX,XX +XXX,XX @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
72
env->regs[14] = env->regs[15] + offset;
73
}
74
env->regs[15] = newpc;
75
+ arm_rebuild_hflags(env);
76
}
77
78
static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
79
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
80
pstate_write(env, PSTATE_DAIF | new_mode);
81
env->aarch64 = 1;
82
aarch64_restore_sp(env, new_el);
83
+ helper_rebuild_hflags_a64(env, new_el);
84
85
env->pc = addr;
86
87
diff --git a/target/arm/machine.c b/target/arm/machine.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/machine.c
90
+++ b/target/arm/machine.c
91
@@ -XXX,XX +XXX,XX @@ static int cpu_post_load(void *opaque, int version_id)
92
if (!kvm_enabled()) {
93
pmu_op_finish(&cpu->env);
94
}
95
+ arm_rebuild_hflags(&cpu->env);
96
97
return 0;
98
}
99
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/arm/op_helper.c
102
+++ b/target/arm/op_helper.c
103
@@ -XXX,XX +XXX,XX @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
104
* state. Do the masking now.
105
*/
106
env->regs[15] &= (env->thumb ? ~1 : ~3);
107
+ arm_rebuild_hflags(env);
108
109
qemu_mutex_lock_iothread();
110
arm_call_el_change_hook(env_archcpu(env));
111
--
35
--
112
2.20.1
36
2.25.1
113
114
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This file keeps the various QDev blocks separated by comments.
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
6
Reviewed-by: Cleber Rosa <crosa@redhat.com>
7
Message-id: 20191005154748.21718-3-f4bug@amsat.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
7
---
10
hw/sd/sdhci.c | 3 ++-
8
linux-user/elfload.c | 20 ++++++++++++++++++++
11
1 file changed, 2 insertions(+), 1 deletion(-)
9
1 file changed, 20 insertions(+)
12
10
13
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/sd/sdhci.c
13
--- a/linux-user/elfload.c
16
+++ b/hw/sd/sdhci.c
14
+++ b/linux-user/elfload.c
17
@@ -XXX,XX +XXX,XX @@ static const TypeInfo sdhci_bus_info = {
15
@@ -XXX,XX +XXX,XX @@ enum {
18
.class_init = sdhci_bus_class_init,
16
ARM_HWCAP2_A64_RNG = 1 << 16,
17
ARM_HWCAP2_A64_BTI = 1 << 17,
18
ARM_HWCAP2_A64_MTE = 1 << 18,
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
19
};
31
};
20
32
21
+/* --- qdev i.MX eSDHC --- */
33
#define ELF_HWCAP get_elf_hwcap()
22
+
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
23
static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size)
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
24
{
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
25
SDHCIState *s = SYSBUS_SDHCI(opaque);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
26
@@ -XXX,XX +XXX,XX @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
27
}
39
+ ARM_HWCAP2_A64_SME_F32F32 |
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
28
}
48
}
29
30
-
31
static const MemoryRegionOps usdhc_mmio_ops = {
32
.read = usdhc_read,
33
.write = usdhc_write,
34
--
49
--
35
2.20.1
50
2.25.1
36
37
diff view generated by jsdifflib