1
The following changes since commit eae587e8e3694b1aceab23239493fb4c7e1a80f5:
1
I don't have anything else queued up at the moment, so this is just
2
Richard's SME patches.
2
3
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2021-09-13' into staging (2021-09-13 11:00:30 +0100)
4
-- PMM
5
6
The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3:
7
8
Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20210913
12
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711
8
13
9
for you to fetch changes up to 9a2b2ecf4d25a3943918c95d2db4508b304161b5:
14
for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8:
10
15
11
hw/arm/mps2.c: Mark internal-only I2C buses as 'full' (2021-09-13 17:09:28 +0100)
16
linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
target-arm queue:
19
target-arm:
15
* mark MPS2/MPS3 board-internal i2c buses as 'full' so that command
20
* Implement SME emulation, for both system and linux-user
16
line user-created devices are not plugged into them
17
* Take an exception if PSTATE.IL is set
18
* Support an emulated ITS in the virt board
19
* Add support for kudo-bmc board
20
* Probe for KVM_CAP_ARM_VM_IPA_SIZE when creating scratch VM
21
* cadence_uart: Fix clock handling issues that prevented
22
u-boot from running
23
21
24
----------------------------------------------------------------
22
----------------------------------------------------------------
25
Bin Meng (6):
23
Richard Henderson (45):
26
hw/misc: zynq_slcr: Correctly compute output clocks in the reset exit phase
24
target/arm: Handle SME in aarch64_cpu_dump_state
27
hw/char: cadence_uart: Disable transmit when input clock is disabled
25
target/arm: Add infrastructure for disas_sme
28
hw/char: cadence_uart: Move clock/reset check to uart_can_receive()
26
target/arm: Trap non-streaming usage when Streaming SVE is active
29
hw/char: cadence_uart: Convert to memop_with_attrs() ops
27
target/arm: Mark ADR as non-streaming
30
hw/char: cadence_uart: Ignore access when unclocked or in reset for uart_{read, write}()
28
target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
31
hw/char: cadence_uart: Log a guest error when device is unclocked or in reset
29
target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming
30
target/arm: Mark PMULL, FMMLA as non-streaming
31
target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
32
target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
33
target/arm: Mark string/histo/crypto as non-streaming
34
target/arm: Mark gather/scatter load/store as non-streaming
35
target/arm: Mark gather prefetch as non-streaming
36
target/arm: Mark LDFF1 and LDNF1 as non-streaming
37
target/arm: Mark LD1RO as non-streaming
38
target/arm: Add SME enablement checks
39
target/arm: Handle SME in sve_access_check
40
target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
41
target/arm: Implement SME ZERO
42
target/arm: Implement SME MOVA
43
target/arm: Implement SME LD1, ST1
44
target/arm: Export unpredicated ld/st from translate-sve.c
45
target/arm: Implement SME LDR, STR
46
target/arm: Implement SME ADDHA, ADDVA
47
target/arm: Implement FMOPA, FMOPS (non-widening)
48
target/arm: Implement BFMOPA, BFMOPS
49
target/arm: Implement FMOPA, FMOPS (widening)
50
target/arm: Implement SME integer outer product
51
target/arm: Implement PSEL
52
target/arm: Implement REVD
53
target/arm: Implement SCLAMP, UCLAMP
54
target/arm: Reset streaming sve state on exception boundaries
55
target/arm: Enable SME for -cpu max
56
linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
57
linux-user/aarch64: Reset PSTATE.SM on syscalls
58
linux-user/aarch64: Add SM bit to SVE signal context
59
linux-user/aarch64: Tidy target_restore_sigframe error return
60
linux-user/aarch64: Do not allow duplicate or short sve records
61
linux-user/aarch64: Verify extra record lock succeeded
62
linux-user/aarch64: Move sve record checks into restore
63
linux-user/aarch64: Implement SME signal handling
64
linux-user: Rename sve prctls
65
linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
66
target/arm: Only set ZEN in reset if SVE present
67
target/arm: Enable SME for user-only
68
linux-user/aarch64: Add SME related hwcap entries
32
69
33
Chris Rauer (1):
70
docs/system/arm/emulation.rst | 4 +
34
hw/arm: Add support for kudo-bmc board.
71
linux-user/aarch64/target_cpu.h | 5 +-
35
72
linux-user/aarch64/target_prctl.h | 62 +-
36
Marc Zyngier (1):
73
target/arm/cpu.h | 7 +
37
hw/arm/virt: KVM: Probe for KVM_CAP_ARM_VM_IPA_SIZE when creating scratch VM
74
target/arm/helper-sme.h | 126 ++++
38
75
target/arm/helper-sve.h | 4 +
39
Peter Maydell (5):
76
target/arm/helper.h | 18 +
40
target/arm: Take an exception if PSTATE.IL is set
77
target/arm/translate-a64.h | 45 ++
41
qdev: Support marking individual buses as 'full'
78
target/arm/translate.h | 16 +
42
hw/arm/mps2-tz.c: Add extra data parameter to MakeDevFn
79
target/arm/sme-fa64.decode | 60 ++
43
hw/arm/mps2-tz.c: Mark internal-only I2C buses as 'full'
80
target/arm/sme.decode | 88 +++
44
hw/arm/mps2.c: Mark internal-only I2C buses as 'full'
81
target/arm/sve.decode | 41 +-
45
82
linux-user/aarch64/cpu_loop.c | 9 +
46
Richard Henderson (1):
83
linux-user/aarch64/signal.c | 243 ++++++--
47
target/arm: Merge disas_a64_insn into aarch64_tr_translate_insn
84
linux-user/elfload.c | 20 +
48
85
linux-user/syscall.c | 28 +-
49
Shashi Mallela (9):
86
target/arm/cpu.c | 35 +-
50
hw/intc: GICv3 ITS initial framework
87
target/arm/cpu64.c | 11 +
51
hw/intc: GICv3 ITS register definitions added
88
target/arm/helper.c | 56 +-
52
hw/intc: GICv3 ITS command queue framework
89
target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++
53
hw/intc: GICv3 ITS Command processing
90
target/arm/sve_helper.c | 28 +
54
hw/intc: GICv3 ITS Feature enablement
91
target/arm/translate-a64.c | 103 +++-
55
hw/intc: GICv3 redistributor ITS processing
92
target/arm/translate-sme.c | 373 ++++++++++++
56
tests/data/acpi/virt: Add IORT files for ITS
93
target/arm/translate-sve.c | 393 ++++++++++---
57
hw/arm/virt: add ITS support in virt GIC
94
target/arm/translate-vfp.c | 12 +
58
tests/data/acpi/virt: Update IORT files for ITS
95
target/arm/translate.c | 2 +
59
96
target/arm/vec_helper.c | 24 +
60
docs/system/arm/nuvoton.rst | 1 +
97
target/arm/meson.build | 3 +
61
hw/intc/gicv3_internal.h | 188 ++++-
98
28 files changed, 2821 insertions(+), 135 deletions(-)
62
include/hw/arm/virt.h | 2 +
99
create mode 100644 target/arm/sme-fa64.decode
63
include/hw/intc/arm_gicv3_common.h | 13 +
100
create mode 100644 target/arm/sme.decode
64
include/hw/intc/arm_gicv3_its_common.h | 32 +-
101
create mode 100644 target/arm/translate-sme.c
65
include/hw/qdev-core.h | 24 +
66
target/arm/cpu.h | 1 +
67
target/arm/kvm_arm.h | 4 +-
68
target/arm/syndrome.h | 5 +
69
target/arm/translate.h | 2 +
70
hw/arm/mps2-tz.c | 92 ++-
71
hw/arm/mps2.c | 12 +-
72
hw/arm/npcm7xx_boards.c | 34 +
73
hw/arm/virt.c | 29 +-
74
hw/char/cadence_uart.c | 61 +-
75
hw/intc/arm_gicv3.c | 14 +
76
hw/intc/arm_gicv3_common.c | 13 +
77
hw/intc/arm_gicv3_cpuif.c | 7 +-
78
hw/intc/arm_gicv3_dist.c | 5 +-
79
hw/intc/arm_gicv3_its.c | 1322 ++++++++++++++++++++++++++++++++
80
hw/intc/arm_gicv3_its_common.c | 7 +-
81
hw/intc/arm_gicv3_its_kvm.c | 2 +-
82
hw/intc/arm_gicv3_redist.c | 153 +++-
83
hw/misc/zynq_slcr.c | 31 +-
84
softmmu/qdev-monitor.c | 7 +-
85
target/arm/helper-a64.c | 1 +
86
target/arm/helper.c | 8 +
87
target/arm/kvm.c | 7 +-
88
target/arm/translate-a64.c | 255 +++---
89
target/arm/translate.c | 21 +
90
hw/intc/meson.build | 1 +
91
tests/data/acpi/virt/IORT | Bin 0 -> 124 bytes
92
tests/data/acpi/virt/IORT.memhp | Bin 0 -> 124 bytes
93
tests/data/acpi/virt/IORT.numamem | Bin 0 -> 124 bytes
94
tests/data/acpi/virt/IORT.pxb | Bin 0 -> 124 bytes
95
35 files changed, 2144 insertions(+), 210 deletions(-)
96
create mode 100644 hw/intc/arm_gicv3_its.c
97
create mode 100644 tests/data/acpi/virt/IORT
98
create mode 100644 tests/data/acpi/virt/IORT.memhp
99
create mode 100644 tests/data/acpi/virt/IORT.numamem
100
create mode 100644 tests/data/acpi/virt/IORT.pxb
101
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Dump SVCR, plus use the correct access check for Streaming Mode.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-2-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/cpu.c | 17 ++++++++++++++++-
11
1 file changed, 16 insertions(+), 1 deletion(-)
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
18
int i;
19
int el = arm_current_el(env);
20
const char *ns_status;
21
+ bool sve;
22
23
qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
24
for (i = 0; i < 32; i++) {
25
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
26
el,
27
psr & PSTATE_SP ? 'h' : 't');
28
29
+ if (cpu_isar_feature(aa64_sme, cpu)) {
30
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
31
+ env->svcr,
32
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
33
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
34
+ }
35
if (cpu_isar_feature(aa64_bti, cpu)) {
36
qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
37
}
38
@@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
39
qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
40
vfp_get_fpcr(env), vfp_get_fpsr(env));
41
42
- if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
43
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
44
+ sve = sme_exception_el(env, el) == 0;
45
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
46
+ sve = sve_exception_el(env, el) == 0;
47
+ } else {
48
+ sve = false;
49
+ }
50
+
51
+ if (sve) {
52
int j, zcr_len = sve_vqm1_for_el(env, el);
53
54
for (i = 0; i <= FFR_PRED_NUM; i++) {
55
--
56
2.25.1
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Added register definitions relevant to ITS,implemented overall
3
This includes the build rules for the decoder, and the
4
ITS device framework with stubs for ITS control and translater
4
new file for translation, but excludes any instructions.
5
regions read/write,extended ITS common to handle mmio init between
6
existing kvm device and newer qemu device.
7
5
8
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
8
Message-id: 20220708151540.18136-3-richard.henderson@linaro.org
12
Message-id: 20210910143951.92242-2-shashi.mallela@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
10
---
15
hw/intc/gicv3_internal.h | 96 +++++++++-
11
target/arm/translate-a64.h | 1 +
16
include/hw/intc/arm_gicv3_its_common.h | 9 +-
12
target/arm/sme.decode | 20 ++++++++++++++++++++
17
hw/intc/arm_gicv3_its.c | 241 +++++++++++++++++++++++++
13
target/arm/translate-a64.c | 7 ++++++-
18
hw/intc/arm_gicv3_its_common.c | 7 +-
14
target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++
19
hw/intc/arm_gicv3_its_kvm.c | 2 +-
15
target/arm/meson.build | 2 ++
20
hw/intc/meson.build | 1 +
16
5 files changed, 64 insertions(+), 1 deletion(-)
21
6 files changed, 342 insertions(+), 14 deletions(-)
17
create mode 100644 target/arm/sme.decode
22
create mode 100644 hw/intc/arm_gicv3_its.c
18
create mode 100644 target/arm/translate-sme.c
23
19
24
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
20
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
25
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/intc/gicv3_internal.h
22
--- a/target/arm/translate-a64.h
27
+++ b/hw/intc/gicv3_internal.h
23
+++ b/target/arm/translate-a64.h
28
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
29
#ifndef QEMU_ARM_GICV3_INTERNAL_H
25
}
30
#define QEMU_ARM_GICV3_INTERNAL_H
26
31
27
bool disas_sve(DisasContext *, uint32_t);
32
+#include "hw/registerfields.h"
28
+bool disas_sme(DisasContext *, uint32_t);
33
#include "hw/intc/arm_gicv3_common.h"
29
34
30
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
35
/* Distributor registers, as offsets from the distributor base address */
31
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
36
@@ -XXX,XX +XXX,XX @@
32
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
37
#define GICD_CTLR_E1NWF (1U << 7)
38
#define GICD_CTLR_RWP (1U << 31)
39
40
+/* 16 bits EventId */
41
+#define GICD_TYPER_IDBITS 0xf
42
+
43
/*
44
* Redistributor frame offsets from RD_base
45
*/
46
@@ -XXX,XX +XXX,XX @@
47
#define GICR_WAKER_ProcessorSleep (1U << 1)
48
#define GICR_WAKER_ChildrenAsleep (1U << 2)
49
50
-#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK (7ULL << 56)
51
-#define GICR_PROPBASER_ADDR_MASK (0xfffffffffULL << 12)
52
-#define GICR_PROPBASER_SHAREABILITY_MASK (3U << 10)
53
-#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7)
54
-#define GICR_PROPBASER_IDBITS_MASK (0x1f)
55
+FIELD(GICR_PROPBASER, IDBITS, 0, 5)
56
+FIELD(GICR_PROPBASER, INNERCACHE, 7, 3)
57
+FIELD(GICR_PROPBASER, SHAREABILITY, 10, 2)
58
+FIELD(GICR_PROPBASER, PHYADDR, 12, 40)
59
+FIELD(GICR_PROPBASER, OUTERCACHE, 56, 3)
60
61
-#define GICR_PENDBASER_PTZ (1ULL << 62)
62
-#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK (7ULL << 56)
63
-#define GICR_PENDBASER_ADDR_MASK (0xffffffffULL << 16)
64
-#define GICR_PENDBASER_SHAREABILITY_MASK (3U << 10)
65
-#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7)
66
+FIELD(GICR_PENDBASER, INNERCACHE, 7, 3)
67
+FIELD(GICR_PENDBASER, SHAREABILITY, 10, 2)
68
+FIELD(GICR_PENDBASER, PHYADDR, 16, 36)
69
+FIELD(GICR_PENDBASER, OUTERCACHE, 56, 3)
70
+FIELD(GICR_PENDBASER, PTZ, 62, 1)
71
72
#define ICC_CTLR_EL1_CBPR (1U << 0)
73
#define ICC_CTLR_EL1_EOIMODE (1U << 1)
74
@@ -XXX,XX +XXX,XX @@
75
#define ICH_VTR_EL2_PREBITS_SHIFT 26
76
#define ICH_VTR_EL2_PRIBITS_SHIFT 29
77
78
+/* ITS Registers */
79
+
80
+FIELD(GITS_BASER, SIZE, 0, 8)
81
+FIELD(GITS_BASER, PAGESIZE, 8, 2)
82
+FIELD(GITS_BASER, SHAREABILITY, 10, 2)
83
+FIELD(GITS_BASER, PHYADDR, 12, 36)
84
+FIELD(GITS_BASER, PHYADDRL_64K, 16, 32)
85
+FIELD(GITS_BASER, PHYADDRH_64K, 12, 4)
86
+FIELD(GITS_BASER, ENTRYSIZE, 48, 5)
87
+FIELD(GITS_BASER, OUTERCACHE, 53, 3)
88
+FIELD(GITS_BASER, TYPE, 56, 3)
89
+FIELD(GITS_BASER, INNERCACHE, 59, 3)
90
+FIELD(GITS_BASER, INDIRECT, 62, 1)
91
+FIELD(GITS_BASER, VALID, 63, 1)
92
+
93
+FIELD(GITS_CTLR, QUIESCENT, 31, 1)
94
+
95
+FIELD(GITS_TYPER, PHYSICAL, 0, 1)
96
+FIELD(GITS_TYPER, ITT_ENTRY_SIZE, 4, 4)
97
+FIELD(GITS_TYPER, IDBITS, 8, 5)
98
+FIELD(GITS_TYPER, DEVBITS, 13, 5)
99
+FIELD(GITS_TYPER, SEIS, 18, 1)
100
+FIELD(GITS_TYPER, PTA, 19, 1)
101
+FIELD(GITS_TYPER, CIDBITS, 32, 4)
102
+FIELD(GITS_TYPER, CIL, 36, 1)
103
+
104
+#define GITS_BASER_PAGESIZE_4K 0
105
+#define GITS_BASER_PAGESIZE_16K 1
106
+#define GITS_BASER_PAGESIZE_64K 2
107
+
108
+#define GITS_BASER_TYPE_DEVICE 1ULL
109
+#define GITS_BASER_TYPE_COLLECTION 4ULL
110
+
111
+/**
112
+ * Default features advertised by this version of ITS
113
+ */
114
+/* Physical LPIs supported */
115
+#define GITS_TYPE_PHYSICAL (1U << 0)
116
+
117
+/*
118
+ * 12 bytes Interrupt translation Table Entry size
119
+ * as per Table 5.3 in GICv3 spec
120
+ * ITE Lower 8 Bytes
121
+ * Bits: | 49 ... 26 | 25 ... 2 | 1 | 0 |
122
+ * Values: | 1023 | IntNum | IntType | Valid |
123
+ * ITE Higher 4 Bytes
124
+ * Bits: | 31 ... 16 | 15 ...0 |
125
+ * Values: | vPEID | ICID |
126
+ */
127
+#define ITS_ITT_ENTRY_SIZE 0xC
128
+
129
+/* 16 bits EventId */
130
+#define ITS_IDBITS GICD_TYPER_IDBITS
131
+
132
+/* 16 bits DeviceId */
133
+#define ITS_DEVBITS 0xF
134
+
135
+/* 16 bits CollectionId */
136
+#define ITS_CIDBITS 0xF
137
+
138
+/*
139
+ * 8 bytes Device Table Entry size
140
+ * Valid = 1 bit,ITTAddr = 44 bits,Size = 5 bits
141
+ */
142
+#define GITS_DTE_SIZE (0x8ULL)
143
+
144
+/*
145
+ * 8 bytes Collection Table Entry size
146
+ * Valid = 1 bit,RDBase = 36 bits(considering max RDBASE)
147
+ */
148
+#define GITS_CTE_SIZE (0x8ULL)
149
+
150
/* Special interrupt IDs */
151
#define INTID_SECURE 1020
152
#define INTID_NONSECURE 1021
153
diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
154
index XXXXXXX..XXXXXXX 100644
155
--- a/include/hw/intc/arm_gicv3_its_common.h
156
+++ b/include/hw/intc/arm_gicv3_its_common.h
157
@@ -XXX,XX +XXX,XX @@
158
#include "hw/intc/arm_gicv3_common.h"
159
#include "qom/object.h"
160
161
+#define TYPE_ARM_GICV3_ITS "arm-gicv3-its"
162
+
163
#define ITS_CONTROL_SIZE 0x10000
164
#define ITS_TRANS_SIZE 0x10000
165
#define ITS_SIZE (ITS_CONTROL_SIZE + ITS_TRANS_SIZE)
166
167
#define GITS_CTLR 0x0
168
#define GITS_IIDR 0x4
169
+#define GITS_TYPER 0x8
170
#define GITS_CBASER 0x80
171
#define GITS_CWRITER 0x88
172
#define GITS_CREADR 0x90
173
#define GITS_BASER 0x100
174
175
+#define GITS_TRANSLATER 0x0040
176
+
177
struct GICv3ITSState {
178
SysBusDevice parent_obj;
179
180
@@ -XXX,XX +XXX,XX @@ struct GICv3ITSState {
181
/* Registers */
182
uint32_t ctlr;
183
uint32_t iidr;
184
+ uint64_t typer;
185
uint64_t cbaser;
186
uint64_t cwriter;
187
uint64_t creadr;
188
@@ -XXX,XX +XXX,XX @@ struct GICv3ITSState {
189
190
typedef struct GICv3ITSState GICv3ITSState;
191
192
-void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops);
193
+void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops,
194
+ const MemoryRegionOps *tops);
195
196
#define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common"
197
typedef struct GICv3ITSCommonClass GICv3ITSCommonClass;
198
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
199
new file mode 100644
33
new file mode 100644
200
index XXXXXXX..XXXXXXX
34
index XXXXXXX..XXXXXXX
201
--- /dev/null
35
--- /dev/null
202
+++ b/hw/intc/arm_gicv3_its.c
36
+++ b/target/arm/sme.decode
37
@@ -XXX,XX +XXX,XX @@
38
+# AArch64 SME instruction descriptions
39
+#
40
+# Copyright (c) 2022 Linaro, Ltd
41
+#
42
+# This library is free software; you can redistribute it and/or
43
+# modify it under the terms of the GNU Lesser General Public
44
+# License as published by the Free Software Foundation; either
45
+# version 2.1 of the License, or (at your option) any later version.
46
+#
47
+# This library is distributed in the hope that it will be useful,
48
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50
+# Lesser General Public License for more details.
51
+#
52
+# You should have received a copy of the GNU Lesser General Public
53
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
54
+
55
+#
56
+# This file is processed by scripts/decodetree.py
57
+#
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
63
}
64
65
switch (extract32(insn, 25, 4)) {
66
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
67
+ case 0x0:
68
+ if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
69
+ unallocated_encoding(s);
70
+ }
71
+ break;
72
+ case 0x1: case 0x3: /* UNALLOCATED */
73
unallocated_encoding(s);
74
break;
75
case 0x2:
76
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/target/arm/translate-sme.c
203
@@ -XXX,XX +XXX,XX @@
81
@@ -XXX,XX +XXX,XX @@
204
+/*
82
+/*
205
+ * ITS emulation for a GICv3-based system
83
+ * AArch64 SME translation
206
+ *
84
+ *
207
+ * Copyright Linaro.org 2021
85
+ * Copyright (c) 2022 Linaro, Ltd
208
+ *
86
+ *
209
+ * Authors:
87
+ * This library is free software; you can redistribute it and/or
210
+ * Shashi Mallela <shashi.mallela@linaro.org>
88
+ * modify it under the terms of the GNU Lesser General Public
89
+ * License as published by the Free Software Foundation; either
90
+ * version 2.1 of the License, or (at your option) any later version.
211
+ *
91
+ *
212
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
92
+ * This library is distributed in the hope that it will be useful,
213
+ * option) any later version. See the COPYING file in the top-level directory.
93
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
94
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
95
+ * Lesser General Public License for more details.
214
+ *
96
+ *
97
+ * You should have received a copy of the GNU Lesser General Public
98
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
215
+ */
99
+ */
216
+
100
+
217
+#include "qemu/osdep.h"
101
+#include "qemu/osdep.h"
218
+#include "qemu/log.h"
102
+#include "cpu.h"
219
+#include "hw/qdev-properties.h"
103
+#include "tcg/tcg-op.h"
220
+#include "hw/intc/arm_gicv3_its_common.h"
104
+#include "tcg/tcg-op-gvec.h"
221
+#include "gicv3_internal.h"
105
+#include "tcg/tcg-gvec-desc.h"
222
+#include "qom/object.h"
106
+#include "translate.h"
223
+#include "qapi/error.h"
107
+#include "exec/helper-gen.h"
108
+#include "translate-a64.h"
109
+#include "fpu/softfloat.h"
224
+
110
+
225
+typedef struct GICv3ITSClass GICv3ITSClass;
226
+/* This is reusing the GICv3ITSState typedef from ARM_GICV3_ITS_COMMON */
227
+DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass,
228
+ ARM_GICV3_ITS, TYPE_ARM_GICV3_ITS)
229
+
111
+
230
+struct GICv3ITSClass {
112
+/*
231
+ GICv3ITSCommonClass parent_class;
113
+ * Include the generated decoder.
232
+ void (*parent_reset)(DeviceState *dev);
114
+ */
233
+};
234
+
115
+
235
+static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset,
116
+#include "decode-sme.c.inc"
236
+ uint64_t data, unsigned size,
117
diff --git a/target/arm/meson.build b/target/arm/meson.build
237
+ MemTxAttrs attrs)
238
+{
239
+ return MEMTX_OK;
240
+}
241
+
242
+static bool its_writel(GICv3ITSState *s, hwaddr offset,
243
+ uint64_t value, MemTxAttrs attrs)
244
+{
245
+ bool result = true;
246
+
247
+ return result;
248
+}
249
+
250
+static bool its_readl(GICv3ITSState *s, hwaddr offset,
251
+ uint64_t *data, MemTxAttrs attrs)
252
+{
253
+ bool result = true;
254
+
255
+ return result;
256
+}
257
+
258
+static bool its_writell(GICv3ITSState *s, hwaddr offset,
259
+ uint64_t value, MemTxAttrs attrs)
260
+{
261
+ bool result = true;
262
+
263
+ return result;
264
+}
265
+
266
+static bool its_readll(GICv3ITSState *s, hwaddr offset,
267
+ uint64_t *data, MemTxAttrs attrs)
268
+{
269
+ bool result = true;
270
+
271
+ return result;
272
+}
273
+
274
+static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data,
275
+ unsigned size, MemTxAttrs attrs)
276
+{
277
+ GICv3ITSState *s = (GICv3ITSState *)opaque;
278
+ bool result;
279
+
280
+ switch (size) {
281
+ case 4:
282
+ result = its_readl(s, offset, data, attrs);
283
+ break;
284
+ case 8:
285
+ result = its_readll(s, offset, data, attrs);
286
+ break;
287
+ default:
288
+ result = false;
289
+ break;
290
+ }
291
+
292
+ if (!result) {
293
+ qemu_log_mask(LOG_GUEST_ERROR,
294
+ "%s: invalid guest read at offset " TARGET_FMT_plx
295
+ "size %u\n", __func__, offset, size);
296
+ /*
297
+ * The spec requires that reserved registers are RAZ/WI;
298
+ * so use false returns from leaf functions as a way to
299
+ * trigger the guest-error logging but don't return it to
300
+ * the caller, or we'll cause a spurious guest data abort.
301
+ */
302
+ *data = 0;
303
+ }
304
+ return MEMTX_OK;
305
+}
306
+
307
+static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data,
308
+ unsigned size, MemTxAttrs attrs)
309
+{
310
+ GICv3ITSState *s = (GICv3ITSState *)opaque;
311
+ bool result;
312
+
313
+ switch (size) {
314
+ case 4:
315
+ result = its_writel(s, offset, data, attrs);
316
+ break;
317
+ case 8:
318
+ result = its_writell(s, offset, data, attrs);
319
+ break;
320
+ default:
321
+ result = false;
322
+ break;
323
+ }
324
+
325
+ if (!result) {
326
+ qemu_log_mask(LOG_GUEST_ERROR,
327
+ "%s: invalid guest write at offset " TARGET_FMT_plx
328
+ "size %u\n", __func__, offset, size);
329
+ /*
330
+ * The spec requires that reserved registers are RAZ/WI;
331
+ * so use false returns from leaf functions as a way to
332
+ * trigger the guest-error logging but don't return it to
333
+ * the caller, or we'll cause a spurious guest data abort.
334
+ */
335
+ }
336
+ return MEMTX_OK;
337
+}
338
+
339
+static const MemoryRegionOps gicv3_its_control_ops = {
340
+ .read_with_attrs = gicv3_its_read,
341
+ .write_with_attrs = gicv3_its_write,
342
+ .valid.min_access_size = 4,
343
+ .valid.max_access_size = 8,
344
+ .impl.min_access_size = 4,
345
+ .impl.max_access_size = 8,
346
+ .endianness = DEVICE_NATIVE_ENDIAN,
347
+};
348
+
349
+static const MemoryRegionOps gicv3_its_translation_ops = {
350
+ .write_with_attrs = gicv3_its_translation_write,
351
+ .valid.min_access_size = 2,
352
+ .valid.max_access_size = 4,
353
+ .impl.min_access_size = 2,
354
+ .impl.max_access_size = 4,
355
+ .endianness = DEVICE_NATIVE_ENDIAN,
356
+};
357
+
358
+static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
359
+{
360
+ GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
361
+ int i;
362
+
363
+ for (i = 0; i < s->gicv3->num_cpu; i++) {
364
+ if (!(s->gicv3->cpu[i].gicr_typer & GICR_TYPER_PLPIS)) {
365
+ error_setg(errp, "Physical LPI not supported by CPU %d", i);
366
+ return;
367
+ }
368
+ }
369
+
370
+ gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops);
371
+
372
+ /* set the ITS default features supported */
373
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL,
374
+ GITS_TYPE_PHYSICAL);
375
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE,
376
+ ITS_ITT_ENTRY_SIZE - 1);
377
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS);
378
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS);
379
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1);
380
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIDBITS, ITS_CIDBITS);
381
+}
382
+
383
+static void gicv3_its_reset(DeviceState *dev)
384
+{
385
+ GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
386
+ GICv3ITSClass *c = ARM_GICV3_ITS_GET_CLASS(s);
387
+
388
+ c->parent_reset(dev);
389
+
390
+ /* Quiescent bit reset to 1 */
391
+ s->ctlr = FIELD_DP32(s->ctlr, GITS_CTLR, QUIESCENT, 1);
392
+
393
+ /*
394
+ * setting GITS_BASER0.Type = 0b001 (Device)
395
+ * GITS_BASER1.Type = 0b100 (Collection Table)
396
+ * GITS_BASER<n>.Type,where n = 3 to 7 are 0b00 (Unimplemented)
397
+ * GITS_BASER<0,1>.Page_Size = 64KB
398
+ * and default translation table entry size to 16 bytes
399
+ */
400
+ s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, TYPE,
401
+ GITS_BASER_TYPE_DEVICE);
402
+ s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, PAGESIZE,
403
+ GITS_BASER_PAGESIZE_64K);
404
+ s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, ENTRYSIZE,
405
+ GITS_DTE_SIZE - 1);
406
+
407
+ s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, TYPE,
408
+ GITS_BASER_TYPE_COLLECTION);
409
+ s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, PAGESIZE,
410
+ GITS_BASER_PAGESIZE_64K);
411
+ s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, ENTRYSIZE,
412
+ GITS_CTE_SIZE - 1);
413
+}
414
+
415
+static Property gicv3_its_props[] = {
416
+ DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
417
+ GICv3State *),
418
+ DEFINE_PROP_END_OF_LIST(),
419
+};
420
+
421
+static void gicv3_its_class_init(ObjectClass *klass, void *data)
422
+{
423
+ DeviceClass *dc = DEVICE_CLASS(klass);
424
+ GICv3ITSClass *ic = ARM_GICV3_ITS_CLASS(klass);
425
+
426
+ dc->realize = gicv3_arm_its_realize;
427
+ device_class_set_props(dc, gicv3_its_props);
428
+ device_class_set_parent_reset(dc, gicv3_its_reset, &ic->parent_reset);
429
+}
430
+
431
+static const TypeInfo gicv3_its_info = {
432
+ .name = TYPE_ARM_GICV3_ITS,
433
+ .parent = TYPE_ARM_GICV3_ITS_COMMON,
434
+ .instance_size = sizeof(GICv3ITSState),
435
+ .class_init = gicv3_its_class_init,
436
+ .class_size = sizeof(GICv3ITSClass),
437
+};
438
+
439
+static void gicv3_its_register_types(void)
440
+{
441
+ type_register_static(&gicv3_its_info);
442
+}
443
+
444
+type_init(gicv3_its_register_types)
445
diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c
446
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
447
--- a/hw/intc/arm_gicv3_its_common.c
119
--- a/target/arm/meson.build
448
+++ b/hw/intc/arm_gicv3_its_common.c
120
+++ b/target/arm/meson.build
449
@@ -XXX,XX +XXX,XX @@ static int gicv3_its_post_load(void *opaque, int version_id)
121
@@ -XXX,XX +XXX,XX @@
450
122
gen = [
451
static const VMStateDescription vmstate_its = {
123
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
452
.name = "arm_gicv3_its",
124
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
453
+ .version_id = 1,
125
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
454
+ .minimum_version_id = 1,
126
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
455
.pre_save = gicv3_its_pre_save,
127
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
456
.post_load = gicv3_its_post_load,
128
@@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
457
.priority = MIG_PRI_GICV3_ITS,
129
'sme_helper.c',
458
@@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps gicv3_its_trans_ops = {
130
'translate-a64.c',
459
.endianness = DEVICE_NATIVE_ENDIAN,
131
'translate-sve.c',
460
};
132
+ 'translate-sme.c',
461
462
-void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops)
463
+void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops,
464
+ const MemoryRegionOps *tops)
465
{
466
SysBusDevice *sbd = SYS_BUS_DEVICE(s);
467
468
memory_region_init_io(&s->iomem_its_cntrl, OBJECT(s), ops, s,
469
"control", ITS_CONTROL_SIZE);
470
memory_region_init_io(&s->iomem_its_translation, OBJECT(s),
471
- &gicv3_its_trans_ops, s,
472
+ tops ? tops : &gicv3_its_trans_ops, s,
473
"translation", ITS_TRANS_SIZE);
474
475
/* Our two regions are always adjacent, therefore we now combine them
476
diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
477
index XXXXXXX..XXXXXXX 100644
478
--- a/hw/intc/arm_gicv3_its_kvm.c
479
+++ b/hw/intc/arm_gicv3_its_kvm.c
480
@@ -XXX,XX +XXX,XX @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp)
481
kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
482
KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd, 0);
483
484
- gicv3_its_init_mmio(s, NULL);
485
+ gicv3_its_init_mmio(s, NULL, NULL);
486
487
if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
488
GITS_CTLR)) {
489
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
490
index XXXXXXX..XXXXXXX 100644
491
--- a/hw/intc/meson.build
492
+++ b/hw/intc/meson.build
493
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ARM_GIC', if_true: files(
494
'arm_gicv3_dist.c',
495
'arm_gicv3_its_common.c',
496
'arm_gicv3_redist.c',
497
+ 'arm_gicv3_its.c',
498
))
133
))
499
softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_pic.c'))
134
500
softmmu_ss.add(when: 'CONFIG_HEATHROW_PIC', if_true: files('heathrow_pic.c'))
135
arm_softmmu_ss = ss.source_set()
501
--
136
--
502
2.20.1
137
2.25.1
503
504
diff view generated by jsdifflib
1
In v8A, the PSTATE.IL bit is set for various kinds of illegal
1
From: Richard Henderson <richard.henderson@linaro.org>
2
exception return or mode-change attempts. We already set PSTATE.IL
2
3
(or its AArch32 equivalent CPSR.IL) in all those cases, but we
3
This new behaviour is in the ARM pseudocode function
4
weren't implementing the part of the behaviour where attempting to
4
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
5
execute an instruction with PSTATE.IL takes an immediate exception
5
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
6
with an appropriate syndrome value.
6
the trap would be delivered is in AArch64 mode.
7
7
8
Add a new TB flags bit tracking PSTATE.IL/CPSR.IL, and generate code
8
Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
9
to take an exception instead of whatever the instruction would have
9
detection ought to be trivially true, but the pseudocode still contains
10
been.
10
a number of conditions, and QEMU has not yet committed to dropping A32
11
11
support for EL[12] when v9 features are present.
12
PSTATE.IL and CPSR.IL change only on exception entry, attempted
12
13
exception exit, and various AArch32 mode changes via cpsr_write().
13
Since the computation of SME_TRAP_NONSTREAMING is necessarily different
14
These places generally already rebuild the hflags, so the only place
14
for the two modes, we might as well preserve bits within TBFLAG_ANY and
15
we need an extra rebuild_hflags call is in the illegal-return
15
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.
16
codepath of the AArch64 exception_return helper.
16
17
17
Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
18
of instructions illegal in streaming mode.
19
20
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20220708151540.18136-4-richard.henderson@linaro.org
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21
Message-id: 20210821195958.41312-2-richard.henderson@linaro.org
22
Message-Id: <20210817162118.24319-1-peter.maydell@linaro.org>
23
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
[rth: Added missing returns; set IL bit in syndrome]
25
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
26
---
24
---
27
target/arm/cpu.h | 1 +
25
target/arm/cpu.h | 7 +++
28
target/arm/syndrome.h | 5 +++++
26
target/arm/translate.h | 4 ++
29
target/arm/translate.h | 2 ++
27
target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++
30
target/arm/helper-a64.c | 1 +
28
target/arm/helper.c | 41 +++++++++++++++++
31
target/arm/helper.c | 8 ++++++++
29
target/arm/translate-a64.c | 40 ++++++++++++++++-
32
target/arm/translate-a64.c | 11 +++++++++++
30
target/arm/translate-vfp.c | 12 +++++
33
target/arm/translate.c | 21 +++++++++++++++++++++
31
target/arm/translate.c | 2 +
34
7 files changed, 49 insertions(+)
32
target/arm/meson.build | 1 +
33
8 files changed, 195 insertions(+), 2 deletions(-)
34
create mode 100644 target/arm/sme-fa64.decode
35
35
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
37
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.h
38
--- a/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2)
40
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
41
FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2)
41
* the same thing as the current security state of the processor!
42
/* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */
42
*/
43
FIELD(TBFLAG_ANY, ALIGN_MEM, 12, 1)
43
FIELD(TBFLAG_A32, NS, 10, 1)
44
+FIELD(TBFLAG_ANY, PSTATE__IL, 13, 1)
44
+/*
45
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
46
+ * This requires an SME trap from AArch32 mode when using NEON.
47
+ */
48
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
45
49
46
/*
50
/*
47
* Bit usage when in AArch32 state, both A- and M-profile.
51
* Bit usage when in AArch32 state, for M-profile only.
48
diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
52
@@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
49
index XXXXXXX..XXXXXXX 100644
53
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
50
--- a/target/arm/syndrome.h
54
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
51
+++ b/target/arm/syndrome.h
55
FIELD(TBFLAG_A64, SVL, 24, 4)
52
@@ -XXX,XX +XXX,XX @@ static inline uint32_t syn_wfx(int cv, int cond, int ti, bool is_16bit)
56
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
53
(cv << 24) | (cond << 20) | ti;
57
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
54
}
58
55
59
/*
56
+static inline uint32_t syn_illegalstate(void)
60
* Helpers for using the above.
57
+{
58
+ return (EC_ILLEGALSTATE << ARM_EL_EC_SHIFT) | ARM_EL_IL;
59
+}
60
+
61
#endif /* TARGET_ARM_SYNDROME_H */
62
diff --git a/target/arm/translate.h b/target/arm/translate.h
61
diff --git a/target/arm/translate.h b/target/arm/translate.h
63
index XXXXXXX..XXXXXXX 100644
62
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/translate.h
63
--- a/target/arm/translate.h
65
+++ b/target/arm/translate.h
64
+++ b/target/arm/translate.h
66
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
65
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
67
bool hstr_active;
66
bool pstate_sm;
68
/* True if memory operations require alignment */
67
/* True if PSTATE.ZA is set. */
69
bool align_mem;
68
bool pstate_za;
70
+ /* True if PSTATE.IL is set */
69
+ /* True if non-streaming insns should raise an SME Streaming exception. */
71
+ bool pstate_il;
70
+ bool sme_trap_nonstreaming;
71
+ /* True if the current instruction is non-streaming. */
72
+ bool is_nonstreaming;
73
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
74
bool mve_no_pred;
72
/*
75
/*
73
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
76
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
74
* < 0, set by the current instruction.
77
new file mode 100644
75
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
78
index XXXXXXX..XXXXXXX
76
index XXXXXXX..XXXXXXX 100644
79
--- /dev/null
77
--- a/target/arm/helper-a64.c
80
+++ b/target/arm/sme-fa64.decode
78
+++ b/target/arm/helper-a64.c
81
@@ -XXX,XX +XXX,XX @@
79
@@ -XXX,XX +XXX,XX @@ illegal_return:
82
+# AArch64 SME allowed instruction decoding
80
if (!arm_singlestep_active(env)) {
83
+#
81
env->pstate &= ~PSTATE_SS;
84
+# Copyright (c) 2022 Linaro, Ltd
82
}
85
+#
83
+ helper_rebuild_hflags_a64(env, cur_el);
86
+# This library is free software; you can redistribute it and/or
84
qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
87
+# modify it under the terms of the GNU Lesser General Public
85
"resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
88
+# License as published by the Free Software Foundation; either
86
}
89
+# version 2.1 of the License, or (at your option) any later version.
90
+#
91
+# This library is distributed in the hope that it will be useful,
92
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
93
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
94
+# Lesser General Public License for more details.
95
+#
96
+# You should have received a copy of the GNU Lesser General Public
97
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
98
+
99
+#
100
+# This file is processed by scripts/decodetree.py
101
+#
102
+
103
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
104
+# Arm Architecture Reference Manual Supplement,
105
+# The Scalable Matrix Extension (SME), for Armv9-A
106
+
107
+{
108
+ [
109
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
110
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
111
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
112
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
113
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
114
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
115
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
116
+ ]
117
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
118
+}
119
+
120
+{
121
+ [
122
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
123
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
124
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
125
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
126
+ ]
127
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
128
+}
129
+
130
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
131
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
132
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
133
+
134
+# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
135
+# We don't actually need to include these, as the default is OK.
136
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
137
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
138
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
139
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
140
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
141
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
142
+
143
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
144
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
145
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
146
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
147
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
148
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
149
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
150
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
151
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
152
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
153
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
154
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
155
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
156
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
157
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
158
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
159
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
160
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
161
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
162
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
163
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
164
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
165
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
166
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
167
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
168
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
169
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
170
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
171
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
87
diff --git a/target/arm/helper.c b/target/arm/helper.c
172
diff --git a/target/arm/helper.c b/target/arm/helper.c
88
index XXXXXXX..XXXXXXX 100644
173
index XXXXXXX..XXXXXXX 100644
89
--- a/target/arm/helper.c
174
--- a/target/arm/helper.c
90
+++ b/target/arm/helper.c
175
+++ b/target/arm/helper.c
176
@@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el)
177
return 0;
178
}
179
180
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
181
+static bool sme_fa64(CPUARMState *env, int el)
182
+{
183
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
184
+ return false;
185
+ }
186
+
187
+ if (el <= 1 && !el_is_in_host(env, el)) {
188
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
189
+ return false;
190
+ }
191
+ }
192
+ if (el <= 2 && arm_is_el2_enabled(env)) {
193
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
194
+ return false;
195
+ }
196
+ }
197
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
198
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
199
+ return false;
200
+ }
201
+ }
202
+
203
+ return true;
204
+}
205
+
206
/*
207
* Given that SVE is enabled, return the vector length for EL.
208
*/
91
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
209
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
92
DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1);
210
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
93
}
211
}
94
212
95
+ if (env->uncached_cpsr & CPSR_IL) {
213
+ /*
96
+ DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
214
+ * The SME exception we are testing for is raised via
215
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
216
+ * AArch32.CheckAdvSIMDOrFPEnabled().
217
+ */
218
+ if (el == 0
219
+ && FIELD_EX64(env->svcr, SVCR, SM)
220
+ && (!arm_is_el2_enabled(env)
221
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
222
+ && arm_el_is_aa64(env, 1)
223
+ && !sme_fa64(env, el)) {
224
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
97
+ }
225
+ }
98
+
226
+
99
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
227
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
100
}
228
}
101
229
102
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
230
@@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
103
}
231
}
104
}
232
if (FIELD_EX64(env->svcr, SVCR, SM)) {
105
233
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
106
+ if (env->pstate & PSTATE_IL) {
234
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
107
+ DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
235
}
108
+ }
236
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
109
+
237
}
110
if (cpu_isar_feature(aa64_mte, env_archcpu(env))) {
111
/*
112
* Set MTE_ACTIVE if any access may be Checked, and leave clear
113
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
238
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
114
index XXXXXXX..XXXXXXX 100644
239
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/translate-a64.c
240
--- a/target/arm/translate-a64.c
116
+++ b/target/arm/translate-a64.c
241
+++ b/target/arm/translate-a64.c
117
@@ -XXX,XX +XXX,XX @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
242
@@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
118
s->fp_access_checked = false;
243
* unallocated-encoding checks (otherwise the syndrome information
119
s->sve_access_checked = false;
244
* for the resulting exception will be incorrect).
120
245
*/
121
+ if (s->pstate_il) {
246
-static bool fp_access_check(DisasContext *s)
122
+ /*
247
+static bool fp_access_check_only(DisasContext *s)
123
+ * Illegal execution state. This has priority over BTI
248
{
124
+ * exceptions, but comes after instruction abort exceptions.
249
if (s->fp_excp_el) {
125
+ */
250
assert(!s->fp_access_checked);
251
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
252
return true;
253
}
254
255
+static bool fp_access_check(DisasContext *s)
256
+{
257
+ if (!fp_access_check_only(s)) {
258
+ return false;
259
+ }
260
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
126
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
261
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
127
+ syn_illegalstate(), default_exception_el(s));
262
+ syn_smetrap(SME_ET_Streaming, false));
128
+ return;
263
+ return false;
129
+ }
264
+ }
130
+
265
+ return true;
131
if (dc_isar_feature(aa64_bti, s)) {
266
+}
132
if (s->base.num_insns == 1) {
267
+
133
/*
268
/* Check that SVE access is enabled. If it is, return true.
269
* If not, emit code to generate an appropriate exception and return false.
270
*/
271
@@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
272
default:
273
g_assert_not_reached();
274
}
275
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
276
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
277
return;
278
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
279
return;
280
@@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
281
}
282
}
283
284
+/*
285
+ * Include the generated SME FA64 decoder.
286
+ */
287
+
288
+#include "decode-sme-fa64.c.inc"
289
+
290
+static bool trans_OK(DisasContext *s, arg_OK *a)
291
+{
292
+ return true;
293
+}
294
+
295
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
296
+{
297
+ s->is_nonstreaming = true;
298
+ return true;
299
+}
300
+
301
/**
302
* is_guarded_page:
303
* @env: The cpu environment
134
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
304
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
135
#endif
305
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
136
dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
306
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
137
dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
307
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
138
+ dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
308
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
139
dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
309
dc->vec_len = 0;
140
dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16;
310
dc->vec_stride = 0;
141
dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
311
dc->cp_regs = arm_cpu->cp_regs;
312
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
313
}
314
}
315
316
+ s->is_nonstreaming = false;
317
+ if (s->sme_trap_nonstreaming) {
318
+ disas_sme_fa64(s, insn);
319
+ }
320
+
321
switch (extract32(insn, 25, 4)) {
322
case 0x0:
323
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
324
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/target/arm/translate-vfp.c
327
+++ b/target/arm/translate-vfp.c
328
@@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
329
return false;
330
}
331
332
+ /*
333
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
334
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
335
+ * appear to be any insns which touch VFP which are allowed.
336
+ */
337
+ if (s->sme_trap_nonstreaming) {
338
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
339
+ syn_smetrap(SME_ET_Streaming,
340
+ s->base.pc_next - s->pc_curr == 2));
341
+ return false;
342
+ }
343
+
344
if (!s->vfp_enabled && !ignore_vfp_enabled) {
345
assert(!arm_dc_feature(s, ARM_FEATURE_M));
346
unallocated_encoding(s);
142
diff --git a/target/arm/translate.c b/target/arm/translate.c
347
diff --git a/target/arm/translate.c b/target/arm/translate.c
143
index XXXXXXX..XXXXXXX 100644
348
index XXXXXXX..XXXXXXX 100644
144
--- a/target/arm/translate.c
349
--- a/target/arm/translate.c
145
+++ b/target/arm/translate.c
350
+++ b/target/arm/translate.c
146
@@ -XXX,XX +XXX,XX @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
147
return;
148
}
149
150
+ if (s->pstate_il) {
151
+ /*
152
+ * Illegal execution state. This has priority over BTI
153
+ * exceptions, but comes after instruction abort exceptions.
154
+ */
155
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
156
+ syn_illegalstate(), default_exception_el(s));
157
+ return;
158
+ }
159
+
160
if (cond == 0xf) {
161
/* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
162
* choose to UNDEF. In ARMv5 and above the space is used
163
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
351
@@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
164
#endif
352
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
165
dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
353
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
166
dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
354
}
167
+ dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
355
+ dc->sme_trap_nonstreaming =
168
356
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
169
if (arm_feature(env, ARM_FEATURE_M)) {
357
}
170
dc->vfp_enabled = 1;
358
dc->cp_regs = cpu->cp_regs;
171
@@ -XXX,XX +XXX,XX @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
359
dc->features = env->features;
172
}
360
diff --git a/target/arm/meson.build b/target/arm/meson.build
173
dc->insn = insn;
361
index XXXXXXX..XXXXXXX 100644
174
362
--- a/target/arm/meson.build
175
+ if (dc->pstate_il) {
363
+++ b/target/arm/meson.build
176
+ /*
364
@@ -XXX,XX +XXX,XX @@
177
+ * Illegal execution state. This has priority over BTI
365
gen = [
178
+ * exceptions, but comes after instruction abort exceptions.
366
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
179
+ */
367
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
180
+ gen_exception_insn(dc, dc->pc_curr, EXCP_UDEF,
368
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
181
+ syn_illegalstate(), default_exception_el(dc));
369
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
182
+ return;
370
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
183
+ }
371
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
184
+
185
if (dc->eci) {
186
/*
187
* For M-profile continuable instructions, ECI/ICI handling
188
--
372
--
189
2.20.1
373
2.25.1
190
191
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark ADR as a non-streaming instruction, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Removing entries from sme-fa64.decode is an easy way to see
7
what remains to be done.
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-id: 20220708151540.18136-5-richard.henderson@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
target/arm/translate.h | 7 +++++++
15
target/arm/sme-fa64.decode | 1 -
16
target/arm/translate-sve.c | 8 ++++----
17
3 files changed, 11 insertions(+), 5 deletions(-)
18
19
diff --git a/target/arm/translate.h b/target/arm/translate.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/arm/translate.h
22
+++ b/target/arm/translate.h
23
@@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
24
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
25
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
26
27
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
28
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
29
+ { \
30
+ s->is_nonstreaming = true; \
31
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
32
+ }
33
+
34
#endif /* TARGET_ARM_TRANSLATE_H */
35
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
36
index XXXXXXX..XXXXXXX 100644
37
--- a/target/arm/sme-fa64.decode
38
+++ b/target/arm/sme-fa64.decode
39
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
40
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
41
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
42
43
-FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
44
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
45
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
46
FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
47
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-sve.c
50
+++ b/target/arm/translate-sve.c
51
@@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
52
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
53
}
54
55
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
56
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
57
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
58
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
59
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
60
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
61
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
62
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
63
64
/*
65
*** SVE Integer Misc - Unpredicated Group
66
--
67
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-6-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 9 ++++++---
13
2 files changed, 6 insertions(+), 5 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
21
FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
22
FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
23
-FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
24
-FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
25
FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
33
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
34
35
/* Note pat == 31 is #all, to set all elements. */
36
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
37
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
38
+ do_predset, 0, FFR_PRED_NUM, 31, false)
39
40
/* Note pat == 32 is #unimp, to set no elements. */
41
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
42
@@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
43
.rd = a->rd, .pg = a->pg, .s = a->s,
44
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
45
};
46
+
47
+ s->is_nonstreaming = true;
48
return trans_AND_pppp(s, &alt_a);
49
}
50
51
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
52
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
53
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
54
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
55
56
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
57
void (*gen_fn)(TCGv_i32, TCGv_ptr,
58
--
59
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-7-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 22 ++++++++++++----------
13
2 files changed, 12 insertions(+), 13 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
24
-FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
25
-FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
26
FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
27
FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
28
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
34
NULL, gen_helper_sve_fexpa_h,
35
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
36
};
37
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
38
- fexpa_fns[a->esz], a->rd, a->rn, 0)
39
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
40
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
41
42
static gen_helper_gvec_3 * const ftssel_fns[4] = {
43
NULL, gen_helper_sve_ftssel_h,
44
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
45
};
46
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
47
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
48
+ ftssel_fns[a->esz], a, 0)
49
50
/*
51
*** SVE Predicate Logical Operations Group
52
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
53
static gen_helper_gvec_3 * const compact_fns[4] = {
54
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
55
};
56
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
57
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
58
+ compact_fns[a->esz], a, 0)
59
60
/* Call the helper that computes the ARM LastActiveElement pseudocode
61
* function, scaled by the element size. This includes the not found
62
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = {
63
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
64
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
65
};
66
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
67
- bext_fns[a->esz], a, 0)
68
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
69
+ bext_fns[a->esz], a, 0)
70
71
static gen_helper_gvec_3 * const bdep_fns[4] = {
72
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
73
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
74
};
75
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
76
- bdep_fns[a->esz], a, 0)
77
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
78
+ bdep_fns[a->esz], a, 0)
79
80
static gen_helper_gvec_3 * const bgrp_fns[4] = {
81
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
82
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
83
};
84
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
85
- bgrp_fns[a->esz], a, 0)
86
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
87
+ bgrp_fns[a->esz], a, 0)
88
89
static gen_helper_gvec_3 * const cadd_fns[4] = {
90
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
91
--
92
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-8-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 24 +++++++++++++++---------
13
2 files changed, 15 insertions(+), 11 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
24
-FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
25
FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
26
FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
27
FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
28
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/translate-sve.c
31
+++ b/target/arm/translate-sve.c
32
@@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
33
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
34
NULL, gen_helper_sve2_pmull_d,
35
};
36
- if (a->esz == 0
37
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
38
- : !dc_isar_feature(aa64_sve, s)) {
39
+
40
+ if (a->esz == 0) {
41
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
42
+ return false;
43
+ }
44
+ s->is_nonstreaming = true;
45
+ } else if (!dc_isar_feature(aa64_sve, s)) {
46
return false;
47
}
48
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
49
@@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
50
* SVE Integer Multiply-Add (unpredicated)
51
*/
52
53
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
54
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
55
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
56
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
57
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
58
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
59
+ 0, FPST_FPCR)
60
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
61
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
62
+ 0, FPST_FPCR)
63
64
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
65
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
66
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
67
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
68
gen_helper_gvec_bfdot_idx, a)
69
70
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
71
- gen_helper_gvec_bfmmla, a, 0)
72
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
73
+ gen_helper_gvec_bfmmla, a, 0)
74
75
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
76
{
77
--
78
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-9-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 15 +++++++++++----
13
2 files changed, 11 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
24
-FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
25
-FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
26
FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
27
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
28
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
29
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/target/arm/translate-sve.c
32
+++ b/target/arm/translate-sve.c
33
@@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
34
NULL, gen_helper_sve_ftmad_h,
35
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
36
};
37
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
38
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
39
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
40
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
41
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
42
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
43
44
/*
45
*** SVE Floating Point Accumulating Reduction Group
46
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
47
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
48
return false;
49
}
50
+ s->is_nonstreaming = true;
51
if (!sve_access_check(s)) {
52
return true;
53
}
54
@@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
55
DO_FP3(FADD_zzz, fadd)
56
DO_FP3(FSUB_zzz, fsub)
57
DO_FP3(FMUL_zzz, fmul)
58
-DO_FP3(FTSMUL, ftsmul)
59
DO_FP3(FRECPS, recps)
60
DO_FP3(FRSQRTS, rsqrts)
61
62
#undef DO_FP3
63
64
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
65
+ NULL, gen_helper_gvec_ftsmul_h,
66
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
67
+};
68
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
69
+ ftsmul_fns[a->esz], a, 0)
70
+
71
/*
72
*** SVE Floating Point Arithmetic - Predicated Group
73
*/
74
--
75
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-10-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 12 ++++++------
13
2 files changed, 6 insertions(+), 7 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
24
FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
25
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
26
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
32
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
33
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
34
35
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
36
- gen_helper_gvec_smmla_b, a, 0)
37
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
38
- gen_helper_gvec_usmmla_b, a, 0)
39
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
40
- gen_helper_gvec_ummla_b, a, 0)
41
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
42
+ gen_helper_gvec_smmla_b, a, 0)
43
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
44
+ gen_helper_gvec_usmmla_b, a, 0)
45
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
46
+ gen_helper_gvec_ummla_b, a, 0)
47
48
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
49
gen_helper_gvec_bfdot, a, 0)
50
--
51
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-11-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 1 -
12
target/arm/translate-sve.c | 35 ++++++++++++++++++-----------------
13
2 files changed, 18 insertions(+), 18 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
24
FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
25
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
32
static gen_helper_gvec_flags_4 * const match_fns[4] = {
33
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
34
};
35
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
36
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
37
38
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
39
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
40
};
41
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
42
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
43
44
static gen_helper_gvec_4 * const histcnt_fns[4] = {
45
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
46
};
47
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
48
- histcnt_fns[a->esz], a, 0)
49
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
50
+ histcnt_fns[a->esz], a, 0)
51
52
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
53
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
54
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
55
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
56
57
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
58
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
59
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
60
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
61
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
62
63
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
64
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
65
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
66
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
67
68
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
69
- gen_helper_crypto_aese, a, false)
70
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
71
- gen_helper_crypto_aese, a, true)
72
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
73
+ gen_helper_crypto_aese, a, false)
74
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
75
+ gen_helper_crypto_aese, a, true)
76
77
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
78
- gen_helper_crypto_sm4e, a, 0)
79
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
80
- gen_helper_crypto_sm4ekey, a, 0)
81
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
82
+ gen_helper_crypto_sm4e, a, 0)
83
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
84
+ gen_helper_crypto_sm4ekey, a, 0)
85
86
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
87
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
88
+ gen_gvec_rax1, a)
89
90
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
91
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
92
--
93
2.25.1
diff view generated by jsdifflib
1
From: Marc Zyngier <maz@kernel.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Although we probe for the IPA limits imposed by KVM (and the hardware)
3
Mark these as a non-streaming instructions, which should trap
4
when computing the memory map, we still use the old style '0' when
4
if full a64 support is not enabled in streaming mode.
5
creating a scratch VM in kvm_arm_create_scratch_host_vcpu().
6
5
7
On systems that are severely IPA challenged (such as the Apple M1),
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
this results in a failure as KVM cannot use the default 40bit that
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
'0' represents.
8
Message-id: 20220708151540.18136-12-richard.henderson@linaro.org
10
11
Instead, probe for the extension and use the reported IPA limit
12
if available.
13
14
Cc: Andrew Jones <drjones@redhat.com>
15
Cc: Eric Auger <eric.auger@redhat.com>
16
Cc: Peter Maydell <peter.maydell@linaro.org>
17
Signed-off-by: Marc Zyngier <maz@kernel.org>
18
Reviewed-by: Andrew Jones <drjones@redhat.com>
19
Message-id: 20210822144441.1290891-2-maz@kernel.org
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
---
10
---
22
target/arm/kvm.c | 7 ++++++-
11
target/arm/sme-fa64.decode | 9 ---------
23
1 file changed, 6 insertions(+), 1 deletion(-)
12
target/arm/translate-sve.c | 6 ++++++
13
2 files changed, 6 insertions(+), 9 deletions(-)
24
14
25
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
26
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/kvm.c
17
--- a/target/arm/sme-fa64.decode
28
+++ b/target/arm/kvm.c
18
+++ b/target/arm/sme-fa64.decode
29
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
30
struct kvm_vcpu_init *init)
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
31
{
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
32
int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
22
33
+ int max_vm_pa_size;
23
-FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
34
24
FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
35
kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
25
FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
36
if (kvmfd < 0) {
26
-FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
37
goto err;
27
-FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
28
-FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
29
-FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
30
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
31
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
32
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
33
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
34
FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
35
-FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
36
-FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
37
-FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
38
-FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
39
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/target/arm/translate-sve.c
42
+++ b/target/arm/translate-sve.c
43
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
44
if (!dc_isar_feature(aa64_sve, s)) {
45
return false;
38
}
46
}
39
- vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
47
+ s->is_nonstreaming = true;
40
+ max_vm_pa_size = ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
48
if (!sve_access_check(s)) {
41
+ if (max_vm_pa_size < 0) {
49
return true;
42
+ max_vm_pa_size = 0;
50
}
43
+ }
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
44
+ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size);
52
if (!dc_isar_feature(aa64_sve, s)) {
45
if (vmfd < 0) {
53
return false;
46
goto err;
54
}
55
+ s->is_nonstreaming = true;
56
if (!sve_access_check(s)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
60
if (!dc_isar_feature(aa64_sve2, s)) {
61
return false;
62
}
63
+ s->is_nonstreaming = true;
64
if (!sve_access_check(s)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
68
if (!dc_isar_feature(aa64_sve, s)) {
69
return false;
70
}
71
+ s->is_nonstreaming = true;
72
if (!sve_access_check(s)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
76
if (!dc_isar_feature(aa64_sve, s)) {
77
return false;
78
}
79
+ s->is_nonstreaming = true;
80
if (!sve_access_check(s)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
84
if (!dc_isar_feature(aa64_sve2, s)) {
85
return false;
86
}
87
+ s->is_nonstreaming = true;
88
if (!sve_access_check(s)) {
89
return true;
47
}
90
}
48
--
91
--
49
2.20.1
92
2.25.1
50
51
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap if full
4
a64 support is not enabled in streaming mode. In this case, introduce
5
PRF_ns (prefetch non-streaming) to handle the checks.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-13-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
target/arm/sme-fa64.decode | 3 ---
13
target/arm/sve.decode | 10 +++++-----
14
target/arm/translate-sve.c | 11 +++++++++++
15
3 files changed, 16 insertions(+), 8 deletions(-)
16
17
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/sme-fa64.decode
20
+++ b/target/arm/sme-fa64.decode
21
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
22
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
23
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
24
25
-FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
26
-FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
27
FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
28
FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
29
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
30
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
31
-FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
32
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/arm/sve.decode
35
+++ b/target/arm/sve.decode
36
@@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
37
@rpri_load_msz nreg=0
38
39
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
40
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
41
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
42
43
# SVE 32-bit gather prefetch (vector plus immediate)
44
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
45
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
46
47
# SVE contiguous prefetch (scalar plus immediate)
48
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
49
@@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
50
@rpri_g_load esz=3
51
52
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
53
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
54
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
55
56
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
57
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
58
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
59
60
# SVE 64-bit gather prefetch (vector plus immediate)
61
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
62
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
63
64
### SVE Memory Store Group
65
66
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/arm/translate-sve.c
69
+++ b/target/arm/translate-sve.c
70
@@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
71
return true;
72
}
73
74
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
75
+{
76
+ if (!dc_isar_feature(aa64_sve, s)) {
77
+ return false;
78
+ }
79
+ /* Prefetch is a nop within QEMU. */
80
+ s->is_nonstreaming = true;
81
+ (void)sve_access_check(s);
82
+ return true;
83
+}
84
+
85
/*
86
* Move Prefix
87
*
88
--
89
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-14-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 2 --
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 2 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
21
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
22
23
-FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
24
-FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
25
FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
26
FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/translate-sve.c
30
+++ b/target/arm/translate-sve.c
31
@@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
32
if (!dc_isar_feature(aa64_sve, s)) {
33
return false;
34
}
35
+ s->is_nonstreaming = true;
36
if (sve_access_check(s)) {
37
TCGv_i64 addr = new_tmp_a64(s);
38
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
39
@@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
40
if (!dc_isar_feature(aa64_sve, s)) {
41
return false;
42
}
43
+ s->is_nonstreaming = true;
44
if (sve_access_check(s)) {
45
int vsz = vec_full_reg_size(s);
46
int elements = vsz >> dtype_esz[a->dtype];
47
--
48
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Mark these as a non-streaming instructions, which should trap
4
if full a64 support is not enabled in streaming mode.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-15-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/sme-fa64.decode | 3 ---
12
target/arm/translate-sve.c | 2 ++
13
2 files changed, 2 insertions(+), 3 deletions(-)
14
15
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
16
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/sme-fa64.decode
18
+++ b/target/arm/sme-fa64.decode
19
@@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
20
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
21
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
22
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
23
-
24
-FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
25
-FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
26
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/arm/translate-sve.c
29
+++ b/target/arm/translate-sve.c
30
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
31
if (a->rm == 31) {
32
return false;
33
}
34
+ s->is_nonstreaming = true;
35
if (sve_access_check(s)) {
36
TCGv_i64 addr = new_tmp_a64(s);
37
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
38
@@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
39
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
40
return false;
41
}
42
+ s->is_nonstreaming = true;
43
if (sve_access_check(s)) {
44
TCGv_i64 addr = new_tmp_a64(s);
45
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
46
--
47
2.25.1
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Added functionality to trigger ITS command queue processing on
3
These functions will be used to verify that the cpu
4
write to CWRITE register and process each command queue entry to
4
is in the correct state for a given instruction.
5
identify the command type and handle commands like MAPD,MAPC,SYNC.
6
5
7
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
8
Message-id: 20220708151540.18136-16-richard.henderson@linaro.org
11
Message-id: 20210910143951.92242-4-shashi.mallela@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
10
---
14
hw/intc/gicv3_internal.h | 40 +++++
11
target/arm/translate-a64.h | 21 +++++++++++++++++++++
15
hw/intc/arm_gicv3_its.c | 319 +++++++++++++++++++++++++++++++++++++++
12
target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++
16
2 files changed, 359 insertions(+)
13
2 files changed, 55 insertions(+)
17
14
18
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/intc/gicv3_internal.h
17
--- a/target/arm/translate-a64.h
21
+++ b/hw/intc/gicv3_internal.h
18
+++ b/target/arm/translate-a64.h
22
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1)
19
@@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
23
#define L1TABLE_ENTRY_SIZE 8
20
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
24
21
unsigned int imms, unsigned int immr);
25
#define GITS_CMDQ_ENTRY_SIZE 32
22
bool sve_access_check(DisasContext *s);
26
+#define NUM_BYTES_IN_DW 8
23
+bool sme_enabled_check(DisasContext *s);
24
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
27
+
25
+
28
+#define CMD_MASK 0xff
26
+/* This function corresponds to CheckStreamingSVEEnabled. */
29
+
27
+static inline bool sme_sm_enabled_check(DisasContext *s)
30
+/* ITS Commands */
31
+#define GITS_CMD_CLEAR 0x04
32
+#define GITS_CMD_DISCARD 0x0F
33
+#define GITS_CMD_INT 0x03
34
+#define GITS_CMD_MAPC 0x09
35
+#define GITS_CMD_MAPD 0x08
36
+#define GITS_CMD_MAPI 0x0B
37
+#define GITS_CMD_MAPTI 0x0A
38
+#define GITS_CMD_INV 0x0C
39
+#define GITS_CMD_INVALL 0x0D
40
+#define GITS_CMD_SYNC 0x05
41
+
42
+/* MAPC command fields */
43
+#define ICID_LENGTH 16
44
+#define ICID_MASK ((1U << ICID_LENGTH) - 1)
45
+FIELD(MAPC, RDBASE, 16, 32)
46
+
47
+#define RDBASE_PROCNUM_LENGTH 16
48
+#define RDBASE_PROCNUM_MASK ((1ULL << RDBASE_PROCNUM_LENGTH) - 1)
49
+
50
+/* MAPD command fields */
51
+#define ITTADDR_LENGTH 44
52
+#define ITTADDR_SHIFT 8
53
+#define ITTADDR_MASK MAKE_64BIT_MASK(ITTADDR_SHIFT, ITTADDR_LENGTH)
54
+#define SIZE_MASK 0x1f
55
+
56
+#define DEVID_SHIFT 32
57
+#define DEVID_MASK MAKE_64BIT_MASK(32, 32)
58
+
59
+#define VALID_SHIFT 63
60
+#define CMD_FIELD_VALID_MASK (1ULL << VALID_SHIFT)
61
+#define L2_TABLE_VALID_MASK CMD_FIELD_VALID_MASK
62
+#define TABLE_ENTRY_VALID_MASK (1ULL << 0)
63
64
/**
65
* Default features advertised by this version of ITS
66
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1)
67
* Valid = 1 bit,ITTAddr = 44 bits,Size = 5 bits
68
*/
69
#define GITS_DTE_SIZE (0x8ULL)
70
+#define GITS_DTE_ITTADDR_SHIFT 6
71
+#define GITS_DTE_ITTADDR_MASK MAKE_64BIT_MASK(GITS_DTE_ITTADDR_SHIFT, \
72
+ ITTADDR_LENGTH)
73
74
/*
75
* 8 bytes Collection Table Entry size
76
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/intc/arm_gicv3_its.c
79
+++ b/hw/intc/arm_gicv3_its.c
80
@@ -XXX,XX +XXX,XX @@ static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
81
return result;
82
}
83
84
+static bool update_cte(GICv3ITSState *s, uint16_t icid, bool valid,
85
+ uint64_t rdbase)
86
+{
28
+{
87
+ AddressSpace *as = &s->gicv3->dma_as;
29
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
88
+ uint64_t value;
89
+ uint64_t l2t_addr;
90
+ bool valid_l2t;
91
+ uint32_t l2t_id;
92
+ uint32_t max_l2_entries;
93
+ uint64_t cte = 0;
94
+ MemTxResult res = MEMTX_OK;
95
+
96
+ if (!s->ct.valid) {
97
+ return true;
98
+ }
99
+
100
+ if (valid) {
101
+ /* add mapping entry to collection table */
102
+ cte = (valid & TABLE_ENTRY_VALID_MASK) | (rdbase << 1ULL);
103
+ }
104
+
105
+ /*
106
+ * The specification defines the format of level 1 entries of a
107
+ * 2-level table, but the format of level 2 entries and the format
108
+ * of flat-mapped tables is IMPDEF.
109
+ */
110
+ if (s->ct.indirect) {
111
+ l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE);
112
+
113
+ value = address_space_ldq_le(as,
114
+ s->ct.base_addr +
115
+ (l2t_id * L1TABLE_ENTRY_SIZE),
116
+ MEMTXATTRS_UNSPECIFIED, &res);
117
+
118
+ if (res != MEMTX_OK) {
119
+ return false;
120
+ }
121
+
122
+ valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
123
+
124
+ if (valid_l2t) {
125
+ max_l2_entries = s->ct.page_sz / s->ct.entry_sz;
126
+
127
+ l2t_addr = value & ((1ULL << 51) - 1);
128
+
129
+ address_space_stq_le(as, l2t_addr +
130
+ ((icid % max_l2_entries) * GITS_CTE_SIZE),
131
+ cte, MEMTXATTRS_UNSPECIFIED, &res);
132
+ }
133
+ } else {
134
+ /* Flat level table */
135
+ address_space_stq_le(as, s->ct.base_addr + (icid * GITS_CTE_SIZE),
136
+ cte, MEMTXATTRS_UNSPECIFIED, &res);
137
+ }
138
+ if (res != MEMTX_OK) {
139
+ return false;
140
+ } else {
141
+ return true;
142
+ }
143
+}
30
+}
144
+
31
+
145
+static bool process_mapc(GICv3ITSState *s, uint32_t offset)
32
+/* This function corresponds to CheckSMEAndZAEnabled. */
33
+static inline bool sme_za_enabled_check(DisasContext *s)
146
+{
34
+{
147
+ AddressSpace *as = &s->gicv3->dma_as;
35
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
148
+ uint16_t icid;
149
+ uint64_t rdbase;
150
+ bool valid;
151
+ MemTxResult res = MEMTX_OK;
152
+ bool result = false;
153
+ uint64_t value;
154
+
155
+ offset += NUM_BYTES_IN_DW;
156
+ offset += NUM_BYTES_IN_DW;
157
+
158
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
159
+ MEMTXATTRS_UNSPECIFIED, &res);
160
+
161
+ if (res != MEMTX_OK) {
162
+ return result;
163
+ }
164
+
165
+ icid = value & ICID_MASK;
166
+
167
+ rdbase = (value & R_MAPC_RDBASE_MASK) >> R_MAPC_RDBASE_SHIFT;
168
+ rdbase &= RDBASE_PROCNUM_MASK;
169
+
170
+ valid = (value & CMD_FIELD_VALID_MASK);
171
+
172
+ if ((icid > s->ct.maxids.max_collids) || (rdbase > s->gicv3->num_cpu)) {
173
+ qemu_log_mask(LOG_GUEST_ERROR,
174
+ "ITS MAPC: invalid collection table attributes "
175
+ "icid %d rdbase %lu\n", icid, rdbase);
176
+ /*
177
+ * in this implementation, in case of error
178
+ * we ignore this command and move onto the next
179
+ * command in the queue
180
+ */
181
+ } else {
182
+ result = update_cte(s, icid, valid, rdbase);
183
+ }
184
+
185
+ return result;
186
+}
36
+}
187
+
37
+
188
+static bool update_dte(GICv3ITSState *s, uint32_t devid, bool valid,
38
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
189
+ uint8_t size, uint64_t itt_addr)
39
+static inline bool sme_smza_enabled_check(DisasContext *s)
190
+{
40
+{
191
+ AddressSpace *as = &s->gicv3->dma_as;
41
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
192
+ uint64_t value;
193
+ uint64_t l2t_addr;
194
+ bool valid_l2t;
195
+ uint32_t l2t_id;
196
+ uint32_t max_l2_entries;
197
+ uint64_t dte = 0;
198
+ MemTxResult res = MEMTX_OK;
199
+
200
+ if (s->dt.valid) {
201
+ if (valid) {
202
+ /* add mapping entry to device table */
203
+ dte = (valid & TABLE_ENTRY_VALID_MASK) |
204
+ ((size & SIZE_MASK) << 1U) |
205
+ (itt_addr << GITS_DTE_ITTADDR_SHIFT);
206
+ }
207
+ } else {
208
+ return true;
209
+ }
210
+
211
+ /*
212
+ * The specification defines the format of level 1 entries of a
213
+ * 2-level table, but the format of level 2 entries and the format
214
+ * of flat-mapped tables is IMPDEF.
215
+ */
216
+ if (s->dt.indirect) {
217
+ l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE);
218
+
219
+ value = address_space_ldq_le(as,
220
+ s->dt.base_addr +
221
+ (l2t_id * L1TABLE_ENTRY_SIZE),
222
+ MEMTXATTRS_UNSPECIFIED, &res);
223
+
224
+ if (res != MEMTX_OK) {
225
+ return false;
226
+ }
227
+
228
+ valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
229
+
230
+ if (valid_l2t) {
231
+ max_l2_entries = s->dt.page_sz / s->dt.entry_sz;
232
+
233
+ l2t_addr = value & ((1ULL << 51) - 1);
234
+
235
+ address_space_stq_le(as, l2t_addr +
236
+ ((devid % max_l2_entries) * GITS_DTE_SIZE),
237
+ dte, MEMTXATTRS_UNSPECIFIED, &res);
238
+ }
239
+ } else {
240
+ /* Flat level table */
241
+ address_space_stq_le(as, s->dt.base_addr + (devid * GITS_DTE_SIZE),
242
+ dte, MEMTXATTRS_UNSPECIFIED, &res);
243
+ }
244
+ if (res != MEMTX_OK) {
245
+ return false;
246
+ } else {
247
+ return true;
248
+ }
249
+}
42
+}
250
+
43
+
251
+static bool process_mapd(GICv3ITSState *s, uint64_t value, uint32_t offset)
44
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
45
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
46
bool tag_checked, int log2_size);
47
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/target/arm/translate-a64.c
50
+++ b/target/arm/translate-a64.c
51
@@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s)
52
return true;
53
}
54
55
+/* This function corresponds to CheckSMEEnabled. */
56
+bool sme_enabled_check(DisasContext *s)
252
+{
57
+{
253
+ AddressSpace *as = &s->gicv3->dma_as;
58
+ /*
254
+ uint32_t devid;
59
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
255
+ uint8_t size;
60
+ * to be zero when fp_excp_el has priority. This is because we need
256
+ uint64_t itt_addr;
61
+ * sme_excp_el by itself for cpregs access checks.
257
+ bool valid;
62
+ */
258
+ MemTxResult res = MEMTX_OK;
63
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
259
+ bool result = false;
64
+ s->fp_access_checked = true;
260
+
65
+ return sme_access_check(s);
261
+ devid = ((value & DEVID_MASK) >> DEVID_SHIFT);
262
+
263
+ offset += NUM_BYTES_IN_DW;
264
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
265
+ MEMTXATTRS_UNSPECIFIED, &res);
266
+
267
+ if (res != MEMTX_OK) {
268
+ return result;
269
+ }
66
+ }
270
+
67
+ return fp_access_check_only(s);
271
+ size = (value & SIZE_MASK);
272
+
273
+ offset += NUM_BYTES_IN_DW;
274
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
275
+ MEMTXATTRS_UNSPECIFIED, &res);
276
+
277
+ if (res != MEMTX_OK) {
278
+ return result;
279
+ }
280
+
281
+ itt_addr = (value & ITTADDR_MASK) >> ITTADDR_SHIFT;
282
+
283
+ valid = (value & CMD_FIELD_VALID_MASK);
284
+
285
+ if ((devid > s->dt.maxids.max_devids) ||
286
+ (size > FIELD_EX64(s->typer, GITS_TYPER, IDBITS))) {
287
+ qemu_log_mask(LOG_GUEST_ERROR,
288
+ "ITS MAPD: invalid device table attributes "
289
+ "devid %d or size %d\n", devid, size);
290
+ /*
291
+ * in this implementation, in case of error
292
+ * we ignore this command and move onto the next
293
+ * command in the queue
294
+ */
295
+ } else {
296
+ result = update_dte(s, devid, valid, size, itt_addr);
297
+ }
298
+
299
+ return result;
300
+}
68
+}
301
+
69
+
302
+/*
70
+/* Common subroutine for CheckSMEAnd*Enabled. */
303
+ * Current implementation blocks until all
71
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
304
+ * commands are processed
305
+ */
306
+static void process_cmdq(GICv3ITSState *s)
307
+{
72
+{
308
+ uint32_t wr_offset = 0;
73
+ if (!sme_enabled_check(s)) {
309
+ uint32_t rd_offset = 0;
74
+ return false;
310
+ uint32_t cq_offset = 0;
311
+ uint64_t data;
312
+ AddressSpace *as = &s->gicv3->dma_as;
313
+ MemTxResult res = MEMTX_OK;
314
+ bool result = true;
315
+ uint8_t cmd;
316
+
317
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
318
+ return;
319
+ }
75
+ }
320
+
76
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
321
+ wr_offset = FIELD_EX64(s->cwriter, GITS_CWRITER, OFFSET);
77
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
322
+
78
+ syn_smetrap(SME_ET_NotStreaming, false));
323
+ if (wr_offset > s->cq.max_entries) {
79
+ return false;
324
+ qemu_log_mask(LOG_GUEST_ERROR,
325
+ "%s: invalid write offset "
326
+ "%d\n", __func__, wr_offset);
327
+ return;
328
+ }
80
+ }
329
+
81
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
330
+ rd_offset = FIELD_EX64(s->creadr, GITS_CREADR, OFFSET);
82
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
331
+
83
+ syn_smetrap(SME_ET_InactiveZA, false));
332
+ if (rd_offset > s->cq.max_entries) {
84
+ return false;
333
+ qemu_log_mask(LOG_GUEST_ERROR,
334
+ "%s: invalid read offset "
335
+ "%d\n", __func__, rd_offset);
336
+ return;
337
+ }
85
+ }
338
+
86
+ return true;
339
+ while (wr_offset != rd_offset) {
340
+ cq_offset = (rd_offset * GITS_CMDQ_ENTRY_SIZE);
341
+ data = address_space_ldq_le(as, s->cq.base_addr + cq_offset,
342
+ MEMTXATTRS_UNSPECIFIED, &res);
343
+ if (res != MEMTX_OK) {
344
+ result = false;
345
+ }
346
+ cmd = (data & CMD_MASK);
347
+
348
+ switch (cmd) {
349
+ case GITS_CMD_INT:
350
+ break;
351
+ case GITS_CMD_CLEAR:
352
+ break;
353
+ case GITS_CMD_SYNC:
354
+ /*
355
+ * Current implementation makes a blocking synchronous call
356
+ * for every command issued earlier, hence the internal state
357
+ * is already consistent by the time SYNC command is executed.
358
+ * Hence no further processing is required for SYNC command.
359
+ */
360
+ break;
361
+ case GITS_CMD_MAPD:
362
+ result = process_mapd(s, data, cq_offset);
363
+ break;
364
+ case GITS_CMD_MAPC:
365
+ result = process_mapc(s, cq_offset);
366
+ break;
367
+ case GITS_CMD_MAPTI:
368
+ break;
369
+ case GITS_CMD_MAPI:
370
+ break;
371
+ case GITS_CMD_DISCARD:
372
+ break;
373
+ case GITS_CMD_INV:
374
+ case GITS_CMD_INVALL:
375
+ break;
376
+ default:
377
+ break;
378
+ }
379
+ if (result) {
380
+ rd_offset++;
381
+ rd_offset %= s->cq.max_entries;
382
+ s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, OFFSET, rd_offset);
383
+ } else {
384
+ /*
385
+ * in this implementation, in case of dma read/write error
386
+ * we stall the command processing
387
+ */
388
+ s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, STALLED, 1);
389
+ qemu_log_mask(LOG_GUEST_ERROR,
390
+ "%s: %x cmd processing failed\n", __func__, cmd);
391
+ break;
392
+ }
393
+ }
394
+}
87
+}
395
+
88
+
396
/*
89
/*
397
* This function extracts the ITS Device and Collection table specific
90
* This utility function is for doing register extension with an
398
* parameters (like base_addr, size etc) from GITS_BASER register.
91
* optional shift. You will likely want to pass a temporary for the
399
@@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset,
400
extract_table_params(s);
401
extract_cmdq_params(s);
402
s->creadr = 0;
403
+ process_cmdq(s);
404
}
405
break;
406
case GITS_CBASER:
407
@@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset,
408
case GITS_CWRITER:
409
s->cwriter = deposit64(s->cwriter, 0, 32,
410
(value & ~R_GITS_CWRITER_RETRY_MASK));
411
+ if (s->cwriter != s->creadr) {
412
+ process_cmdq(s);
413
+ }
414
break;
415
case GITS_CWRITER + 4:
416
s->cwriter = deposit64(s->cwriter, 32, 32, value);
417
@@ -XXX,XX +XXX,XX @@ static bool its_writell(GICv3ITSState *s, hwaddr offset,
418
break;
419
case GITS_CWRITER:
420
s->cwriter = value & ~R_GITS_CWRITER_RETRY_MASK;
421
+ if (s->cwriter != s->creadr) {
422
+ process_cmdq(s);
423
+ }
424
break;
425
case GITS_CREADR:
426
if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) {
427
--
92
--
428
2.20.1
93
2.25.1
429
430
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
It is confusing to have different exits from translation
3
The pseudocode for CheckSVEEnabled gains a check for Streaming
4
for various conditions in separate functions.
4
SVE mode, and for SME present but SVE absent.
5
6
Merge disas_a64_insn into its only caller. Standardize
7
on the "s" name for the DisasContext, as the code from
8
disas_a64_insn had more instances.
9
5
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20210821195958.41312-3-richard.henderson@linaro.org
8
Message-id: 20220708151540.18136-17-richard.henderson@linaro.org
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
10
---
15
target/arm/translate-a64.c | 224 ++++++++++++++++++-------------------
11
target/arm/translate-a64.c | 22 ++++++++++++++++------
16
1 file changed, 109 insertions(+), 115 deletions(-)
12
1 file changed, 16 insertions(+), 6 deletions(-)
17
13
18
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
14
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/target/arm/translate-a64.c
16
--- a/target/arm/translate-a64.c
21
+++ b/target/arm/translate-a64.c
17
+++ b/target/arm/translate-a64.c
22
@@ -XXX,XX +XXX,XX @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
18
@@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s)
23
return false;
19
return true;
24
}
20
}
25
21
26
-/* C3.1 A64 instruction index by encoding */
22
-/* Check that SVE access is enabled. If it is, return true.
27
-static void disas_a64_insn(CPUARMState *env, DisasContext *s)
23
+/*
28
-{
24
+ * Check that SVE access is enabled. If it is, return true.
29
- uint32_t insn;
25
* If not, emit code to generate an appropriate exception and return false.
26
+ * This function corresponds to CheckSVEEnabled().
27
*/
28
bool sve_access_check(DisasContext *s)
29
{
30
- if (s->sve_excp_el) {
31
- assert(!s->sve_access_checked);
32
- s->sve_access_checked = true;
30
-
33
-
31
- s->pc_curr = s->base.pc_next;
34
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
32
- insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
35
+ assert(dc_isar_feature(aa64_sme, s));
33
- s->insn = insn;
36
+ if (!sme_sm_enabled_check(s)) {
34
- s->base.pc_next += 4;
37
+ goto fail_exit;
35
-
38
+ }
36
- s->fp_access_checked = false;
39
+ } else if (s->sve_excp_el) {
37
- s->sve_access_checked = false;
40
gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
38
-
41
syn_sve_access_trap(), s->sve_excp_el);
39
- if (s->pstate_il) {
42
- return false;
40
- /*
43
+ goto fail_exit;
41
- * Illegal execution state. This has priority over BTI
42
- * exceptions, but comes after instruction abort exceptions.
43
- */
44
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
45
- syn_illegalstate(), default_exception_el(s));
46
- return;
47
- }
48
-
49
- if (dc_isar_feature(aa64_bti, s)) {
50
- if (s->base.num_insns == 1) {
51
- /*
52
- * At the first insn of the TB, compute s->guarded_page.
53
- * We delayed computing this until successfully reading
54
- * the first insn of the TB, above. This (mostly) ensures
55
- * that the softmmu tlb entry has been populated, and the
56
- * page table GP bit is available.
57
- *
58
- * Note that we need to compute this even if btype == 0,
59
- * because this value is used for BR instructions later
60
- * where ENV is not available.
61
- */
62
- s->guarded_page = is_guarded_page(env, s);
63
-
64
- /* First insn can have btype set to non-zero. */
65
- tcg_debug_assert(s->btype >= 0);
66
-
67
- /*
68
- * Note that the Branch Target Exception has fairly high
69
- * priority -- below debugging exceptions but above most
70
- * everything else. This allows us to handle this now
71
- * instead of waiting until the insn is otherwise decoded.
72
- */
73
- if (s->btype != 0
74
- && s->guarded_page
75
- && !btype_destination_ok(insn, s->bt, s->btype)) {
76
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
77
- syn_btitrap(s->btype),
78
- default_exception_el(s));
79
- return;
80
- }
81
- } else {
82
- /* Not the first insn: btype must be 0. */
83
- tcg_debug_assert(s->btype == 0);
84
- }
85
- }
86
-
87
- switch (extract32(insn, 25, 4)) {
88
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
89
- unallocated_encoding(s);
90
- break;
91
- case 0x2:
92
- if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
93
- unallocated_encoding(s);
94
- }
95
- break;
96
- case 0x8: case 0x9: /* Data processing - immediate */
97
- disas_data_proc_imm(s, insn);
98
- break;
99
- case 0xa: case 0xb: /* Branch, exception generation and system insns */
100
- disas_b_exc_sys(s, insn);
101
- break;
102
- case 0x4:
103
- case 0x6:
104
- case 0xc:
105
- case 0xe: /* Loads and stores */
106
- disas_ldst(s, insn);
107
- break;
108
- case 0x5:
109
- case 0xd: /* Data processing - register */
110
- disas_data_proc_reg(s, insn);
111
- break;
112
- case 0x7:
113
- case 0xf: /* Data processing - SIMD and floating point */
114
- disas_data_proc_simd_fp(s, insn);
115
- break;
116
- default:
117
- assert(FALSE); /* all 15 cases should be handled above */
118
- break;
119
- }
120
-
121
- /* if we allocated any temporaries, free them here */
122
- free_tmp_a64(s);
123
-
124
- /*
125
- * After execution of most insns, btype is reset to 0.
126
- * Note that we set btype == -1 when the insn sets btype.
127
- */
128
- if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
129
- reset_btype(s);
130
- }
131
-}
132
-
133
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
134
CPUState *cpu)
135
{
136
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
137
138
static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
139
{
140
- DisasContext *dc = container_of(dcbase, DisasContext, base);
141
+ DisasContext *s = container_of(dcbase, DisasContext, base);
142
CPUARMState *env = cpu->env_ptr;
143
+ uint32_t insn;
144
145
- if (dc->ss_active && !dc->pstate_ss) {
146
+ if (s->ss_active && !s->pstate_ss) {
147
/* Singlestep state is Active-pending.
148
* If we're in this state at the start of a TB then either
149
* a) we just took an exception to an EL which is being debugged
150
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
151
* "did not step an insn" case, and so the syndrome ISV and EX
152
* bits should be zero.
153
*/
154
- assert(dc->base.num_insns == 1);
155
- gen_swstep_exception(dc, 0, 0);
156
- dc->base.is_jmp = DISAS_NORETURN;
157
- } else {
158
- disas_a64_insn(env, dc);
159
+ assert(s->base.num_insns == 1);
160
+ gen_swstep_exception(s, 0, 0);
161
+ s->base.is_jmp = DISAS_NORETURN;
162
+ return;
163
}
44
}
164
45
s->sve_access_checked = true;
165
- translator_loop_temp_check(&dc->base);
46
return fp_access_check(s);
166
+ s->pc_curr = s->base.pc_next;
167
+ insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
168
+ s->insn = insn;
169
+ s->base.pc_next += 4;
170
+
47
+
171
+ s->fp_access_checked = false;
48
+ fail_exit:
172
+ s->sve_access_checked = false;
49
+ /* Assert that we only raise one exception per instruction. */
173
+
50
+ assert(!s->sve_access_checked);
174
+ if (s->pstate_il) {
51
+ s->sve_access_checked = true;
175
+ /*
52
+ return false;
176
+ * Illegal execution state. This has priority over BTI
177
+ * exceptions, but comes after instruction abort exceptions.
178
+ */
179
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
180
+ syn_illegalstate(), default_exception_el(s));
181
+ return;
182
+ }
183
+
184
+ if (dc_isar_feature(aa64_bti, s)) {
185
+ if (s->base.num_insns == 1) {
186
+ /*
187
+ * At the first insn of the TB, compute s->guarded_page.
188
+ * We delayed computing this until successfully reading
189
+ * the first insn of the TB, above. This (mostly) ensures
190
+ * that the softmmu tlb entry has been populated, and the
191
+ * page table GP bit is available.
192
+ *
193
+ * Note that we need to compute this even if btype == 0,
194
+ * because this value is used for BR instructions later
195
+ * where ENV is not available.
196
+ */
197
+ s->guarded_page = is_guarded_page(env, s);
198
+
199
+ /* First insn can have btype set to non-zero. */
200
+ tcg_debug_assert(s->btype >= 0);
201
+
202
+ /*
203
+ * Note that the Branch Target Exception has fairly high
204
+ * priority -- below debugging exceptions but above most
205
+ * everything else. This allows us to handle this now
206
+ * instead of waiting until the insn is otherwise decoded.
207
+ */
208
+ if (s->btype != 0
209
+ && s->guarded_page
210
+ && !btype_destination_ok(insn, s->bt, s->btype)) {
211
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
212
+ syn_btitrap(s->btype),
213
+ default_exception_el(s));
214
+ return;
215
+ }
216
+ } else {
217
+ /* Not the first insn: btype must be 0. */
218
+ tcg_debug_assert(s->btype == 0);
219
+ }
220
+ }
221
+
222
+ switch (extract32(insn, 25, 4)) {
223
+ case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
224
+ unallocated_encoding(s);
225
+ break;
226
+ case 0x2:
227
+ if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
228
+ unallocated_encoding(s);
229
+ }
230
+ break;
231
+ case 0x8: case 0x9: /* Data processing - immediate */
232
+ disas_data_proc_imm(s, insn);
233
+ break;
234
+ case 0xa: case 0xb: /* Branch, exception generation and system insns */
235
+ disas_b_exc_sys(s, insn);
236
+ break;
237
+ case 0x4:
238
+ case 0x6:
239
+ case 0xc:
240
+ case 0xe: /* Loads and stores */
241
+ disas_ldst(s, insn);
242
+ break;
243
+ case 0x5:
244
+ case 0xd: /* Data processing - register */
245
+ disas_data_proc_reg(s, insn);
246
+ break;
247
+ case 0x7:
248
+ case 0xf: /* Data processing - SIMD and floating point */
249
+ disas_data_proc_simd_fp(s, insn);
250
+ break;
251
+ default:
252
+ assert(FALSE); /* all 15 cases should be handled above */
253
+ break;
254
+ }
255
+
256
+ /* if we allocated any temporaries, free them here */
257
+ free_tmp_a64(s);
258
+
259
+ /*
260
+ * After execution of most insns, btype is reset to 0.
261
+ * Note that we set btype == -1 when the insn sets btype.
262
+ */
263
+ if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
264
+ reset_btype(s);
265
+ }
266
+
267
+ translator_loop_temp_check(&s->base);
268
}
53
}
269
54
270
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
55
/*
271
--
56
--
272
2.20.1
57
2.25.1
273
274
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
As of today, when booting upstream U-Boot for Xilinx Zynq, the UART
3
These SME instructions are nominally within the SVE decode space,
4
does not receive anything. Debugging shows that the UART input clock
4
so we add them to sve.decode and translate-sve.c.
5
frequency is zero which prevents the UART from receiving anything as
6
per the logic in uart_receive().
7
5
8
From zynq_slcr_reset_exit() comment, it intends to compute output
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
clocks according to ps_clk and registers. zynq_slcr_compute_clocks()
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
is called to accomplish the task, inside which device_is_in_reset()
8
Message-id: 20220708151540.18136-18-richard.henderson@linaro.org
11
is called to actually make the attempt in vain.
12
13
Rework reset_hold() and reset_exit() so that in the reset exit phase,
14
the logic can really compute output clocks in reset_exit().
15
16
With this change, upstream U-Boot boots properly again with:
17
18
$ qemu-system-arm -M xilinx-zynq-a9 -m 1G -display none -serial null -serial stdio \
19
-device loader,file=u-boot-dtb.bin,addr=0x4000000,cpu-num=0
20
21
Fixes: 38867cb7ec90 ("hw/misc/zynq_slcr: add clock generation for uarts")
22
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
23
Acked-by: Alistair Francis <alistair.francis@wdc.com>
24
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
25
Message-id: 20210901124521.30599-2-bmeng.cn@gmail.com
26
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
27
---
10
---
28
hw/misc/zynq_slcr.c | 31 ++++++++++++++++++-------------
11
target/arm/translate-a64.h | 12 ++++++++++++
29
1 file changed, 18 insertions(+), 13 deletions(-)
12
target/arm/sve.decode | 5 ++++-
13
target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++
14
3 files changed, 54 insertions(+), 1 deletion(-)
30
15
31
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
16
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
32
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/misc/zynq_slcr.c
18
--- a/target/arm/translate-a64.h
34
+++ b/hw/misc/zynq_slcr.c
19
+++ b/target/arm/translate-a64.h
35
@@ -XXX,XX +XXX,XX @@ static uint64_t zynq_slcr_compute_clock(const uint64_t periods[],
20
@@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s)
36
zynq_slcr_compute_clock((plls), (state)->regs[reg], \
21
return s->vl;
37
reg ## _ ## enable_field ## _SHIFT)
22
}
38
23
39
+static void zynq_slcr_compute_clocks_internal(ZynqSLCRState *s, uint64_t ps_clk)
24
+/* Return the byte size of the vector register, SVL / 8. */
25
+static inline int streaming_vec_reg_size(DisasContext *s)
40
+{
26
+{
41
+ uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]);
27
+ return s->svl;
42
+ uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]);
43
+ uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]);
44
+
45
+ uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll};
46
+
47
+ /* compute uartX reference clocks */
48
+ clock_set(s->uart0_ref_clk,
49
+ ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0));
50
+ clock_set(s->uart1_ref_clk,
51
+ ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1));
52
+}
28
+}
53
+
29
+
54
/**
30
/*
55
* Compute and set the ouputs clocks periods.
31
* Return the offset info CPUARMState of the predicate vector register Pn.
56
* But do not propagate them further. Connected clocks
32
* Note for this purpose, FFR is P16.
57
@@ -XXX,XX +XXX,XX @@ static void zynq_slcr_compute_clocks(ZynqSLCRState *s)
33
@@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s)
58
ps_clk = 0;
34
return s->vl >> 3;
59
}
60
61
- uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]);
62
- uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]);
63
- uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]);
64
-
65
- uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll};
66
-
67
- /* compute uartX reference clocks */
68
- clock_set(s->uart0_ref_clk,
69
- ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0));
70
- clock_set(s->uart1_ref_clk,
71
- ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1));
72
+ zynq_slcr_compute_clocks_internal(s, ps_clk);
73
}
35
}
74
36
75
/**
37
+/* Return the byte size of the predicate register, SVL / 64. */
76
@@ -XXX,XX +XXX,XX @@ static void zynq_slcr_reset_hold(Object *obj)
38
+static inline int streaming_pred_reg_size(DisasContext *s)
77
ZynqSLCRState *s = ZYNQ_SLCR(obj);
39
+{
78
40
+ return s->svl >> 3;
79
/* will disable all output clocks */
41
+}
80
- zynq_slcr_compute_clocks(s);
42
+
81
+ zynq_slcr_compute_clocks_internal(s, 0);
43
/*
82
zynq_slcr_propagate_clocks(s);
44
* Round up the size of a register to a size allowed by
45
* the tcg vector infrastructure. Any operation which uses this
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
51
# SVE index generation (register start, register increment)
52
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
53
54
-### SVE Stack Allocation Group
55
+### SVE / Streaming SVE Stack Allocation Group
56
57
# SVE stack frame adjustment
58
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
59
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
60
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
61
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
62
63
# SVE stack frame size
64
RDVL 00000100 101 11111 01010 imm:s6 rd:5
65
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
66
67
### SVE Bitwise Shift - Unpredicated Group
68
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
74
return true;
83
}
75
}
84
76
85
@@ -XXX,XX +XXX,XX @@ static void zynq_slcr_reset_exit(Object *obj)
77
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
86
ZynqSLCRState *s = ZYNQ_SLCR(obj);
78
+{
87
79
+ if (!dc_isar_feature(aa64_sme, s)) {
88
/* will compute output clocks according to ps_clk and registers */
80
+ return false;
89
- zynq_slcr_compute_clocks(s);
81
+ }
90
+ zynq_slcr_compute_clocks_internal(s, clock_get(s->ps_clk));
82
+ if (sme_enabled_check(s)) {
91
zynq_slcr_propagate_clocks(s);
83
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
84
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
85
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
86
+ }
87
+ return true;
88
+}
89
+
90
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
91
{
92
if (!dc_isar_feature(aa64_sve, s)) {
93
@@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
94
return true;
92
}
95
}
93
96
97
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
98
+{
99
+ if (!dc_isar_feature(aa64_sme, s)) {
100
+ return false;
101
+ }
102
+ if (sme_enabled_check(s)) {
103
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
104
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
105
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
106
+ }
107
+ return true;
108
+}
109
+
110
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
111
{
112
if (!dc_isar_feature(aa64_sve, s)) {
113
@@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
114
return true;
115
}
116
117
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
118
+{
119
+ if (!dc_isar_feature(aa64_sme, s)) {
120
+ return false;
121
+ }
122
+ if (sme_enabled_check(s)) {
123
+ TCGv_i64 reg = cpu_reg(s, a->rd);
124
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
125
+ }
126
+ return true;
127
+}
128
+
129
/*
130
*** SVE Compute Vector Address Group
131
*/
94
--
132
--
95
2.20.1
133
2.25.1
96
97
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
At present when input clock is disabled, any character transmitted
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
to tx fifo can still show on the serial line, which is wrong.
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
5
Message-id: 20220708151540.18136-19-richard.henderson@linaro.org
6
Fixes: b636db306e06 ("hw/char/cadence_uart: add clock support")
7
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
10
Message-id: 20210901124521.30599-3-bmeng.cn@gmail.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
7
---
13
hw/char/cadence_uart.c | 5 +++++
8
target/arm/helper-sme.h | 2 ++
14
1 file changed, 5 insertions(+)
9
target/arm/sme.decode | 4 ++++
10
target/arm/sme_helper.c | 25 +++++++++++++++++++++++++
11
target/arm/translate-sme.c | 13 +++++++++++++
12
4 files changed, 44 insertions(+)
15
13
16
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
17
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/char/cadence_uart.c
16
--- a/target/arm/helper-sme.h
19
+++ b/hw/char/cadence_uart.c
17
+++ b/target/arm/helper-sme.h
20
@@ -XXX,XX +XXX,XX @@ static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond,
18
@@ -XXX,XX +XXX,XX @@
21
static void uart_write_tx_fifo(CadenceUARTState *s, const uint8_t *buf,
19
22
int size)
20
DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
23
{
21
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
24
+ /* ignore characters when unclocked or in reset */
22
+
25
+ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
23
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@
29
#
30
# This file is processed by scripts/decodetree.py
31
#
32
+
33
+### SME Misc
34
+
35
+ZERO 11000000 00 001 00000000000 imm:8
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/sme_helper.c
39
+++ b/target/arm/sme_helper.c
40
@@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
41
memset(env->zarray, 0, sizeof(env->zarray));
42
}
43
}
44
+
45
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
46
+{
47
+ uint32_t i;
48
+
49
+ /*
50
+ * Special case clearing the entire ZA space.
51
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
52
+ * parts of the ZA storage outside of SVL.
53
+ */
54
+ if (imm == 0xff) {
55
+ memset(env->zarray, 0, sizeof(env->zarray));
26
+ return;
56
+ return;
27
+ }
57
+ }
28
+
58
+
29
if ((s->r[R_CR] & UART_CR_TX_DIS) || !(s->r[R_CR] & UART_CR_TX_EN)) {
59
+ /*
30
return;
60
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
31
}
61
+ * so each row is discontiguous within ZA[].
62
+ */
63
+ for (i = 0; i < svl; i++) {
64
+ if (imm & (1 << (i % 8))) {
65
+ memset(&env->zarray[i], 0, svl);
66
+ }
67
+ }
68
+}
69
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sme.c
72
+++ b/target/arm/translate-sme.c
73
@@ -XXX,XX +XXX,XX @@
74
*/
75
76
#include "decode-sme.c.inc"
77
+
78
+
79
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
80
+{
81
+ if (!dc_isar_feature(aa64_sme, s)) {
82
+ return false;
83
+ }
84
+ if (sme_za_enabled_check(s)) {
85
+ gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
86
+ tcg_constant_i32(streaming_vec_reg_size(s)));
87
+ }
88
+ return true;
89
+}
32
--
90
--
33
2.20.1
91
2.25.1
34
35
diff view generated by jsdifflib
1
The various MPS2 boards have multiple I2C buses: typically a bus
1
From: Richard Henderson <richard.henderson@linaro.org>
2
dedicated to the audio configuration, one for the LCD touchscreen
3
controller, one for a DDR4 EEPROM, and two which are connected to the
4
external Shield expansion connector. Mark the buses which are used
5
only for board-internal devices as 'full' so that if the user creates
6
i2c devices on the commandline without specifying a bus name then
7
they will be connected to the I2C controller used for the Shield
8
connector, where guest software will expect them.
9
2
3
We can reuse the SVE functions for implementing moves to/from
4
horizontal tile slices, but we need new ones for moves to/from
5
vertical tile slices.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20210903151435.22379-4-peter.maydell@linaro.org
13
---
11
---
14
hw/arm/mps2-tz.c | 57 ++++++++++++++++++++++++++++++++++++------------
12
target/arm/helper-sme.h | 12 +++
15
1 file changed, 43 insertions(+), 14 deletions(-)
13
target/arm/helper-sve.h | 2 +
14
target/arm/translate-a64.h | 8 ++
15
target/arm/translate.h | 5 ++
16
target/arm/sme.decode | 15 ++++
17
target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++-
18
target/arm/sve_helper.c | 12 +++
19
target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++
20
8 files changed, 331 insertions(+), 1 deletion(-)
16
21
17
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
22
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
18
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/arm/mps2-tz.c
24
--- a/target/arm/helper-sme.h
20
+++ b/hw/arm/mps2-tz.c
25
+++ b/target/arm/helper-sme.h
21
@@ -XXX,XX +XXX,XX @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno)
26
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
22
27
DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
23
/* Union describing the device-specific extra data we pass to the devfn. */
28
24
typedef union PPCExtraData {
29
DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
25
+ bool i2c_internal;
30
+
26
} PPCExtraData;
31
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
27
32
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
/* Most of the devices in the AN505 FPGA image sit behind
33
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque,
34
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
object_initialize_child(OBJECT(mms), name, i2c, TYPE_ARM_SBCON_I2C);
35
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
31
s = SYS_BUS_DEVICE(i2c);
36
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
32
sysbus_realize(s, &error_fatal);
37
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
39
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
41
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
42
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/helper-sve.h
45
+++ b/target/arm/helper-sve.h
46
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
47
void, ptr, ptr, ptr, ptr, i32)
48
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
49
void, ptr, ptr, ptr, ptr, i32)
50
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
51
+ void, ptr, ptr, ptr, ptr, i32)
52
53
DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
54
void, ptr, ptr, ptr, ptr, i32)
55
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/target/arm/translate-a64.h
58
+++ b/target/arm/translate-a64.h
59
@@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s)
60
return size_for_gvec(pred_full_reg_size(s));
61
}
62
63
+/* Return a newly allocated pointer to the predicate register. */
64
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
65
+{
66
+ TCGv_ptr ret = tcg_temp_new_ptr();
67
+ tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
68
+ return ret;
69
+}
70
+
71
bool disas_sve(DisasContext *, uint32_t);
72
bool disas_sme(DisasContext *, uint32_t);
73
74
diff --git a/target/arm/translate.h b/target/arm/translate.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/target/arm/translate.h
77
+++ b/target/arm/translate.h
78
@@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x)
79
return x + 2;
80
}
81
82
+static inline int plus_12(DisasContext *s, int x)
83
+{
84
+ return x + 12;
85
+}
86
+
87
static inline int times_2(DisasContext *s, int x)
88
{
89
return x * 2;
90
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/arm/sme.decode
93
+++ b/target/arm/sme.decode
94
@@ -XXX,XX +XXX,XX @@
95
### SME Misc
96
97
ZERO 11000000 00 001 00000000000 imm:8
98
+
99
+### SME Move into/from Array
100
+
101
+%mova_rs 13:2 !function=plus_12
102
+&mova esz rs pg zr za_imm v:bool to_vec:bool
103
+
104
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
105
+ &mova to_vec=0 rs=%mova_rs
106
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
107
+ &mova to_vec=0 rs=%mova_rs esz=4
108
+
109
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
110
+ &mova to_vec=1 rs=%mova_rs
111
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
112
+ &mova to_vec=1 rs=%mova_rs esz=4
113
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/target/arm/sme_helper.c
116
+++ b/target/arm/sme_helper.c
117
@@ -XXX,XX +XXX,XX @@
118
119
#include "qemu/osdep.h"
120
#include "cpu.h"
121
-#include "internals.h"
122
+#include "tcg/tcg-gvec-desc.h"
123
#include "exec/helper-proto.h"
124
+#include "qemu/int128.h"
125
+#include "vec_internal.h"
126
127
/* ResetSVEState */
128
void arm_reset_sve_state(CPUARMState *env)
129
@@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
130
}
131
}
132
}
133
+
134
+
135
+/*
136
+ * When considering the ZA storage as an array of elements of
137
+ * type T, the index within that array of the Nth element of
138
+ * a vertical slice of a tile can be calculated like this,
139
+ * regardless of the size of type T. This is because the tiles
140
+ * are interleaved, so if type T is size N bytes then row 1 of
141
+ * the tile is N rows away from row 0. The division by N to
142
+ * convert a byte offset into an array index and the multiplication
143
+ * by N to convert from vslice-index-within-the-tile to
144
+ * the index within the ZA storage cancel out.
145
+ */
146
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
147
+
148
+/*
149
+ * When doing byte arithmetic on the ZA storage, the element
150
+ * byteoff bytes away in a tile vertical slice is always this
151
+ * many bytes away in the ZA storage, regardless of the
152
+ * size of the tile element, assuming that byteoff is a multiple
153
+ * of the element size. Again this is because of the interleaving
154
+ * of the tiles. For instance if we have 1 byte per element then
155
+ * each row of the ZA storage has one byte of the vslice data,
156
+ * and (counting from 0) byte 8 goes in row 8 of the storage
157
+ * at offset (8 * row-size-in-bytes).
158
+ * If we have 8 bytes per element then each row of the ZA storage
159
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
160
+ * so byte 8 of the data goes into row 1 of the tile,
161
+ * which is again row 8 of the storage, so the offset is still
162
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
163
+ */
164
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
165
+
166
+
167
+/*
168
+ * Move Zreg vector to ZArray column.
169
+ */
170
+#define DO_MOVA_C(NAME, TYPE, H) \
171
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
172
+{ \
173
+ int i, oprsz = simd_oprsz(desc); \
174
+ for (i = 0; i < oprsz; ) { \
175
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
176
+ do { \
177
+ if (pg & 1) { \
178
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
179
+ } \
180
+ i += sizeof(TYPE); \
181
+ pg >>= sizeof(TYPE); \
182
+ } while (i & 15); \
183
+ } \
184
+}
185
+
186
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
187
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
188
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
189
+
190
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
191
+{
192
+ int i, oprsz = simd_oprsz(desc) / 8;
193
+ uint8_t *pg = vg;
194
+ uint64_t *n = vn;
195
+ uint64_t *a = za;
196
+
197
+ for (i = 0; i < oprsz; i++) {
198
+ if (pg[H1(i)] & 1) {
199
+ a[tile_vslice_index(i)] = n[i];
200
+ }
201
+ }
202
+}
203
+
204
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
205
+{
206
+ int i, oprsz = simd_oprsz(desc) / 16;
207
+ uint16_t *pg = vg;
208
+ Int128 *n = vn;
209
+ Int128 *a = za;
33
+
210
+
34
+ /*
211
+ /*
35
+ * If this is an internal-use-only i2c bus, mark it full
212
+ * Int128 is used here simply to copy 16 bytes, and to simplify
36
+ * so that user-created i2c devices are not plugged into it.
213
+ * the address arithmetic.
37
+ * If we implement models of any on-board i2c devices that
38
+ * plug in to one of the internal-use-only buses, then we will
39
+ * need to create and plugging those in here before we mark the
40
+ * bus as full.
41
+ */
214
+ */
42
+ if (extradata->i2c_internal) {
215
+ for (i = 0; i < oprsz; i++) {
43
+ BusState *qbus = qdev_get_child_bus(DEVICE(i2c), "i2c");
216
+ if (pg[H2(i)] & 1) {
44
+ qbus_mark_full(qbus);
217
+ a[tile_vslice_index(i)] = n[i];
45
+ }
218
+ }
46
+
219
+ }
47
return sysbus_mmio_get_region(s, 0);
220
+}
221
+
222
+#undef DO_MOVA_C
223
+
224
+/*
225
+ * Move ZArray column to Zreg vector.
226
+ */
227
+#define DO_MOVA_Z(NAME, TYPE, H) \
228
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
229
+{ \
230
+ int i, oprsz = simd_oprsz(desc); \
231
+ for (i = 0; i < oprsz; ) { \
232
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
233
+ do { \
234
+ if (pg & 1) { \
235
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
236
+ } \
237
+ i += sizeof(TYPE); \
238
+ pg >>= sizeof(TYPE); \
239
+ } while (i & 15); \
240
+ } \
241
+}
242
+
243
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
244
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
245
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
246
+
247
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
248
+{
249
+ int i, oprsz = simd_oprsz(desc) / 8;
250
+ uint8_t *pg = vg;
251
+ uint64_t *d = vd;
252
+ uint64_t *a = za;
253
+
254
+ for (i = 0; i < oprsz; i++) {
255
+ if (pg[H1(i)] & 1) {
256
+ d[i] = a[tile_vslice_index(i)];
257
+ }
258
+ }
259
+}
260
+
261
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
262
+{
263
+ int i, oprsz = simd_oprsz(desc) / 16;
264
+ uint16_t *pg = vg;
265
+ Int128 *d = vd;
266
+ Int128 *a = za;
267
+
268
+ /*
269
+ * Int128 is used here simply to copy 16 bytes, and to simplify
270
+ * the address arithmetic.
271
+ */
272
+ for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) {
273
+ if (pg[H2(i)] & 1) {
274
+ d[i] = a[tile_vslice_index(i)];
275
+ }
276
+ }
277
+}
278
+
279
+#undef DO_MOVA_Z
280
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
281
index XXXXXXX..XXXXXXX 100644
282
--- a/target/arm/sve_helper.c
283
+++ b/target/arm/sve_helper.c
284
@@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
285
}
48
}
286
}
49
287
50
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
288
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
51
{ "uart2", make_uart, &mms->uart[2], 0x40202000, 0x1000, { 36, 37, 44 } },
289
+ void *vg, uint32_t desc)
52
{ "uart3", make_uart, &mms->uart[3], 0x40203000, 0x1000, { 38, 39, 45 } },
290
+{
53
{ "uart4", make_uart, &mms->uart[4], 0x40204000, 0x1000, { 40, 41, 46 } },
291
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
54
- { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000 },
292
+ Int128 *d = vd, *n = vn, *m = vm;
55
- { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000 },
293
+ uint16_t *pg = vg;
56
- { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000 },
294
+
57
- { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000 },
295
+ for (i = 0; i < opr_sz; i += 1) {
58
+ { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000, {},
296
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
59
+ { .i2c_internal = true /* touchscreen */ } },
297
+ }
60
+ { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000, {},
298
+}
61
+ { .i2c_internal = true /* audio conf */ } },
299
+
62
+ { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000, {},
300
/* Two operand comparison controlled by a predicate.
63
+ { .i2c_internal = false /* shield 0 */ } },
301
* ??? It is very tempting to want to be able to expand this inline
64
+ { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000, {},
302
* with x86 instructions, e.g.
65
+ { .i2c_internal = false /* shield 1 */ } },
303
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
66
},
304
index XXXXXXX..XXXXXXX 100644
67
}, {
305
--- a/target/arm/translate-sme.c
68
.name = "apb_ppcexp2",
306
+++ b/target/arm/translate-sme.c
69
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
307
@@ -XXX,XX +XXX,XX @@
70
}, {
308
#include "decode-sme.c.inc"
71
.name = "apb_ppcexp1",
309
72
.ports = {
310
73
- { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000 },
311
+/*
74
- { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000 },
312
+ * Resolve tile.size[index] to a host pointer, where tile and index
75
+ { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000, {},
313
+ * are always decoded together, dependent on the element size.
76
+ { .i2c_internal = true /* touchscreen */ } },
314
+ */
77
+ { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000, {},
315
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
78
+ { .i2c_internal = true /* audio conf */ } },
316
+ int tile_index, bool vertical)
79
{ "spi0", make_spi, &mms->spi[0], 0x41202000, 0x1000, { 52 } },
317
+{
80
{ "spi1", make_spi, &mms->spi[1], 0x41203000, 0x1000, { 53 } },
318
+ int tile = tile_index >> (4 - esz);
81
{ "spi2", make_spi, &mms->spi[2], 0x41204000, 0x1000, { 54 } },
319
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
82
- { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000 },
320
+ int pos, len, offset;
83
- { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000 },
321
+ TCGv_i32 tmp;
84
+ { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000, {},
322
+ TCGv_ptr addr;
85
+ { .i2c_internal = false /* shield 0 */ } },
323
+
86
+ { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000, {},
324
+ /* Compute the final index, which is Rs+imm. */
87
+ { .i2c_internal = false /* shield 1 */ } },
325
+ tmp = tcg_temp_new_i32();
88
{ /* port 7 reserved */ },
326
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
89
- { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000 },
327
+ tcg_gen_addi_i32(tmp, tmp, index);
90
+ { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000, {},
328
+
91
+ { .i2c_internal = true /* DDR4 EEPROM */ } },
329
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
92
},
330
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
93
}, {
331
+
94
.name = "apb_ppcexp2",
332
+ if (vertical) {
95
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
333
+ /*
96
}, {
334
+ * Compute the byte offset of the index within the tile:
97
.name = "apb_ppcexp1",
335
+ * (index % (svl / size)) * size
98
.ports = {
336
+ * = (index % (svl >> esz)) << esz
99
- { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000 },
337
+ * Perform the power-of-two modulo via extraction of the low @len bits.
100
- { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000 },
338
+ * Perform the multiply by shifting left by @pos bits.
101
+ { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000, {},
339
+ * Perform these operations simultaneously via deposit into zero.
102
+ { .i2c_internal = true /* touchscreen */ } },
340
+ */
103
+ { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000, {},
341
+ pos = esz;
104
+ { .i2c_internal = true /* audio conf */ } },
342
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
105
{ "spi0", make_spi, &mms->spi[0], 0x49202000, 0x1000, { 53 } },
343
+
106
{ "spi1", make_spi, &mms->spi[1], 0x49203000, 0x1000, { 54 } },
344
+ /*
107
{ "spi2", make_spi, &mms->spi[2], 0x49204000, 0x1000, { 55 } },
345
+ * For big-endian, adjust the indexed column byte offset within
108
- { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000 },
346
+ * the uint64_t host words that make up env->zarray[].
109
- { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000 },
347
+ */
110
+ { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000, {},
348
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
111
+ { .i2c_internal = false /* shield 0 */ } },
349
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
112
+ { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000, {},
350
+ }
113
+ { .i2c_internal = false /* shield 1 */ } },
351
+ } else {
114
{ /* port 7 reserved */ },
352
+ /*
115
- { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000 },
353
+ * Compute the byte offset of the index within the tile:
116
+ { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000, {},
354
+ * (index % (svl / size)) * (size * sizeof(row))
117
+ { .i2c_internal = true /* DDR4 EEPROM */ } },
355
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
118
},
356
+ */
119
}, {
357
+ pos = esz + ctz32(sizeof(ARMVectorReg));
120
.name = "apb_ppcexp2",
358
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
359
+
360
+ /* Row slices are always aligned and need no endian adjustment. */
361
+ }
362
+
363
+ /* The tile byte offset within env->zarray is the row. */
364
+ offset = tile * sizeof(ARMVectorReg);
365
+
366
+ /* Include the byte offset of zarray to make this relative to env. */
367
+ offset += offsetof(CPUARMState, zarray);
368
+ tcg_gen_addi_i32(tmp, tmp, offset);
369
+
370
+ /* Add the byte offset to env to produce the final pointer. */
371
+ addr = tcg_temp_new_ptr();
372
+ tcg_gen_ext_i32_ptr(addr, tmp);
373
+ tcg_temp_free_i32(tmp);
374
+ tcg_gen_add_ptr(addr, addr, cpu_env);
375
+
376
+ return addr;
377
+}
378
+
379
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
380
{
381
if (!dc_isar_feature(aa64_sme, s)) {
382
@@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
383
}
384
return true;
385
}
386
+
387
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
388
+{
389
+ static gen_helper_gvec_4 * const h_fns[5] = {
390
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
391
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
392
+ gen_helper_sve_sel_zpzz_q
393
+ };
394
+ static gen_helper_gvec_3 * const cz_fns[5] = {
395
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
396
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
397
+ gen_helper_sme_mova_cz_q,
398
+ };
399
+ static gen_helper_gvec_3 * const zc_fns[5] = {
400
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
401
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
402
+ gen_helper_sme_mova_zc_q,
403
+ };
404
+
405
+ TCGv_ptr t_za, t_zr, t_pg;
406
+ TCGv_i32 t_desc;
407
+ int svl;
408
+
409
+ if (!dc_isar_feature(aa64_sme, s)) {
410
+ return false;
411
+ }
412
+ if (!sme_smza_enabled_check(s)) {
413
+ return true;
414
+ }
415
+
416
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
417
+ t_zr = vec_full_reg_ptr(s, a->zr);
418
+ t_pg = pred_full_reg_ptr(s, a->pg);
419
+
420
+ svl = streaming_vec_reg_size(s);
421
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
422
+
423
+ if (a->v) {
424
+ /* Vertical slice -- use sme mova helpers. */
425
+ if (a->to_vec) {
426
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
427
+ } else {
428
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
429
+ }
430
+ } else {
431
+ /* Horizontal slice -- reuse sve sel helpers. */
432
+ if (a->to_vec) {
433
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
434
+ } else {
435
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
436
+ }
437
+ }
438
+
439
+ tcg_temp_free_ptr(t_za);
440
+ tcg_temp_free_ptr(t_zr);
441
+ tcg_temp_free_ptr(t_pg);
442
+
443
+ return true;
444
+}
121
--
445
--
122
2.20.1
446
2.25.1
123
124
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Added properties to enable ITS feature and define qemu system
3
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
4
address space memory in gicv3 common,setup distributor and
4
because those functions accept only a Zreg register number.
5
redistributor registers to indicate LPI support.
5
For SME, we want to pass a pointer into ZA storage.
6
6
7
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20210910143951.92242-6-shashi.mallela@linaro.org
9
Message-id: 20220708151540.18136-21-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
11
---
13
hw/intc/gicv3_internal.h | 2 ++
12
target/arm/helper-sme.h | 82 +++++
14
include/hw/intc/arm_gicv3_common.h | 1 +
13
target/arm/sme.decode | 9 +
15
hw/intc/arm_gicv3_common.c | 12 ++++++++++++
14
target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++
16
hw/intc/arm_gicv3_dist.c | 5 ++++-
15
target/arm/translate-sme.c | 70 +++++
17
hw/intc/arm_gicv3_redist.c | 12 +++++++++---
16
4 files changed, 756 insertions(+)
18
5 files changed, 28 insertions(+), 4 deletions(-)
19
17
20
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
18
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
21
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/intc/gicv3_internal.h
20
--- a/target/arm/helper-sme.h
23
+++ b/hw/intc/gicv3_internal.h
21
+++ b/target/arm/helper-sme.h
22
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
25
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+
27
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
28
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
29
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
30
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
+
32
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
34
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
35
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
36
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
37
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
38
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
39
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
40
+
41
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
42
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
43
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
44
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
45
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
46
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
47
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
48
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
49
+
50
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
51
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
52
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
53
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
54
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
55
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
56
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
57
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
58
+
59
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
60
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
61
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
62
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
63
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
64
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
65
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
66
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
67
+
68
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
69
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
70
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
71
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
72
+
73
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
74
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
75
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
76
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
77
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
78
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
79
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
80
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
81
+
82
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
83
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
84
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
85
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
86
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
87
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
88
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
89
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
90
+
91
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
92
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
93
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
94
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
95
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
96
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
97
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
98
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
99
+
100
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
101
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
102
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
103
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
104
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
105
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
106
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
107
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
108
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
109
index XXXXXXX..XXXXXXX 100644
110
--- a/target/arm/sme.decode
111
+++ b/target/arm/sme.decode
112
@@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
113
&mova to_vec=1 rs=%mova_rs
114
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
115
&mova to_vec=1 rs=%mova_rs esz=4
116
+
117
+### SME Memory
118
+
119
+&ldst esz rs pg rn rm za_imm v:bool st:bool
120
+
121
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
122
+ &ldst rs=%mova_rs
123
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
124
+ &ldst esz=4 rs=%mova_rs
125
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/arm/sme_helper.c
128
+++ b/target/arm/sme_helper.c
24
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@
25
#define GICD_CTLR_E1NWF (1U << 7)
130
26
#define GICD_CTLR_RWP (1U << 31)
131
#include "qemu/osdep.h"
27
132
#include "cpu.h"
28
+#define GICD_TYPER_LPIS_SHIFT 17
133
+#include "internals.h"
29
+
134
#include "tcg/tcg-gvec-desc.h"
30
/* 16 bits EventId */
135
#include "exec/helper-proto.h"
31
#define GICD_TYPER_IDBITS 0xf
136
+#include "exec/cpu_ldst.h"
32
137
+#include "exec/exec-all.h"
33
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
138
#include "qemu/int128.h"
34
index XXXXXXX..XXXXXXX 100644
139
#include "vec_internal.h"
35
--- a/include/hw/intc/arm_gicv3_common.h
140
+#include "sve_ldst_internal.h"
36
+++ b/include/hw/intc/arm_gicv3_common.h
141
37
@@ -XXX,XX +XXX,XX @@ struct GICv3State {
142
/* ResetSVEState */
38
uint32_t num_cpu;
143
void arm_reset_sve_state(CPUARMState *env)
39
uint32_t num_irq;
144
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
40
uint32_t revision;
145
}
41
+ bool lpi_enable;
146
42
bool security_extn;
147
#undef DO_MOVA_Z
43
bool irq_reset_nonsecure;
148
+
44
bool gicd_no_migration_shift_bug;
149
+/*
45
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
150
+ * Clear elements in a tile slice comprising len bytes.
46
index XXXXXXX..XXXXXXX 100644
151
+ */
47
--- a/hw/intc/arm_gicv3_common.c
152
+
48
+++ b/hw/intc/arm_gicv3_common.c
153
+typedef void ClearFn(void *ptr, size_t off, size_t len);
49
@@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
154
+
50
return;
155
+static void clear_horizontal(void *ptr, size_t off, size_t len)
51
}
156
+{
52
157
+ memset(ptr + off, 0, len);
53
+ if (s->lpi_enable && !s->dma) {
158
+}
54
+ error_setg(errp, "Redist-ITS: Guest 'sysmem' reference link not set");
159
+
160
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
161
+{
162
+ for (size_t i = 0; i < len; ++i) {
163
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
164
+ }
165
+}
166
+
167
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
168
+{
169
+ for (size_t i = 0; i < len; i += 2) {
170
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
171
+ }
172
+}
173
+
174
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
175
+{
176
+ for (size_t i = 0; i < len; i += 4) {
177
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
178
+ }
179
+}
180
+
181
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
182
+{
183
+ for (size_t i = 0; i < len; i += 8) {
184
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
185
+ }
186
+}
187
+
188
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
189
+{
190
+ for (size_t i = 0; i < len; i += 16) {
191
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
192
+ }
193
+}
194
+
195
+/*
196
+ * Copy elements from an array into a tile slice comprising len bytes.
197
+ */
198
+
199
+typedef void CopyFn(void *dst, const void *src, size_t len);
200
+
201
+static void copy_horizontal(void *dst, const void *src, size_t len)
202
+{
203
+ memcpy(dst, src, len);
204
+}
205
+
206
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
207
+{
208
+ const uint8_t *src = vsrc;
209
+ uint8_t *dst = vdst;
210
+ size_t i;
211
+
212
+ for (i = 0; i < len; ++i) {
213
+ dst[tile_vslice_index(i)] = src[i];
214
+ }
215
+}
216
+
217
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
218
+{
219
+ const uint16_t *src = vsrc;
220
+ uint16_t *dst = vdst;
221
+ size_t i;
222
+
223
+ for (i = 0; i < len / 2; ++i) {
224
+ dst[tile_vslice_index(i)] = src[i];
225
+ }
226
+}
227
+
228
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
229
+{
230
+ const uint32_t *src = vsrc;
231
+ uint32_t *dst = vdst;
232
+ size_t i;
233
+
234
+ for (i = 0; i < len / 4; ++i) {
235
+ dst[tile_vslice_index(i)] = src[i];
236
+ }
237
+}
238
+
239
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
240
+{
241
+ const uint64_t *src = vsrc;
242
+ uint64_t *dst = vdst;
243
+ size_t i;
244
+
245
+ for (i = 0; i < len / 8; ++i) {
246
+ dst[tile_vslice_index(i)] = src[i];
247
+ }
248
+}
249
+
250
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
251
+{
252
+ for (size_t i = 0; i < len; i += 16) {
253
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
254
+ }
255
+}
256
+
257
+/*
258
+ * Host and TLB primitives for vertical tile slice addressing.
259
+ */
260
+
261
+#define DO_LD(NAME, TYPE, HOST, TLB) \
262
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
263
+{ \
264
+ TYPE val = HOST(host); \
265
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
266
+} \
267
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
268
+ intptr_t off, target_ulong addr, uintptr_t ra) \
269
+{ \
270
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
271
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
272
+}
273
+
274
+#define DO_ST(NAME, TYPE, HOST, TLB) \
275
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
276
+{ \
277
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
278
+ HOST(host, val); \
279
+} \
280
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
281
+ intptr_t off, target_ulong addr, uintptr_t ra) \
282
+{ \
283
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
284
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
285
+}
286
+
287
+/*
288
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
289
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
290
+ * corresponds to storing the two 64-bit pieces in little-endian order.
291
+ */
292
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
293
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
294
+{ \
295
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
296
+ uint64_t *ptr = za + off; \
297
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
298
+} \
299
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
300
+{ \
301
+ HNAME##_host(za, tile_vslice_offset(off), host); \
302
+} \
303
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
304
+ target_ulong addr, uintptr_t ra) \
305
+{ \
306
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
307
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
308
+ uint64_t *ptr = za + off; \
309
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
310
+} \
311
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
312
+ target_ulong addr, uintptr_t ra) \
313
+{ \
314
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
315
+}
316
+
317
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
318
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
319
+{ \
320
+ uint64_t *ptr = za + off; \
321
+ HOST(host, ptr[BE]); \
322
+ HOST(host + 1, ptr[!BE]); \
323
+} \
324
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
325
+{ \
326
+ HNAME##_host(za, tile_vslice_offset(off), host); \
327
+} \
328
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
329
+ target_ulong addr, uintptr_t ra) \
330
+{ \
331
+ uint64_t *ptr = za + off; \
332
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
333
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
334
+} \
335
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
336
+ target_ulong addr, uintptr_t ra) \
337
+{ \
338
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
339
+}
340
+
341
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
342
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
343
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
344
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
345
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
346
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
347
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
348
+
349
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
350
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
351
+
352
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
353
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
354
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
355
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
356
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
357
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
358
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
359
+
360
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
361
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
362
+
363
+#undef DO_LD
364
+#undef DO_ST
365
+#undef DO_LDQ
366
+#undef DO_STQ
367
+
368
+/*
369
+ * Common helper for all contiguous predicated loads.
370
+ */
371
+
372
+static inline QEMU_ALWAYS_INLINE
373
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
374
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
375
+ const int esz, uint32_t mtedesc, bool vertical,
376
+ sve_ldst1_host_fn *host_fn,
377
+ sve_ldst1_tlb_fn *tlb_fn,
378
+ ClearFn *clr_fn,
379
+ CopyFn *cpy_fn)
380
+{
381
+ const intptr_t reg_max = simd_oprsz(desc);
382
+ const intptr_t esize = 1 << esz;
383
+ intptr_t reg_off, reg_last;
384
+ SVEContLdSt info;
385
+ void *host;
386
+ int flags;
387
+
388
+ /* Find the active elements. */
389
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
390
+ /* The entire predicate was false; no load occurs. */
391
+ clr_fn(za, 0, reg_max);
55
+ return;
392
+ return;
56
+ }
393
+ }
57
+
394
+
58
s->cpu = g_new0(GICv3CPUState, s->num_cpu);
395
+ /* Probe the page(s). Exit with exception for any invalid page. */
59
396
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
60
for (i = 0; i < s->num_cpu; i++) {
397
+
61
@@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
398
+ /* Handle watchpoints for all active elements. */
62
(1 << 24) |
399
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
63
(i << 8) |
400
+ BP_MEM_READ, ra);
64
(last << 4);
401
+
65
+
402
+ /*
66
+ if (s->lpi_enable) {
403
+ * Handle mte checks for all active elements.
67
+ s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS;
404
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
68
+ }
405
+ */
69
}
406
+ if (mtedesc) {
70
}
407
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
71
408
+ mtedesc, ra);
72
@@ -XXX,XX +XXX,XX @@ static Property arm_gicv3_common_properties[] = {
409
+ }
73
DEFINE_PROP_UINT32("num-cpu", GICv3State, num_cpu, 1),
410
+
74
DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32),
411
+ flags = info.page[0].flags | info.page[1].flags;
75
DEFINE_PROP_UINT32("revision", GICv3State, revision, 3),
412
+ if (unlikely(flags != 0)) {
76
+ DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0),
413
+#ifdef CONFIG_USER_ONLY
77
DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0),
414
+ g_assert_not_reached();
78
DEFINE_PROP_ARRAY("redist-region-count", GICv3State, nb_redist_regions,
415
+#else
79
redist_region_count, qdev_prop_uint32, uint32_t),
416
+ /*
80
+ DEFINE_PROP_LINK("sysmem", GICv3State, dma, TYPE_MEMORY_REGION,
417
+ * At least one page includes MMIO.
81
+ MemoryRegion *),
418
+ * Any bus operation can fail with cpu_transaction_failed,
82
DEFINE_PROP_END_OF_LIST(),
419
+ * which for ARM will raise SyncExternal. Perform the load
83
};
420
+ * into scratch memory to preserve register state until the end.
84
421
+ */
85
diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c
422
+ ARMVectorReg scratch = { };
86
index XXXXXXX..XXXXXXX 100644
423
+
87
--- a/hw/intc/arm_gicv3_dist.c
424
+ reg_off = info.reg_off_first[0];
88
+++ b/hw/intc/arm_gicv3_dist.c
425
+ reg_last = info.reg_off_last[1];
89
@@ -XXX,XX +XXX,XX @@ static bool gicd_readl(GICv3State *s, hwaddr offset,
426
+ if (reg_last < 0) {
90
* A3V == 1 (non-zero values of Affinity level 3 supported)
427
+ reg_last = info.reg_off_split;
91
* IDbits == 0xf (we support 16-bit interrupt identifiers)
428
+ if (reg_last < 0) {
92
* DVIS == 0 (Direct virtual LPI injection not supported)
429
+ reg_last = info.reg_off_last[0];
93
- * LPIS == 0 (LPIs not supported)
94
+ * LPIS == 1 (LPIs are supported if affinity routing is enabled)
95
+ * num_LPIs == 0b00000 (bits [15:11],Number of LPIs as indicated
96
+ * by GICD_TYPER.IDbits)
97
* MBIS == 0 (message-based SPIs not supported)
98
* SecurityExtn == 1 if security extns supported
99
* CPUNumber == 0 since for us ARE is always 1
100
@@ -XXX,XX +XXX,XX @@ static bool gicd_readl(GICv3State *s, hwaddr offset,
101
bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS);
102
103
*data = (1 << 25) | (1 << 24) | (sec_extn << 10) |
104
+ (s->lpi_enable << GICD_TYPER_LPIS_SHIFT) |
105
(0xf << 19) | itlinesnumber;
106
return true;
107
}
108
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/hw/intc/arm_gicv3_redist.c
111
+++ b/hw/intc/arm_gicv3_redist.c
112
@@ -XXX,XX +XXX,XX @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
113
case GICR_CTLR:
114
/* For our implementation, GICR_TYPER.DPGS is 0 and so all
115
* the DPG bits are RAZ/WI. We don't do anything asynchronously,
116
- * so UWP and RWP are RAZ/WI. And GICR_TYPER.LPIS is 0 (we don't
117
- * implement LPIs) so Enable_LPIs is RES0. So there are no writable
118
- * bits for us.
119
+ * so UWP and RWP are RAZ/WI. GICR_TYPER.LPIS is 1 (we
120
+ * implement LPIs) so Enable_LPIs is programmable.
121
*/
122
+ if (cs->gicr_typer & GICR_TYPER_PLPIS) {
123
+ if (value & GICR_CTLR_ENABLE_LPIS) {
124
+ cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS;
125
+ } else {
126
+ cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS;
127
+ }
430
+ }
128
+ }
431
+ }
129
return MEMTX_OK;
432
+
130
case GICR_STATUSR:
433
+ do {
131
/* RAZ/WI for our implementation */
434
+ uint64_t pg = vg[reg_off >> 6];
435
+ do {
436
+ if ((pg >> (reg_off & 63)) & 1) {
437
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
438
+ }
439
+ reg_off += esize;
440
+ } while (reg_off & 63);
441
+ } while (reg_off <= reg_last);
442
+
443
+ cpy_fn(za, &scratch, reg_max);
444
+ return;
445
+#endif
446
+ }
447
+
448
+ /* The entire operation is in RAM, on valid pages. */
449
+
450
+ reg_off = info.reg_off_first[0];
451
+ reg_last = info.reg_off_last[0];
452
+ host = info.page[0].host;
453
+
454
+ if (!vertical) {
455
+ memset(za, 0, reg_max);
456
+ } else if (reg_off) {
457
+ clr_fn(za, 0, reg_off);
458
+ }
459
+
460
+ while (reg_off <= reg_last) {
461
+ uint64_t pg = vg[reg_off >> 6];
462
+ do {
463
+ if ((pg >> (reg_off & 63)) & 1) {
464
+ host_fn(za, reg_off, host + reg_off);
465
+ } else if (vertical) {
466
+ clr_fn(za, reg_off, esize);
467
+ }
468
+ reg_off += esize;
469
+ } while (reg_off <= reg_last && (reg_off & 63));
470
+ }
471
+
472
+ /*
473
+ * Use the slow path to manage the cross-page misalignment.
474
+ * But we know this is RAM and cannot trap.
475
+ */
476
+ reg_off = info.reg_off_split;
477
+ if (unlikely(reg_off >= 0)) {
478
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
479
+ }
480
+
481
+ reg_off = info.reg_off_first[1];
482
+ if (unlikely(reg_off >= 0)) {
483
+ reg_last = info.reg_off_last[1];
484
+ host = info.page[1].host;
485
+
486
+ do {
487
+ uint64_t pg = vg[reg_off >> 6];
488
+ do {
489
+ if ((pg >> (reg_off & 63)) & 1) {
490
+ host_fn(za, reg_off, host + reg_off);
491
+ } else if (vertical) {
492
+ clr_fn(za, reg_off, esize);
493
+ }
494
+ reg_off += esize;
495
+ } while (reg_off & 63);
496
+ } while (reg_off <= reg_last);
497
+ }
498
+}
499
+
500
+static inline QEMU_ALWAYS_INLINE
501
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
502
+ target_ulong addr, uint32_t desc, uintptr_t ra,
503
+ const int esz, bool vertical,
504
+ sve_ldst1_host_fn *host_fn,
505
+ sve_ldst1_tlb_fn *tlb_fn,
506
+ ClearFn *clr_fn,
507
+ CopyFn *cpy_fn)
508
+{
509
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
510
+ int bit55 = extract64(addr, 55, 1);
511
+
512
+ /* Remove mtedesc from the normal sve descriptor. */
513
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
514
+
515
+ /* Perform gross MTE suppression early. */
516
+ if (!tbi_check(desc, bit55) ||
517
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
518
+ mtedesc = 0;
519
+ }
520
+
521
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
522
+ host_fn, tlb_fn, clr_fn, cpy_fn);
523
+}
524
+
525
+#define DO_LD(L, END, ESZ) \
526
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
527
+ target_ulong addr, uint32_t desc) \
528
+{ \
529
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
530
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
531
+ clear_horizontal, copy_horizontal); \
532
+} \
533
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
534
+ target_ulong addr, uint32_t desc) \
535
+{ \
536
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
537
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
538
+ clear_vertical_##L, copy_vertical_##L); \
539
+} \
540
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
541
+ target_ulong addr, uint32_t desc) \
542
+{ \
543
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
544
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
545
+ clear_horizontal, copy_horizontal); \
546
+} \
547
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
548
+ target_ulong addr, uint32_t desc) \
549
+{ \
550
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
551
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
552
+ clear_vertical_##L, copy_vertical_##L); \
553
+}
554
+
555
+DO_LD(b, , MO_8)
556
+DO_LD(h, _be, MO_16)
557
+DO_LD(h, _le, MO_16)
558
+DO_LD(s, _be, MO_32)
559
+DO_LD(s, _le, MO_32)
560
+DO_LD(d, _be, MO_64)
561
+DO_LD(d, _le, MO_64)
562
+DO_LD(q, _be, MO_128)
563
+DO_LD(q, _le, MO_128)
564
+
565
+#undef DO_LD
566
+
567
+/*
568
+ * Common helper for all contiguous predicated stores.
569
+ */
570
+
571
+static inline QEMU_ALWAYS_INLINE
572
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
573
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
574
+ const int esz, uint32_t mtedesc, bool vertical,
575
+ sve_ldst1_host_fn *host_fn,
576
+ sve_ldst1_tlb_fn *tlb_fn)
577
+{
578
+ const intptr_t reg_max = simd_oprsz(desc);
579
+ const intptr_t esize = 1 << esz;
580
+ intptr_t reg_off, reg_last;
581
+ SVEContLdSt info;
582
+ void *host;
583
+ int flags;
584
+
585
+ /* Find the active elements. */
586
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
587
+ /* The entire predicate was false; no store occurs. */
588
+ return;
589
+ }
590
+
591
+ /* Probe the page(s). Exit with exception for any invalid page. */
592
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
593
+
594
+ /* Handle watchpoints for all active elements. */
595
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
596
+ BP_MEM_WRITE, ra);
597
+
598
+ /*
599
+ * Handle mte checks for all active elements.
600
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
601
+ */
602
+ if (mtedesc) {
603
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
604
+ mtedesc, ra);
605
+ }
606
+
607
+ flags = info.page[0].flags | info.page[1].flags;
608
+ if (unlikely(flags != 0)) {
609
+#ifdef CONFIG_USER_ONLY
610
+ g_assert_not_reached();
611
+#else
612
+ /*
613
+ * At least one page includes MMIO.
614
+ * Any bus operation can fail with cpu_transaction_failed,
615
+ * which for ARM will raise SyncExternal. We cannot avoid
616
+ * this fault and will leave with the store incomplete.
617
+ */
618
+ reg_off = info.reg_off_first[0];
619
+ reg_last = info.reg_off_last[1];
620
+ if (reg_last < 0) {
621
+ reg_last = info.reg_off_split;
622
+ if (reg_last < 0) {
623
+ reg_last = info.reg_off_last[0];
624
+ }
625
+ }
626
+
627
+ do {
628
+ uint64_t pg = vg[reg_off >> 6];
629
+ do {
630
+ if ((pg >> (reg_off & 63)) & 1) {
631
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
632
+ }
633
+ reg_off += esize;
634
+ } while (reg_off & 63);
635
+ } while (reg_off <= reg_last);
636
+ return;
637
+#endif
638
+ }
639
+
640
+ reg_off = info.reg_off_first[0];
641
+ reg_last = info.reg_off_last[0];
642
+ host = info.page[0].host;
643
+
644
+ while (reg_off <= reg_last) {
645
+ uint64_t pg = vg[reg_off >> 6];
646
+ do {
647
+ if ((pg >> (reg_off & 63)) & 1) {
648
+ host_fn(za, reg_off, host + reg_off);
649
+ }
650
+ reg_off += 1 << esz;
651
+ } while (reg_off <= reg_last && (reg_off & 63));
652
+ }
653
+
654
+ /*
655
+ * Use the slow path to manage the cross-page misalignment.
656
+ * But we know this is RAM and cannot trap.
657
+ */
658
+ reg_off = info.reg_off_split;
659
+ if (unlikely(reg_off >= 0)) {
660
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
661
+ }
662
+
663
+ reg_off = info.reg_off_first[1];
664
+ if (unlikely(reg_off >= 0)) {
665
+ reg_last = info.reg_off_last[1];
666
+ host = info.page[1].host;
667
+
668
+ do {
669
+ uint64_t pg = vg[reg_off >> 6];
670
+ do {
671
+ if ((pg >> (reg_off & 63)) & 1) {
672
+ host_fn(za, reg_off, host + reg_off);
673
+ }
674
+ reg_off += 1 << esz;
675
+ } while (reg_off & 63);
676
+ } while (reg_off <= reg_last);
677
+ }
678
+}
679
+
680
+static inline QEMU_ALWAYS_INLINE
681
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
682
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
683
+ sve_ldst1_host_fn *host_fn,
684
+ sve_ldst1_tlb_fn *tlb_fn)
685
+{
686
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
687
+ int bit55 = extract64(addr, 55, 1);
688
+
689
+ /* Remove mtedesc from the normal sve descriptor. */
690
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
691
+
692
+ /* Perform gross MTE suppression early. */
693
+ if (!tbi_check(desc, bit55) ||
694
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
695
+ mtedesc = 0;
696
+ }
697
+
698
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
699
+ vertical, host_fn, tlb_fn);
700
+}
701
+
702
+#define DO_ST(L, END, ESZ) \
703
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
704
+ target_ulong addr, uint32_t desc) \
705
+{ \
706
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
707
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
708
+} \
709
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
710
+ target_ulong addr, uint32_t desc) \
711
+{ \
712
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
713
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
714
+} \
715
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
716
+ target_ulong addr, uint32_t desc) \
717
+{ \
718
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
719
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
720
+} \
721
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
722
+ target_ulong addr, uint32_t desc) \
723
+{ \
724
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
725
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
726
+}
727
+
728
+DO_ST(b, , MO_8)
729
+DO_ST(h, _be, MO_16)
730
+DO_ST(h, _le, MO_16)
731
+DO_ST(s, _be, MO_32)
732
+DO_ST(s, _le, MO_32)
733
+DO_ST(d, _be, MO_64)
734
+DO_ST(d, _le, MO_64)
735
+DO_ST(q, _be, MO_128)
736
+DO_ST(q, _le, MO_128)
737
+
738
+#undef DO_ST
739
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/target/arm/translate-sme.c
742
+++ b/target/arm/translate-sme.c
743
@@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
744
745
return true;
746
}
747
+
748
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
749
+{
750
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
751
+
752
+ /*
753
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
754
+ * also the order in which the elements appear in the function names,
755
+ * and so how we must concatenate the pieces.
756
+ */
757
+
758
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
759
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
760
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
761
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
762
+
763
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
764
+ FN_END(b, b),
765
+ FN_END(h_le, h_be),
766
+ FN_END(s_le, s_be),
767
+ FN_END(d_le, d_be),
768
+ FN_END(q_le, q_be),
769
+ };
770
+
771
+#undef FN_LS
772
+#undef FN_MTE
773
+#undef FN_HV
774
+#undef FN_END
775
+
776
+ TCGv_ptr t_za, t_pg;
777
+ TCGv_i64 addr;
778
+ int svl, desc = 0;
779
+ bool be = s->be_data == MO_BE;
780
+ bool mte = s->mte_active[0];
781
+
782
+ if (!dc_isar_feature(aa64_sme, s)) {
783
+ return false;
784
+ }
785
+ if (!sme_smza_enabled_check(s)) {
786
+ return true;
787
+ }
788
+
789
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
790
+ t_pg = pred_full_reg_ptr(s, a->pg);
791
+ addr = tcg_temp_new_i64();
792
+
793
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
794
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
795
+
796
+ if (mte) {
797
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
798
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
799
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
800
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
801
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
802
+ desc <<= SVE_MTEDESC_SHIFT;
803
+ } else {
804
+ addr = clean_data_tbi(s, addr);
805
+ }
806
+ svl = streaming_vec_reg_size(s);
807
+ desc = simd_desc(svl, svl, desc);
808
+
809
+ fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
810
+ tcg_constant_i32(desc));
811
+
812
+ tcg_temp_free_ptr(t_za);
813
+ tcg_temp_free_ptr(t_pg);
814
+ tcg_temp_free_i64(addr);
815
+ return true;
816
+}
132
--
817
--
133
2.20.1
818
2.25.1
134
135
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Included creation of ITS as part of virt platform GIC
3
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
4
initialization. This Emulated ITS model now co-exists with kvm
4
We will reuse this for SME save and restore array insns.
5
ITS and is enabled in absence of kvm irq kernel support in a
6
platform.
7
5
8
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Message-id: 20210910143951.92242-9-shashi.mallela@linaro.org
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-22-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
include/hw/arm/virt.h | 2 ++
11
target/arm/translate-a64.h | 3 +++
14
target/arm/kvm_arm.h | 4 ++--
12
target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++----------
15
hw/arm/virt.c | 29 +++++++++++++++++++++++++++--
13
2 files changed, 39 insertions(+), 12 deletions(-)
16
3 files changed, 31 insertions(+), 4 deletions(-)
17
14
18
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
15
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/arm/virt.h
17
--- a/target/arm/translate-a64.h
21
+++ b/include/hw/arm/virt.h
18
+++ b/target/arm/translate-a64.h
22
@@ -XXX,XX +XXX,XX @@ struct VirtMachineClass {
19
@@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
23
MachineClass parent;
20
uint32_t rm_ofs, int64_t shift,
24
bool disallow_affinity_adjustment;
21
uint32_t opr_sz, uint32_t max_sz);
25
bool no_its;
22
26
+ bool no_tcg_its;
23
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
27
bool no_pmu;
24
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
28
bool claim_edge_triggered_timers;
25
+
29
bool smbios_old_sys_ver;
26
#endif /* TARGET_ARM_TRANSLATE_A64_H */
30
@@ -XXX,XX +XXX,XX @@ struct VirtMachineState {
27
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
31
bool highmem;
32
bool highmem_ecam;
33
bool its;
34
+ bool tcg_its;
35
bool virt;
36
bool ras;
37
bool mte;
38
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
39
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
40
--- a/target/arm/kvm_arm.h
29
--- a/target/arm/translate-sve.c
41
+++ b/target/arm/kvm_arm.h
30
+++ b/target/arm/translate-sve.c
42
@@ -XXX,XX +XXX,XX @@ static inline const char *its_class_name(void)
31
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
43
/* KVM implementation requires this capability */
32
* The load should begin at the address Rn + IMM.
44
return kvm_direct_msi_enabled() ? "arm-its-kvm" : NULL;
33
*/
45
} else {
34
46
- /* Software emulation is not implemented yet */
35
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
47
- return NULL;
36
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
48
+ /* Software emulation based model */
37
+ int len, int rn, int imm)
49
+ return "arm-gicv3-its";
38
{
39
int len_align = QEMU_ALIGN_DOWN(len, 8);
40
int len_remain = len % 8;
41
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
42
t0 = tcg_temp_new_i64();
43
for (i = 0; i < len_align; i += 8) {
44
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
45
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
46
+ tcg_gen_st_i64(t0, base, vofs + i);
47
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
48
}
49
tcg_temp_free_i64(t0);
50
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
51
clean_addr = new_tmp_a64_local(s);
52
tcg_gen_mov_i64(clean_addr, t0);
53
54
+ if (base != cpu_env) {
55
+ TCGv_ptr b = tcg_temp_local_new_ptr();
56
+ tcg_gen_mov_ptr(b, base);
57
+ base = b;
58
+ }
59
+
60
gen_set_label(loop);
61
62
t0 = tcg_temp_new_i64();
63
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
64
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
65
66
tp = tcg_temp_new_ptr();
67
- tcg_gen_add_ptr(tp, cpu_env, i);
68
+ tcg_gen_add_ptr(tp, base, i);
69
tcg_gen_addi_ptr(i, i, 8);
70
tcg_gen_st_i64(t0, tp, vofs);
71
tcg_temp_free_ptr(tp);
72
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
73
74
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
75
tcg_temp_free_ptr(i);
76
+
77
+ if (base != cpu_env) {
78
+ tcg_temp_free_ptr(base);
79
+ assert(len_remain == 0);
80
+ }
81
}
82
83
/*
84
@@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
85
default:
86
g_assert_not_reached();
87
}
88
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
89
+ tcg_gen_st_i64(t0, base, vofs + len_align);
90
tcg_temp_free_i64(t0);
50
}
91
}
51
}
92
}
52
93
53
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
94
/* Similarly for stores. */
54
index XXXXXXX..XXXXXXX 100644
95
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
55
--- a/hw/arm/virt.c
96
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
56
+++ b/hw/arm/virt.c
97
+ int len, int rn, int imm)
57
@@ -XXX,XX +XXX,XX @@ static void create_its(VirtMachineState *vms)
58
const char *itsclass = its_class_name();
59
DeviceState *dev;
60
61
+ if (!strcmp(itsclass, "arm-gicv3-its")) {
62
+ if (!vms->tcg_its) {
63
+ itsclass = NULL;
64
+ }
65
+ }
66
+
67
if (!itsclass) {
68
/* Do nothing if not supported */
69
return;
70
@@ -XXX,XX +XXX,XX @@ static void create_v2m(VirtMachineState *vms)
71
vms->msi_controller = VIRT_MSI_CTRL_GICV2M;
72
}
73
74
-static void create_gic(VirtMachineState *vms)
75
+static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
76
{
98
{
77
MachineState *ms = MACHINE(vms);
99
int len_align = QEMU_ALIGN_DOWN(len, 8);
78
/* We create a standalone GIC */
100
int len_remain = len % 8;
79
@@ -XXX,XX +XXX,XX @@ static void create_gic(VirtMachineState *vms)
101
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
80
nb_redist_regions);
102
81
qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", redist0_count);
103
t0 = tcg_temp_new_i64();
82
104
for (i = 0; i < len_align; i += 8) {
83
+ if (!kvm_irqchip_in_kernel()) {
105
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
84
+ if (vms->tcg_its) {
106
+ tcg_gen_ld_i64(t0, base, vofs + i);
85
+ object_property_set_link(OBJECT(vms->gic), "sysmem",
107
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
86
+ OBJECT(mem), &error_fatal);
108
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
87
+ qdev_prop_set_bit(vms->gic, "has-lpi", true);
109
}
88
+ }
110
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
111
clean_addr = new_tmp_a64_local(s);
112
tcg_gen_mov_i64(clean_addr, t0);
113
114
+ if (base != cpu_env) {
115
+ TCGv_ptr b = tcg_temp_local_new_ptr();
116
+ tcg_gen_mov_ptr(b, base);
117
+ base = b;
89
+ }
118
+ }
90
+
119
+
91
if (nb_redist_regions == 2) {
120
gen_set_label(loop);
92
uint32_t redist1_capacity =
121
93
vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE;
122
t0 = tcg_temp_new_i64();
94
@@ -XXX,XX +XXX,XX @@ static void machvirt_init(MachineState *machine)
123
tp = tcg_temp_new_ptr();
95
124
- tcg_gen_add_ptr(tp, cpu_env, i);
96
virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
125
+ tcg_gen_add_ptr(tp, base, i);
97
126
tcg_gen_ld_i64(t0, tp, vofs);
98
- create_gic(vms);
127
tcg_gen_addi_ptr(i, i, 8);
99
+ create_gic(vms, sysmem);
128
tcg_temp_free_ptr(tp);
100
129
@@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
101
virt_cpu_post_init(vms, sysmem);
130
102
131
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
103
@@ -XXX,XX +XXX,XX @@ static void virt_instance_init(Object *obj)
132
tcg_temp_free_ptr(i);
104
} else {
105
/* Default allows ITS instantiation */
106
vms->its = true;
107
+
133
+
108
+ if (vmc->no_tcg_its) {
134
+ if (base != cpu_env) {
109
+ vms->tcg_its = false;
135
+ tcg_temp_free_ptr(base);
110
+ } else {
136
+ assert(len_remain == 0);
111
+ vms->tcg_its = true;
112
+ }
137
+ }
113
}
138
}
114
139
115
/* Default disallows iommu instantiation */
140
/* Predicate register stores can be any multiple of 2. */
116
@@ -XXX,XX +XXX,XX @@ DEFINE_VIRT_MACHINE_AS_LATEST(6, 2)
141
if (len_remain) {
117
142
t0 = tcg_temp_new_i64();
118
static void virt_machine_6_1_options(MachineClass *mc)
143
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
119
{
144
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
120
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
145
121
+
146
switch (len_remain) {
122
virt_machine_6_2_options(mc);
147
case 2:
123
compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
148
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
124
+
149
if (sve_access_check(s)) {
125
+ /* qemu ITS was introduced with 6.2 */
150
int size = vec_full_reg_size(s);
126
+ vmc->no_tcg_its = true;
151
int off = vec_full_reg_offset(s, a->rd);
152
- do_ldr(s, off, size, a->rn, a->imm * size);
153
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
154
}
155
return true;
127
}
156
}
128
DEFINE_VIRT_MACHINE(6, 1)
157
@@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
129
158
if (sve_access_check(s)) {
159
int size = pred_full_reg_size(s);
160
int off = pred_full_reg_offset(s, a->rd);
161
- do_ldr(s, off, size, a->rn, a->imm * size);
162
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
163
}
164
return true;
165
}
166
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
167
if (sve_access_check(s)) {
168
int size = vec_full_reg_size(s);
169
int off = vec_full_reg_offset(s, a->rd);
170
- do_str(s, off, size, a->rn, a->imm * size);
171
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
172
}
173
return true;
174
}
175
@@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
176
if (sve_access_check(s)) {
177
int size = pred_full_reg_size(s);
178
int off = pred_full_reg_offset(s, a->rd);
179
- do_str(s, off, size, a->rn, a->imm * size);
180
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
181
}
182
return true;
183
}
130
--
184
--
131
2.20.1
185
2.25.1
132
133
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Currently the clock/reset check is done in uart_receive(), but we
3
We can reuse the SVE functions for LDR and STR, passing in the
4
can move the check to uart_can_receive() which is earlier.
4
base of the ZA vector and a zero offset.
5
5
6
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-23-richard.henderson@linaro.org
9
Message-id: 20210901124521.30599-4-bmeng.cn@gmail.com
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
10
---
12
hw/char/cadence_uart.c | 17 ++++++++++-------
11
target/arm/sme.decode | 7 +++++++
13
1 file changed, 10 insertions(+), 7 deletions(-)
12
target/arm/translate-sme.c | 24 ++++++++++++++++++++++++
13
2 files changed, 31 insertions(+)
14
14
15
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
15
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/char/cadence_uart.c
17
--- a/target/arm/sme.decode
18
+++ b/hw/char/cadence_uart.c
18
+++ b/target/arm/sme.decode
19
@@ -XXX,XX +XXX,XX @@ static void uart_parameters_setup(CadenceUARTState *s)
19
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
20
static int uart_can_receive(void *opaque)
20
&ldst rs=%mova_rs
21
{
21
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
22
CadenceUARTState *s = opaque;
22
&ldst esz=4 rs=%mova_rs
23
- int ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE);
24
- uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE;
25
+ int ret;
26
+ uint32_t ch_mode;
27
+
23
+
28
+ /* ignore characters when unclocked or in reset */
24
+&ldstr rv rn imm
29
+ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
25
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
30
+ return 0;
26
+ &ldstr rv=%mova_rs
27
+
28
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
29
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
30
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/translate-sme.c
33
+++ b/target/arm/translate-sme.c
34
@@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
35
tcg_temp_free_i64(addr);
36
return true;
37
}
38
+
39
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
40
+
41
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
42
+{
43
+ int svl = streaming_vec_reg_size(s);
44
+ int imm = a->imm;
45
+ TCGv_ptr base;
46
+
47
+ if (!sme_za_enabled_check(s)) {
48
+ return true;
31
+ }
49
+ }
32
+
50
+
33
+ ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE);
51
+ /* ZA[n] equates to ZA0H.B[n]. */
34
+ ch_mode = s->r[R_MR] & UART_MR_CHMODE;
52
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
35
53
+
36
if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) {
54
+ fn(s, base, 0, svl, a->rn, imm * svl);
37
ret = MIN(ret, CADENCE_UART_RX_FIFO_SIZE - s->rx_count);
55
+
38
@@ -XXX,XX +XXX,XX @@ static void uart_receive(void *opaque, const uint8_t *buf, int size)
56
+ tcg_temp_free_ptr(base);
39
CadenceUARTState *s = opaque;
57
+ return true;
40
uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE;
58
+}
41
59
+
42
- /* ignore characters when unclocked or in reset */
60
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
43
- if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
61
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
44
- return;
45
- }
46
-
47
if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) {
48
uart_write_rx_fifo(opaque, buf, size);
49
}
50
--
62
--
51
2.20.1
63
2.25.1
52
53
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Implemented lpi processing at redistributor to get lpi config info
4
from lpi configuration table,determine priority,set pending state in
5
lpi pending table and forward the lpi to cpuif.Added logic to invoke
6
redistributor lpi processing with translated LPI which set/clear LPI
7
from ITS device as part of ITS INT,CLEAR,DISCARD command and
8
GITS_TRANSLATER processing.
9
10
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
11
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Message-id: 20210910143951.92242-7-shashi.mallela@linaro.org
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
7
---
16
hw/intc/gicv3_internal.h | 9 ++
8
target/arm/helper-sme.h | 5 +++
17
include/hw/intc/arm_gicv3_common.h | 7 ++
9
target/arm/sme.decode | 11 +++++
18
hw/intc/arm_gicv3.c | 14 +++
10
target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++
19
hw/intc/arm_gicv3_common.c | 1 +
11
target/arm/translate-sme.c | 31 +++++++++++++
20
hw/intc/arm_gicv3_cpuif.c | 7 +-
12
4 files changed, 137 insertions(+)
21
hw/intc/arm_gicv3_its.c | 23 +++++
22
hw/intc/arm_gicv3_redist.c | 141 +++++++++++++++++++++++++++++
23
7 files changed, 200 insertions(+), 2 deletions(-)
24
13
25
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
26
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
27
--- a/hw/intc/gicv3_internal.h
16
--- a/target/arm/helper-sme.h
28
+++ b/hw/intc/gicv3_internal.h
17
+++ b/target/arm/helper-sme.h
29
@@ -XXX,XX +XXX,XX @@ FIELD(GICR_PENDBASER, PHYADDR, 16, 36)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
30
FIELD(GICR_PENDBASER, OUTERCACHE, 56, 3)
19
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
31
FIELD(GICR_PENDBASER, PTZ, 62, 1)
20
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
32
21
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
33
+#define GICR_PROPBASER_IDBITS_THRESHOLD 0xd
34
+
22
+
35
#define ICC_CTLR_EL1_CBPR (1U << 0)
23
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
36
#define ICC_CTLR_EL1_EOIMODE (1U << 1)
24
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
37
#define ICC_CTLR_EL1_PMHE (1U << 6)
25
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
38
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1)
26
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
39
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
40
#define L1TABLE_ENTRY_SIZE 8
28
index XXXXXXX..XXXXXXX 100644
41
29
--- a/target/arm/sme.decode
42
+#define LPI_CTE_ENABLED TABLE_ENTRY_VALID_MASK
30
+++ b/target/arm/sme.decode
43
+#define LPI_PRIORITY_MASK 0xfc
31
@@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
32
33
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
34
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
44
+
35
+
45
#define GITS_CMDQ_ENTRY_SIZE 32
36
+### SME Add Vector to Array
46
#define NUM_BYTES_IN_DW 8
37
+
47
38
+&adda zad zn pm pn
48
@@ -XXX,XX +XXX,XX @@ FIELD(MAPC, RDBASE, 16, 32)
39
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
49
* Valid = 1 bit,RDBase = 36 bits(considering max RDBASE)
40
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
50
*/
41
+
51
#define GITS_CTE_SIZE (0x8ULL)
42
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
52
+#define GITS_CTE_RDBASE_PROCNUM_MASK MAKE_64BIT_MASK(1, RDBASE_PROCNUM_LENGTH)
43
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
53
44
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
54
/* Special interrupt IDs */
45
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
55
#define INTID_SECURE 1020
46
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
56
@@ -XXX,XX +XXX,XX @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data,
57
unsigned size, MemTxAttrs attrs);
58
void gicv3_dist_set_irq(GICv3State *s, int irq, int level);
59
void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level);
60
+void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level);
61
+void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level);
62
+void gicv3_redist_update_lpi(GICv3CPUState *cs);
63
void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns);
64
void gicv3_init_cpuif(GICv3State *s);
65
66
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
67
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
68
--- a/include/hw/intc/arm_gicv3_common.h
48
--- a/target/arm/sme_helper.c
69
+++ b/include/hw/intc/arm_gicv3_common.h
49
+++ b/target/arm/sme_helper.c
70
@@ -XXX,XX +XXX,XX @@ struct GICv3CPUState {
50
@@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128)
71
* real state above; it doesn't need to be migrated.
51
DO_ST(q, _le, MO_128)
72
*/
52
73
PendingIrq hppi;
53
#undef DO_ST
74
+
54
+
75
+ /*
55
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
76
+ * Cached information recalculated from LPI tables
56
+ void *vpm, uint32_t desc)
77
+ * in guest memory
57
+{
78
+ */
58
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
79
+ PendingIrq hpplpi;
59
+ uint64_t *pn = vpn, *pm = vpm;
60
+ uint32_t *zda = vzda, *zn = vzn;
80
+
61
+
81
/* This is temporary working state, to avoid a malloc in gicv3_update() */
62
+ for (row = 0; row < oprsz; ) {
82
bool seenbetter;
63
+ uint64_t pa = pn[row >> 4];
83
};
64
+ do {
84
diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
65
+ if (pa & 1) {
85
index XXXXXXX..XXXXXXX 100644
66
+ for (col = 0; col < oprsz; ) {
86
--- a/hw/intc/arm_gicv3.c
67
+ uint64_t pb = pm[col >> 4];
87
+++ b/hw/intc/arm_gicv3.c
68
+ do {
88
@@ -XXX,XX +XXX,XX @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs)
69
+ if (pb & 1) {
89
cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq);
70
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
90
}
71
+ }
91
72
+ pb >>= 4;
92
+ if ((cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) && cs->gic->lpi_enable &&
73
+ } while (++col & 15);
93
+ (cs->hpplpi.prio != 0xff)) {
74
+ }
94
+ if (irqbetter(cs, cs->hpplpi.irq, cs->hpplpi.prio)) {
95
+ cs->hppi.irq = cs->hpplpi.irq;
96
+ cs->hppi.prio = cs->hpplpi.prio;
97
+ cs->hppi.grp = cs->hpplpi.grp;
98
+ seenbetter = true;
99
+ }
100
+ }
101
+
102
/* If the best interrupt we just found would preempt whatever
103
* was the previous best interrupt before this update, then
104
* we know it's definitely the best one now.
105
@@ -XXX,XX +XXX,XX @@ static void gicv3_set_irq(void *opaque, int irq, int level)
106
107
static void arm_gicv3_post_load(GICv3State *s)
108
{
109
+ int i;
110
/* Recalculate our cached idea of the current highest priority
111
* pending interrupt, but don't set IRQ or FIQ lines.
112
*/
113
+ for (i = 0; i < s->num_cpu; i++) {
114
+ gicv3_redist_update_lpi(&s->cpu[i]);
115
+ }
116
gicv3_full_update_noirqset(s);
117
/* Repopulate the cache of GICv3CPUState pointers for target CPUs */
118
gicv3_cache_all_target_cpustates(s);
119
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/hw/intc/arm_gicv3_common.c
122
+++ b/hw/intc/arm_gicv3_common.c
123
@@ -XXX,XX +XXX,XX @@ static void arm_gicv3_common_reset(DeviceState *dev)
124
memset(cs->gicr_ipriorityr, 0, sizeof(cs->gicr_ipriorityr));
125
126
cs->hppi.prio = 0xff;
127
+ cs->hpplpi.prio = 0xff;
128
129
/* State in the CPU interface must *not* be reset here, because it
130
* is part of the CPU's reset domain, not the GIC device's.
131
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
132
index XXXXXXX..XXXXXXX 100644
133
--- a/hw/intc/arm_gicv3_cpuif.c
134
+++ b/hw/intc/arm_gicv3_cpuif.c
135
@@ -XXX,XX +XXX,XX @@ static void icc_activate_irq(GICv3CPUState *cs, int irq)
136
cs->gicr_iactiver0 = deposit32(cs->gicr_iactiver0, irq, 1, 1);
137
cs->gicr_ipendr0 = deposit32(cs->gicr_ipendr0, irq, 1, 0);
138
gicv3_redist_update(cs);
139
- } else {
140
+ } else if (irq < GICV3_LPI_INTID_START) {
141
gicv3_gicd_active_set(cs->gic, irq);
142
gicv3_gicd_pending_clear(cs->gic, irq);
143
gicv3_update(cs->gic, irq, 1);
144
+ } else {
145
+ gicv3_redist_lpi_pending(cs, irq, 0);
146
}
147
}
148
149
@@ -XXX,XX +XXX,XX @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri,
150
trace_gicv3_icc_eoir_write(is_eoir0 ? 0 : 1,
151
gicv3_redist_affid(cs), value);
152
153
- if (irq >= cs->gic->num_irq) {
154
+ if ((irq >= cs->gic->num_irq) &&
155
+ !(cs->gic->lpi_enable && (irq >= GICV3_LPI_INTID_START))) {
156
/* This handles two cases:
157
* 1. If software writes the ID of a spurious interrupt [ie 1020-1023]
158
* to the GICC_EOIR, the GIC ignores that write.
159
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/hw/intc/arm_gicv3_its.c
162
+++ b/hw/intc/arm_gicv3_its.c
163
@@ -XXX,XX +XXX,XX @@ static bool process_its_cmd(GICv3ITSState *s, uint64_t value, uint32_t offset,
164
uint64_t cte = 0;
165
bool cte_valid = false;
166
bool result = false;
167
+ uint64_t rdbase;
168
169
if (cmd == NONE) {
170
devid = offset;
171
@@ -XXX,XX +XXX,XX @@ static bool process_its_cmd(GICv3ITSState *s, uint64_t value, uint32_t offset,
172
* Current implementation only supports rdbase == procnum
173
* Hence rdbase physical address is ignored
174
*/
175
+ rdbase = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1U;
176
+
177
+ if (rdbase > s->gicv3->num_cpu) {
178
+ return result;
179
+ }
180
+
181
+ if ((cmd == CLEAR) || (cmd == DISCARD)) {
182
+ gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 0);
183
+ } else {
184
+ gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 1);
185
+ }
186
+
187
if (cmd == DISCARD) {
188
IteEntry ite = {};
189
/* remove mapping from interrupt translation table */
190
@@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s)
191
MemTxResult res = MEMTX_OK;
192
bool result = true;
193
uint8_t cmd;
194
+ int i;
195
196
if (!(s->ctlr & ITS_CTLR_ENABLED)) {
197
return;
198
@@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s)
199
break;
200
case GITS_CMD_INV:
201
case GITS_CMD_INVALL:
202
+ /*
203
+ * Current implementation doesn't cache any ITS tables,
204
+ * but the calculated lpi priority information. We only
205
+ * need to trigger lpi priority re-calculation to be in
206
+ * sync with LPI config table or pending table changes.
207
+ */
208
+ for (i = 0; i < s->gicv3->num_cpu; i++) {
209
+ gicv3_redist_update_lpi(&s->gicv3->cpu[i]);
210
+ }
75
+ }
211
break;
76
+ pa >>= 4;
212
default:
77
+ } while (++row & 15);
213
break;
214
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
215
index XXXXXXX..XXXXXXX 100644
216
--- a/hw/intc/arm_gicv3_redist.c
217
+++ b/hw/intc/arm_gicv3_redist.c
218
@@ -XXX,XX +XXX,XX @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
219
if (cs->gicr_typer & GICR_TYPER_PLPIS) {
220
if (value & GICR_CTLR_ENABLE_LPIS) {
221
cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS;
222
+ /* Check for any pending interr in pending table */
223
+ gicv3_redist_update_lpi(cs);
224
+ gicv3_redist_update(cs);
225
} else {
226
cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS;
227
}
228
@@ -XXX,XX +XXX,XX @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data,
229
return r;
230
}
231
232
+static void gicv3_redist_check_lpi_priority(GICv3CPUState *cs, int irq)
233
+{
234
+ AddressSpace *as = &cs->gic->dma_as;
235
+ uint64_t lpict_baddr;
236
+ uint8_t lpite;
237
+ uint8_t prio;
238
+
239
+ lpict_baddr = cs->gicr_propbaser & R_GICR_PROPBASER_PHYADDR_MASK;
240
+
241
+ address_space_read(as, lpict_baddr + ((irq - GICV3_LPI_INTID_START) *
242
+ sizeof(lpite)), MEMTXATTRS_UNSPECIFIED, &lpite,
243
+ sizeof(lpite));
244
+
245
+ if (!(lpite & LPI_CTE_ENABLED)) {
246
+ return;
247
+ }
248
+
249
+ if (cs->gic->gicd_ctlr & GICD_CTLR_DS) {
250
+ prio = lpite & LPI_PRIORITY_MASK;
251
+ } else {
252
+ prio = ((lpite & LPI_PRIORITY_MASK) >> 1) | 0x80;
253
+ }
254
+
255
+ if ((prio < cs->hpplpi.prio) ||
256
+ ((prio == cs->hpplpi.prio) && (irq <= cs->hpplpi.irq))) {
257
+ cs->hpplpi.irq = irq;
258
+ cs->hpplpi.prio = prio;
259
+ /* LPIs are always non-secure Grp1 interrupts */
260
+ cs->hpplpi.grp = GICV3_G1NS;
261
+ }
78
+ }
262
+}
79
+}
263
+
80
+
264
+void gicv3_redist_update_lpi(GICv3CPUState *cs)
81
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
82
+ void *vpm, uint32_t desc)
265
+{
83
+{
266
+ /*
84
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
267
+ * This function scans the LPI pending table and for each pending
85
+ uint8_t *pn = vpn, *pm = vpm;
268
+ * LPI, reads the corresponding entry from LPI configuration table
86
+ uint64_t *zda = vzda, *zn = vzn;
269
+ * to extract the priority info and determine if the current LPI
270
+ * priority is lower than the last computed high priority lpi interrupt.
271
+ * If yes, replace current LPI as the new high priority lpi interrupt.
272
+ */
273
+ AddressSpace *as = &cs->gic->dma_as;
274
+ uint64_t lpipt_baddr;
275
+ uint32_t pendt_size = 0;
276
+ uint8_t pend;
277
+ int i, bit;
278
+ uint64_t idbits;
279
+
87
+
280
+ idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS),
88
+ for (row = 0; row < oprsz; ++row) {
281
+ GICD_TYPER_IDBITS);
89
+ if (pn[H1(row)] & 1) {
282
+
90
+ for (col = 0; col < oprsz; ++col) {
283
+ if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser ||
91
+ if (pm[H1(col)] & 1) {
284
+ !cs->gicr_pendbaser) {
92
+ zda[tile_vslice_index(row) + col] += zn[col];
285
+ return;
93
+ }
286
+ }
94
+ }
287
+
288
+ cs->hpplpi.prio = 0xff;
289
+
290
+ lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK;
291
+
292
+ /* Determine the highest priority pending interrupt among LPIs */
293
+ pendt_size = (1ULL << (idbits + 1));
294
+
295
+ for (i = GICV3_LPI_INTID_START / 8; i < pendt_size / 8; i++) {
296
+ address_space_read(as, lpipt_baddr + i, MEMTXATTRS_UNSPECIFIED, &pend,
297
+ sizeof(pend));
298
+
299
+ while (pend) {
300
+ bit = ctz32(pend);
301
+ gicv3_redist_check_lpi_priority(cs, i * 8 + bit);
302
+ pend &= ~(1 << bit);
303
+ }
95
+ }
304
+ }
96
+ }
305
+}
97
+}
306
+
98
+
307
+void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level)
99
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
100
+ void *vpm, uint32_t desc)
308
+{
101
+{
309
+ /*
102
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
310
+ * This function updates the pending bit in lpi pending table for
103
+ uint64_t *pn = vpn, *pm = vpm;
311
+ * the irq being activated or deactivated.
104
+ uint32_t *zda = vzda, *zn = vzn;
312
+ */
313
+ AddressSpace *as = &cs->gic->dma_as;
314
+ uint64_t lpipt_baddr;
315
+ bool ispend = false;
316
+ uint8_t pend;
317
+
105
+
318
+ /*
106
+ for (row = 0; row < oprsz; ) {
319
+ * get the bit value corresponding to this irq in the
107
+ uint64_t pa = pn[row >> 4];
320
+ * lpi pending table
108
+ do {
321
+ */
109
+ if (pa & 1) {
322
+ lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK;
110
+ uint32_t zn_row = zn[H4(row)];
111
+ for (col = 0; col < oprsz; ) {
112
+ uint64_t pb = pm[col >> 4];
113
+ do {
114
+ if (pb & 1) {
115
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
116
+ }
117
+ pb >>= 4;
118
+ } while (++col & 15);
119
+ }
120
+ }
121
+ pa >>= 4;
122
+ } while (++row & 15);
123
+ }
124
+}
323
+
125
+
324
+ address_space_read(as, lpipt_baddr + ((irq / 8) * sizeof(pend)),
126
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
325
+ MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend));
127
+ void *vpm, uint32_t desc)
128
+{
129
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
130
+ uint8_t *pn = vpn, *pm = vpm;
131
+ uint64_t *zda = vzda, *zn = vzn;
326
+
132
+
327
+ ispend = extract32(pend, irq % 8, 1);
133
+ for (row = 0; row < oprsz; ++row) {
328
+
134
+ if (pn[H1(row)] & 1) {
329
+ /* no change in the value of pending bit, return */
135
+ uint64_t zn_row = zn[row];
330
+ if (ispend == level) {
136
+ for (col = 0; col < oprsz; ++col) {
331
+ return;
137
+ if (pm[H1(col)] & 1) {
332
+ }
138
+ zda[tile_vslice_index(row) + col] += zn_row;
333
+ pend = deposit32(pend, irq % 8, 1, level ? 1 : 0);
139
+ }
334
+
140
+ }
335
+ address_space_write(as, lpipt_baddr + ((irq / 8) * sizeof(pend)),
336
+ MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend));
337
+
338
+ /*
339
+ * check if this LPI is better than the current hpplpi, if yes
340
+ * just set hpplpi.prio and .irq without doing a full rescan
341
+ */
342
+ if (level) {
343
+ gicv3_redist_check_lpi_priority(cs, irq);
344
+ } else {
345
+ if (irq == cs->hpplpi.irq) {
346
+ gicv3_redist_update_lpi(cs);
347
+ }
141
+ }
348
+ }
142
+ }
349
+}
143
+}
144
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/target/arm/translate-sme.c
147
+++ b/target/arm/translate-sme.c
148
@@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
149
150
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
151
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
350
+
152
+
351
+void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level)
153
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
154
+ gen_helper_gvec_4 *fn)
352
+{
155
+{
353
+ uint64_t idbits;
156
+ int svl = streaming_vec_reg_size(s);
157
+ uint32_t desc = simd_desc(svl, svl, 0);
158
+ TCGv_ptr za, zn, pn, pm;
354
+
159
+
355
+ idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS),
160
+ if (!sme_smza_enabled_check(s)) {
356
+ GICD_TYPER_IDBITS);
161
+ return true;
357
+
358
+ if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser ||
359
+ !cs->gicr_pendbaser || (irq > (1ULL << (idbits + 1)) - 1) ||
360
+ irq < GICV3_LPI_INTID_START) {
361
+ return;
362
+ }
162
+ }
363
+
163
+
364
+ /* set/clear the pending bit for this irq */
164
+ /* Sum XZR+zad to find ZAd. */
365
+ gicv3_redist_lpi_pending(cs, irq, level);
165
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
166
+ zn = vec_full_reg_ptr(s, a->zn);
167
+ pn = pred_full_reg_ptr(s, a->pn);
168
+ pm = pred_full_reg_ptr(s, a->pm);
366
+
169
+
367
+ gicv3_redist_update(cs);
170
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
171
+
172
+ tcg_temp_free_ptr(za);
173
+ tcg_temp_free_ptr(zn);
174
+ tcg_temp_free_ptr(pn);
175
+ tcg_temp_free_ptr(pm);
176
+ return true;
368
+}
177
+}
369
+
178
+
370
void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level)
179
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
371
{
180
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
372
/* Update redistributor state for a change in an external PPI input line */
181
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
182
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
373
--
183
--
374
2.20.1
184
2.25.1
375
376
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Added ITS command queue handling for MAPTI,MAPI commands,handled ITS
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
translation which triggers an LPI via INT command as well as write
4
Message-id: 20220708151540.18136-25-richard.henderson@linaro.org
5
to GITS_TRANSLATER register,defined enum to differentiate between ITS
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
command interrupt trigger and GITS_TRANSLATER based interrupt trigger.
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Each of these commands make use of other functionalities implemented to
7
---
8
get device table entry,collection table entry or interrupt translation
8
target/arm/helper-sme.h | 5 +++
9
table entry required for their processing.
9
target/arm/sme.decode | 9 +++++
10
target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 32 ++++++++++++++++++
12
4 files changed, 115 insertions(+)
10
13
11
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Message-id: 20210910143951.92242-5-shashi.mallela@linaro.org
14
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
---
16
hw/intc/gicv3_internal.h | 12 +
17
include/hw/intc/arm_gicv3_common.h | 2 +
18
hw/intc/arm_gicv3_its.c | 365 ++++++++++++++++++++++++++++-
19
3 files changed, 378 insertions(+), 1 deletion(-)
20
21
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
22
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/intc/gicv3_internal.h
16
--- a/target/arm/helper-sme.h
24
+++ b/hw/intc/gicv3_internal.h
17
+++ b/target/arm/helper-sme.h
25
@@ -XXX,XX +XXX,XX @@ FIELD(MAPC, RDBASE, 16, 32)
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
#define ITTADDR_MASK MAKE_64BIT_MASK(ITTADDR_SHIFT, ITTADDR_LENGTH)
19
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
27
#define SIZE_MASK 0x1f
20
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
28
21
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
29
+/* MAPI command fields */
30
+#define EVENTID_MASK ((1ULL << 32) - 1)
31
+
22
+
32
+/* MAPTI command fields */
23
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
33
+#define pINTID_SHIFT 32
24
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
34
+#define pINTID_MASK MAKE_64BIT_MASK(32, 32)
25
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/sme.decode
30
+++ b/target/arm/sme.decode
31
@@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
33
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
34
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
35
+
35
+
36
#define DEVID_SHIFT 32
36
+### SME Outer Product
37
#define DEVID_MASK MAKE_64BIT_MASK(32, 32)
37
+
38
38
+&op zad zn zm pm pn sub:bool
39
@@ -XXX,XX +XXX,XX @@ FIELD(MAPC, RDBASE, 16, 32)
39
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
40
* Values: | vPEID | ICID |
40
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
41
*/
41
+
42
#define ITS_ITT_ENTRY_SIZE 0xC
42
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
43
+#define ITE_ENTRY_INTTYPE_SHIFT 1
43
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
44
+#define ITE_ENTRY_INTID_SHIFT 2
44
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
45
+#define ITE_ENTRY_INTID_MASK MAKE_64BIT_MASK(2, 24)
46
+#define ITE_ENTRY_INTSP_SHIFT 26
47
+#define ITE_ENTRY_ICID_MASK MAKE_64BIT_MASK(0, 16)
48
49
/* 16 bits EventId */
50
#define ITS_IDBITS GICD_TYPER_IDBITS
51
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
52
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
53
--- a/include/hw/intc/arm_gicv3_common.h
46
--- a/target/arm/sme_helper.c
54
+++ b/include/hw/intc/arm_gicv3_common.h
47
+++ b/target/arm/sme_helper.c
55
@@ -XXX,XX +XXX,XX @@
48
@@ -XXX,XX +XXX,XX @@
56
#define GICV3_MAXIRQ 1020
49
#include "exec/cpu_ldst.h"
57
#define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL)
50
#include "exec/exec-all.h"
58
51
#include "qemu/int128.h"
59
+#define GICV3_LPI_INTID_START 8192
52
+#include "fpu/softfloat.h"
53
#include "vec_internal.h"
54
#include "sve_ldst_internal.h"
55
56
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
57
}
58
}
59
}
60
+
60
+
61
#define GICV3_REDIST_SIZE 0x20000
61
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
62
62
+ void *vpm, void *vst, uint32_t desc)
63
/* Number of SGI target-list bits */
63
+{
64
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
64
+ intptr_t row, col, oprsz = simd_maxsz(desc);
65
index XXXXXXX..XXXXXXX 100644
65
+ uint32_t neg = simd_data(desc) << 31;
66
--- a/hw/intc/arm_gicv3_its.c
66
+ uint16_t *pn = vpn, *pm = vpm;
67
+++ b/hw/intc/arm_gicv3_its.c
67
+ float_status fpst;
68
@@ -XXX,XX +XXX,XX @@ struct GICv3ITSClass {
69
void (*parent_reset)(DeviceState *dev);
70
};
71
72
+/*
73
+ * This is an internal enum used to distinguish between LPI triggered
74
+ * via command queue and LPI triggered via gits_translater write.
75
+ */
76
+typedef enum ItsCmdType {
77
+ NONE = 0, /* internal indication for GITS_TRANSLATER write */
78
+ CLEAR = 1,
79
+ DISCARD = 2,
80
+ INT = 3,
81
+} ItsCmdType;
82
+
68
+
83
+typedef struct {
69
+ /*
84
+ uint32_t iteh;
70
+ * Make a copy of float_status because this operation does not
85
+ uint64_t itel;
71
+ * update the cumulative fp exception status. It also produces
86
+} IteEntry;
72
+ * default nans.
73
+ */
74
+ fpst = *(float_status *)vst;
75
+ set_default_nan_mode(true, &fpst);
87
+
76
+
88
static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
77
+ for (row = 0; row < oprsz; ) {
89
{
78
+ uint16_t pa = pn[H2(row >> 4)];
90
uint64_t result = 0;
79
+ do {
91
@@ -XXX,XX +XXX,XX @@ static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
80
+ if (pa & 1) {
92
return result;
81
+ void *vza_row = vza + tile_vslice_offset(row);
93
}
82
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
94
95
+static bool get_cte(GICv3ITSState *s, uint16_t icid, uint64_t *cte,
96
+ MemTxResult *res)
97
+{
98
+ AddressSpace *as = &s->gicv3->dma_as;
99
+ uint64_t l2t_addr;
100
+ uint64_t value;
101
+ bool valid_l2t;
102
+ uint32_t l2t_id;
103
+ uint32_t max_l2_entries;
104
+
83
+
105
+ if (s->ct.indirect) {
84
+ for (col = 0; col < oprsz; ) {
106
+ l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE);
85
+ uint16_t pb = pm[H2(col >> 4)];
107
+
86
+ do {
108
+ value = address_space_ldq_le(as,
87
+ if (pb & 1) {
109
+ s->ct.base_addr +
88
+ uint32_t *a = vza_row + H1_4(col);
110
+ (l2t_id * L1TABLE_ENTRY_SIZE),
89
+ uint32_t *m = vzm + H1_4(col);
111
+ MEMTXATTRS_UNSPECIFIED, res);
90
+ *a = float32_muladd(n, *m, *a, 0, vst);
112
+
91
+ }
113
+ if (*res == MEMTX_OK) {
92
+ col += 4;
114
+ valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
93
+ pb >>= 4;
115
+
94
+ } while (col & 15);
116
+ if (valid_l2t) {
95
+ }
117
+ max_l2_entries = s->ct.page_sz / s->ct.entry_sz;
96
+ }
118
+
97
+ row += 4;
119
+ l2t_addr = value & ((1ULL << 51) - 1);
98
+ pa >>= 4;
120
+
99
+ } while (row & 15);
121
+ *cte = address_space_ldq_le(as, l2t_addr +
122
+ ((icid % max_l2_entries) * GITS_CTE_SIZE),
123
+ MEMTXATTRS_UNSPECIFIED, res);
124
+ }
125
+ }
126
+ } else {
127
+ /* Flat level table */
128
+ *cte = address_space_ldq_le(as, s->ct.base_addr +
129
+ (icid * GITS_CTE_SIZE),
130
+ MEMTXATTRS_UNSPECIFIED, res);
131
+ }
132
+
133
+ return (*cte & TABLE_ENTRY_VALID_MASK) != 0;
134
+}
135
+
136
+static bool update_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte,
137
+ IteEntry ite)
138
+{
139
+ AddressSpace *as = &s->gicv3->dma_as;
140
+ uint64_t itt_addr;
141
+ MemTxResult res = MEMTX_OK;
142
+
143
+ itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
144
+ itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */
145
+
146
+ address_space_stq_le(as, itt_addr + (eventid * (sizeof(uint64_t) +
147
+ sizeof(uint32_t))), ite.itel, MEMTXATTRS_UNSPECIFIED,
148
+ &res);
149
+
150
+ if (res == MEMTX_OK) {
151
+ address_space_stl_le(as, itt_addr + (eventid * (sizeof(uint64_t) +
152
+ sizeof(uint32_t))) + sizeof(uint32_t), ite.iteh,
153
+ MEMTXATTRS_UNSPECIFIED, &res);
154
+ }
155
+ if (res != MEMTX_OK) {
156
+ return false;
157
+ } else {
158
+ return true;
159
+ }
100
+ }
160
+}
101
+}
161
+
102
+
162
+static bool get_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte,
103
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
163
+ uint16_t *icid, uint32_t *pIntid, MemTxResult *res)
104
+ void *vpm, void *vst, uint32_t desc)
164
+{
105
+{
165
+ AddressSpace *as = &s->gicv3->dma_as;
106
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
166
+ uint64_t itt_addr;
107
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
167
+ bool status = false;
108
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
168
+ IteEntry ite = {};
109
+ uint8_t *pn = vpn, *pm = vpm;
110
+ float_status fpst = *(float_status *)vst;
169
+
111
+
170
+ itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
112
+ set_default_nan_mode(true, &fpst);
171
+ itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */
172
+
113
+
173
+ ite.itel = address_space_ldq_le(as, itt_addr +
114
+ for (row = 0; row < oprsz; ++row) {
174
+ (eventid * (sizeof(uint64_t) +
115
+ if (pn[H1(row)] & 1) {
175
+ sizeof(uint32_t))), MEMTXATTRS_UNSPECIFIED,
116
+ uint64_t *za_row = &za[tile_vslice_index(row)];
176
+ res);
117
+ uint64_t n = zn[row] ^ neg;
177
+
118
+
178
+ if (*res == MEMTX_OK) {
119
+ for (col = 0; col < oprsz; ++col) {
179
+ ite.iteh = address_space_ldl_le(as, itt_addr +
120
+ if (pm[H1(col)] & 1) {
180
+ (eventid * (sizeof(uint64_t) +
121
+ uint64_t *a = &za_row[col];
181
+ sizeof(uint32_t))) + sizeof(uint32_t),
122
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
182
+ MEMTXATTRS_UNSPECIFIED, res);
183
+
184
+ if (*res == MEMTX_OK) {
185
+ if (ite.itel & TABLE_ENTRY_VALID_MASK) {
186
+ if ((ite.itel >> ITE_ENTRY_INTTYPE_SHIFT) &
187
+ GITS_TYPE_PHYSICAL) {
188
+ *pIntid = (ite.itel & ITE_ENTRY_INTID_MASK) >>
189
+ ITE_ENTRY_INTID_SHIFT;
190
+ *icid = ite.iteh & ITE_ENTRY_ICID_MASK;
191
+ status = true;
192
+ }
123
+ }
193
+ }
124
+ }
194
+ }
125
+ }
195
+ }
126
+ }
196
+ return status;
127
+}
128
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/target/arm/translate-sme.c
131
+++ b/target/arm/translate-sme.c
132
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
133
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
134
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
135
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
136
+
137
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
138
+ gen_helper_gvec_5_ptr *fn)
139
+{
140
+ int svl = streaming_vec_reg_size(s);
141
+ uint32_t desc = simd_desc(svl, svl, a->sub);
142
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
143
+
144
+ if (!sme_smza_enabled_check(s)) {
145
+ return true;
146
+ }
147
+
148
+ /* Sum XZR+zad to find ZAd. */
149
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
150
+ zn = vec_full_reg_ptr(s, a->zn);
151
+ zm = vec_full_reg_ptr(s, a->zm);
152
+ pn = pred_full_reg_ptr(s, a->pn);
153
+ pm = pred_full_reg_ptr(s, a->pm);
154
+ fpst = fpstatus_ptr(FPST_FPCR);
155
+
156
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
157
+
158
+ tcg_temp_free_ptr(za);
159
+ tcg_temp_free_ptr(zn);
160
+ tcg_temp_free_ptr(pn);
161
+ tcg_temp_free_ptr(pm);
162
+ tcg_temp_free_ptr(fpst);
163
+ return true;
197
+}
164
+}
198
+
165
+
199
+static uint64_t get_dte(GICv3ITSState *s, uint32_t devid, MemTxResult *res)
166
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
200
+{
167
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
201
+ AddressSpace *as = &s->gicv3->dma_as;
202
+ uint64_t l2t_addr;
203
+ uint64_t value;
204
+ bool valid_l2t;
205
+ uint32_t l2t_id;
206
+ uint32_t max_l2_entries;
207
+
208
+ if (s->dt.indirect) {
209
+ l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE);
210
+
211
+ value = address_space_ldq_le(as,
212
+ s->dt.base_addr +
213
+ (l2t_id * L1TABLE_ENTRY_SIZE),
214
+ MEMTXATTRS_UNSPECIFIED, res);
215
+
216
+ if (*res == MEMTX_OK) {
217
+ valid_l2t = (value & L2_TABLE_VALID_MASK) != 0;
218
+
219
+ if (valid_l2t) {
220
+ max_l2_entries = s->dt.page_sz / s->dt.entry_sz;
221
+
222
+ l2t_addr = value & ((1ULL << 51) - 1);
223
+
224
+ value = address_space_ldq_le(as, l2t_addr +
225
+ ((devid % max_l2_entries) * GITS_DTE_SIZE),
226
+ MEMTXATTRS_UNSPECIFIED, res);
227
+ }
228
+ }
229
+ } else {
230
+ /* Flat level table */
231
+ value = address_space_ldq_le(as, s->dt.base_addr +
232
+ (devid * GITS_DTE_SIZE),
233
+ MEMTXATTRS_UNSPECIFIED, res);
234
+ }
235
+
236
+ return value;
237
+}
238
+
239
+/*
240
+ * This function handles the processing of following commands based on
241
+ * the ItsCmdType parameter passed:-
242
+ * 1. triggering of lpi interrupt translation via ITS INT command
243
+ * 2. triggering of lpi interrupt translation via gits_translater register
244
+ * 3. handling of ITS CLEAR command
245
+ * 4. handling of ITS DISCARD command
246
+ */
247
+static bool process_its_cmd(GICv3ITSState *s, uint64_t value, uint32_t offset,
248
+ ItsCmdType cmd)
249
+{
250
+ AddressSpace *as = &s->gicv3->dma_as;
251
+ uint32_t devid, eventid;
252
+ MemTxResult res = MEMTX_OK;
253
+ bool dte_valid;
254
+ uint64_t dte = 0;
255
+ uint32_t max_eventid;
256
+ uint16_t icid = 0;
257
+ uint32_t pIntid = 0;
258
+ bool ite_valid = false;
259
+ uint64_t cte = 0;
260
+ bool cte_valid = false;
261
+ bool result = false;
262
+
263
+ if (cmd == NONE) {
264
+ devid = offset;
265
+ } else {
266
+ devid = ((value & DEVID_MASK) >> DEVID_SHIFT);
267
+
268
+ offset += NUM_BYTES_IN_DW;
269
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
270
+ MEMTXATTRS_UNSPECIFIED, &res);
271
+ }
272
+
273
+ if (res != MEMTX_OK) {
274
+ return result;
275
+ }
276
+
277
+ eventid = (value & EVENTID_MASK);
278
+
279
+ dte = get_dte(s, devid, &res);
280
+
281
+ if (res != MEMTX_OK) {
282
+ return result;
283
+ }
284
+ dte_valid = dte & TABLE_ENTRY_VALID_MASK;
285
+
286
+ if (dte_valid) {
287
+ max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1));
288
+
289
+ ite_valid = get_ite(s, eventid, dte, &icid, &pIntid, &res);
290
+
291
+ if (res != MEMTX_OK) {
292
+ return result;
293
+ }
294
+
295
+ if (ite_valid) {
296
+ cte_valid = get_cte(s, icid, &cte, &res);
297
+ }
298
+
299
+ if (res != MEMTX_OK) {
300
+ return result;
301
+ }
302
+ }
303
+
304
+ if ((devid > s->dt.maxids.max_devids) || !dte_valid || !ite_valid ||
305
+ !cte_valid || (eventid > max_eventid)) {
306
+ qemu_log_mask(LOG_GUEST_ERROR,
307
+ "%s: invalid command attributes "
308
+ "devid %d or eventid %d or invalid dte %d or"
309
+ "invalid cte %d or invalid ite %d\n",
310
+ __func__, devid, eventid, dte_valid, cte_valid,
311
+ ite_valid);
312
+ /*
313
+ * in this implementation, in case of error
314
+ * we ignore this command and move onto the next
315
+ * command in the queue
316
+ */
317
+ } else {
318
+ /*
319
+ * Current implementation only supports rdbase == procnum
320
+ * Hence rdbase physical address is ignored
321
+ */
322
+ if (cmd == DISCARD) {
323
+ IteEntry ite = {};
324
+ /* remove mapping from interrupt translation table */
325
+ result = update_ite(s, eventid, dte, ite);
326
+ }
327
+ }
328
+
329
+ return result;
330
+}
331
+
332
+static bool process_mapti(GICv3ITSState *s, uint64_t value, uint32_t offset,
333
+ bool ignore_pInt)
334
+{
335
+ AddressSpace *as = &s->gicv3->dma_as;
336
+ uint32_t devid, eventid;
337
+ uint32_t pIntid = 0;
338
+ uint32_t max_eventid, max_Intid;
339
+ bool dte_valid;
340
+ MemTxResult res = MEMTX_OK;
341
+ uint16_t icid = 0;
342
+ uint64_t dte = 0;
343
+ IteEntry ite;
344
+ uint32_t int_spurious = INTID_SPURIOUS;
345
+ bool result = false;
346
+
347
+ devid = ((value & DEVID_MASK) >> DEVID_SHIFT);
348
+ offset += NUM_BYTES_IN_DW;
349
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
350
+ MEMTXATTRS_UNSPECIFIED, &res);
351
+
352
+ if (res != MEMTX_OK) {
353
+ return result;
354
+ }
355
+
356
+ eventid = (value & EVENTID_MASK);
357
+
358
+ if (!ignore_pInt) {
359
+ pIntid = ((value & pINTID_MASK) >> pINTID_SHIFT);
360
+ }
361
+
362
+ offset += NUM_BYTES_IN_DW;
363
+ value = address_space_ldq_le(as, s->cq.base_addr + offset,
364
+ MEMTXATTRS_UNSPECIFIED, &res);
365
+
366
+ if (res != MEMTX_OK) {
367
+ return result;
368
+ }
369
+
370
+ icid = value & ICID_MASK;
371
+
372
+ dte = get_dte(s, devid, &res);
373
+
374
+ if (res != MEMTX_OK) {
375
+ return result;
376
+ }
377
+ dte_valid = dte & TABLE_ENTRY_VALID_MASK;
378
+
379
+ max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1));
380
+
381
+ if (!ignore_pInt) {
382
+ max_Intid = (1ULL << (GICD_TYPER_IDBITS + 1)) - 1;
383
+ }
384
+
385
+ if ((devid > s->dt.maxids.max_devids) || (icid > s->ct.maxids.max_collids)
386
+ || !dte_valid || (eventid > max_eventid) ||
387
+ (!ignore_pInt && (((pIntid < GICV3_LPI_INTID_START) ||
388
+ (pIntid > max_Intid)) && (pIntid != INTID_SPURIOUS)))) {
389
+ qemu_log_mask(LOG_GUEST_ERROR,
390
+ "%s: invalid command attributes "
391
+ "devid %d or icid %d or eventid %d or pIntid %d or"
392
+ "unmapped dte %d\n", __func__, devid, icid, eventid,
393
+ pIntid, dte_valid);
394
+ /*
395
+ * in this implementation, in case of error
396
+ * we ignore this command and move onto the next
397
+ * command in the queue
398
+ */
399
+ } else {
400
+ /* add ite entry to interrupt translation table */
401
+ ite.itel = (dte_valid & TABLE_ENTRY_VALID_MASK) |
402
+ (GITS_TYPE_PHYSICAL << ITE_ENTRY_INTTYPE_SHIFT);
403
+
404
+ if (ignore_pInt) {
405
+ ite.itel |= (eventid << ITE_ENTRY_INTID_SHIFT);
406
+ } else {
407
+ ite.itel |= (pIntid << ITE_ENTRY_INTID_SHIFT);
408
+ }
409
+ ite.itel |= (int_spurious << ITE_ENTRY_INTSP_SHIFT);
410
+ ite.iteh = icid;
411
+
412
+ result = update_ite(s, eventid, dte, ite);
413
+ }
414
+
415
+ return result;
416
+}
417
+
418
static bool update_cte(GICv3ITSState *s, uint16_t icid, bool valid,
419
uint64_t rdbase)
420
{
421
@@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s)
422
423
switch (cmd) {
424
case GITS_CMD_INT:
425
+ res = process_its_cmd(s, data, cq_offset, INT);
426
break;
427
case GITS_CMD_CLEAR:
428
+ res = process_its_cmd(s, data, cq_offset, CLEAR);
429
break;
430
case GITS_CMD_SYNC:
431
/*
432
@@ -XXX,XX +XXX,XX @@ static void process_cmdq(GICv3ITSState *s)
433
result = process_mapc(s, cq_offset);
434
break;
435
case GITS_CMD_MAPTI:
436
+ result = process_mapti(s, data, cq_offset, false);
437
break;
438
case GITS_CMD_MAPI:
439
+ result = process_mapti(s, data, cq_offset, true);
440
break;
441
case GITS_CMD_DISCARD:
442
+ result = process_its_cmd(s, data, cq_offset, DISCARD);
443
break;
444
case GITS_CMD_INV:
445
case GITS_CMD_INVALL:
446
@@ -XXX,XX +XXX,XX @@ static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset,
447
uint64_t data, unsigned size,
448
MemTxAttrs attrs)
449
{
450
- return MEMTX_OK;
451
+ GICv3ITSState *s = (GICv3ITSState *)opaque;
452
+ bool result = true;
453
+ uint32_t devid = 0;
454
+
455
+ switch (offset) {
456
+ case GITS_TRANSLATER:
457
+ if (s->ctlr & ITS_CTLR_ENABLED) {
458
+ devid = attrs.requester_id;
459
+ result = process_its_cmd(s, data, devid, NONE);
460
+ }
461
+ break;
462
+ default:
463
+ break;
464
+ }
465
+
466
+ if (result) {
467
+ return MEMTX_OK;
468
+ } else {
469
+ return MEMTX_ERROR;
470
+ }
471
}
472
473
static bool its_writel(GICv3ITSState *s, hwaddr offset,
474
--
168
--
475
2.20.1
169
2.25.1
476
477
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Message-id: 20220708151540.18136-26-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 2 ++
10
target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 30 ++++++++++++++++++++
12
4 files changed, 90 insertions(+)
13
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/helper-sme.h
17
+++ b/target/arm/helper-sme.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
19
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
20
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
22
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
23
+ void, ptr, ptr, ptr, ptr, ptr, i32)
24
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/arm/sme.decode
27
+++ b/target/arm/sme.decode
28
@@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
29
30
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
31
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
32
+
33
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
34
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/target/arm/sme_helper.c
37
+++ b/target/arm/sme_helper.c
38
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
39
}
40
}
41
}
42
+
43
+/*
44
+ * Alter PAIR as needed for controlling predicates being false,
45
+ * and for NEG on an enabled row element.
46
+ */
47
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
48
+{
49
+ /*
50
+ * The pseudocode uses a conditional negate after the conditional zero.
51
+ * It is simpler here to unconditionally negate before conditional zero.
52
+ */
53
+ pair ^= neg;
54
+ if (!(pg & 1)) {
55
+ pair &= 0xffff0000u;
56
+ }
57
+ if (!(pg & 4)) {
58
+ pair &= 0x0000ffffu;
59
+ }
60
+ return pair;
61
+}
62
+
63
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
64
+ void *vpm, uint32_t desc)
65
+{
66
+ intptr_t row, col, oprsz = simd_maxsz(desc);
67
+ uint32_t neg = simd_data(desc) * 0x80008000u;
68
+ uint16_t *pn = vpn, *pm = vpm;
69
+
70
+ for (row = 0; row < oprsz; ) {
71
+ uint16_t prow = pn[H2(row >> 4)];
72
+ do {
73
+ void *vza_row = vza + tile_vslice_offset(row);
74
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
75
+
76
+ n = f16mop_adj_pair(n, prow, neg);
77
+
78
+ for (col = 0; col < oprsz; ) {
79
+ uint16_t pcol = pm[H2(col >> 4)];
80
+ do {
81
+ if (prow & pcol & 0b0101) {
82
+ uint32_t *a = vza_row + H1_4(col);
83
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
84
+
85
+ m = f16mop_adj_pair(m, pcol, 0);
86
+ *a = bfdotadd(*a, n, m);
87
+
88
+ col += 4;
89
+ pcol >>= 4;
90
+ }
91
+ } while (col & 15);
92
+ }
93
+ row += 4;
94
+ prow >>= 4;
95
+ } while (row & 15);
96
+ }
97
+}
98
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/target/arm/translate-sme.c
101
+++ b/target/arm/translate-sme.c
102
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
103
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
104
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
105
106
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
107
+ gen_helper_gvec_5 *fn)
108
+{
109
+ int svl = streaming_vec_reg_size(s);
110
+ uint32_t desc = simd_desc(svl, svl, a->sub);
111
+ TCGv_ptr za, zn, zm, pn, pm;
112
+
113
+ if (!sme_smza_enabled_check(s)) {
114
+ return true;
115
+ }
116
+
117
+ /* Sum XZR+zad to find ZAd. */
118
+ za = get_tile_rowcol(s, esz, 31, a->zad, false);
119
+ zn = vec_full_reg_ptr(s, a->zn);
120
+ zm = vec_full_reg_ptr(s, a->zm);
121
+ pn = pred_full_reg_ptr(s, a->pn);
122
+ pm = pred_full_reg_ptr(s, a->pm);
123
+
124
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
125
+
126
+ tcg_temp_free_ptr(za);
127
+ tcg_temp_free_ptr(zn);
128
+ tcg_temp_free_ptr(pn);
129
+ tcg_temp_free_ptr(pm);
130
+ return true;
131
+}
132
+
133
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
134
gen_helper_gvec_5_ptr *fn)
135
{
136
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
137
138
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
139
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
140
+
141
+/* TODO: FEAT_EBF16 */
142
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
143
--
144
2.25.1
diff view generated by jsdifflib
1
From: Chris Rauer <crauer@google.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
kudo-bmc is a board supported by OpenBMC.
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
https://github.com/openbmc/openbmc/tree/master/meta-fii/meta-kudo
4
Message-id: 20220708151540.18136-27-richard.henderson@linaro.org
5
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
target/arm/helper-sme.h | 2 ++
9
target/arm/sme.decode | 1 +
10
target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++
11
target/arm/translate-sme.c | 1 +
12
4 files changed, 78 insertions(+)
5
13
6
Since v1:
14
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
7
- hyphenated Cortex-A9
8
9
Tested: Booted kudo firmware.
10
Signed-off-by: Chris Rauer <crauer@google.com>
11
Reviewed-by: Patrick Venture <venture@google.com>
12
Message-id: 20210907223234.1165705-1-crauer@google.com
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
docs/system/arm/nuvoton.rst | 1 +
16
hw/arm/npcm7xx_boards.c | 34 ++++++++++++++++++++++++++++++++++
17
2 files changed, 35 insertions(+)
18
19
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/docs/system/arm/nuvoton.rst
16
--- a/target/arm/helper-sme.h
22
+++ b/docs/system/arm/nuvoton.rst
17
+++ b/target/arm/helper-sme.h
23
@@ -XXX,XX +XXX,XX @@ Hyperscale applications. The following machines are based on this chip :
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
24
19
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
25
- ``quanta-gbs-bmc`` Quanta GBS server BMC
20
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
26
- ``quanta-gsj`` Quanta GSJ server BMC
21
27
+- ``kudo-bmc`` Fii USA Kudo server BMC
22
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
28
23
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
29
There are also two more SoCs, NPCM710 and NPCM705, which are single-core
24
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
30
variants of NPCM750 and NPCM730, respectively. These are currently not
25
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
31
diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
26
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
27
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
32
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/arm/npcm7xx_boards.c
29
--- a/target/arm/sme.decode
34
+++ b/hw/arm/npcm7xx_boards.c
30
+++ b/target/arm/sme.decode
35
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
36
#define NPCM750_EVB_POWER_ON_STRAPS 0x00001ff7
32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
37
#define QUANTA_GSJ_POWER_ON_STRAPS 0x00001fff
33
38
#define QUANTA_GBS_POWER_ON_STRAPS 0x000017ff
34
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
39
+#define KUDO_BMC_POWER_ON_STRAPS 0x00001fff
35
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
40
36
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
41
static const char npcm7xx_default_bootrom[] = "npcm7xx_bootrom.bin";
37
index XXXXXXX..XXXXXXX 100644
42
38
--- a/target/arm/sme_helper.c
43
@@ -XXX,XX +XXX,XX @@ static void quanta_gbs_init(MachineState *machine)
39
+++ b/target/arm/sme_helper.c
44
npcm7xx_load_kernel(machine, soc);
40
@@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
41
return pair;
45
}
42
}
46
43
47
+static void kudo_bmc_init(MachineState *machine)
44
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
45
+ float_status *s_std, float_status *s_odd)
48
+{
46
+{
49
+ NPCM7xxState *soc;
47
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
48
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
49
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
50
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
51
+ float64 t64;
52
+ float32 t32;
50
+
53
+
51
+ soc = npcm7xx_create_soc(machine, KUDO_BMC_POWER_ON_STRAPS);
54
+ /*
52
+ npcm7xx_connect_dram(soc, machine->ram);
55
+ * The ARM pseudocode function FPDot performs both multiplies
53
+ qdev_realize(DEVICE(soc), NULL, &error_fatal);
56
+ * and the add with a single rounding operation. Emulate this
57
+ * by performing the first multiply in round-to-odd, then doing
58
+ * the second multiply as fused multiply-add, and rounding to
59
+ * float32 all in one step.
60
+ */
61
+ t64 = float64_mul(e1r, e2r, s_odd);
62
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
54
+
63
+
55
+ npcm7xx_load_bootrom(machine, soc);
64
+ /* This conversion is exact, because we've already rounded. */
56
+ npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f",
65
+ t32 = float64_to_float32(t64, s_std);
57
+ drive_get(IF_MTD, 0, 0));
58
+ npcm7xx_connect_flash(&soc->fiu[1], 0, "mx66u51235f",
59
+ drive_get(IF_MTD, 3, 0));
60
+
66
+
61
+ npcm7xx_load_kernel(machine, soc);
67
+ /* The final accumulation step is not fused. */
68
+ return float32_add(sum, t32, s_std);
62
+}
69
+}
63
+
70
+
64
static void npcm7xx_set_soc_type(NPCM7xxMachineClass *nmc, const char *type)
71
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
72
+ void *vpm, void *vst, uint32_t desc)
73
+{
74
+ intptr_t row, col, oprsz = simd_maxsz(desc);
75
+ uint32_t neg = simd_data(desc) * 0x80008000u;
76
+ uint16_t *pn = vpn, *pm = vpm;
77
+ float_status fpst_odd, fpst_std;
78
+
79
+ /*
80
+ * Make a copy of float_status because this operation does not
81
+ * update the cumulative fp exception status. It also produces
82
+ * default nans. Make a second copy with round-to-odd -- see above.
83
+ */
84
+ fpst_std = *(float_status *)vst;
85
+ set_default_nan_mode(true, &fpst_std);
86
+ fpst_odd = fpst_std;
87
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
88
+
89
+ for (row = 0; row < oprsz; ) {
90
+ uint16_t prow = pn[H2(row >> 4)];
91
+ do {
92
+ void *vza_row = vza + tile_vslice_offset(row);
93
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
94
+
95
+ n = f16mop_adj_pair(n, prow, neg);
96
+
97
+ for (col = 0; col < oprsz; ) {
98
+ uint16_t pcol = pm[H2(col >> 4)];
99
+ do {
100
+ if (prow & pcol & 0b0101) {
101
+ uint32_t *a = vza_row + H1_4(col);
102
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
103
+
104
+ m = f16mop_adj_pair(m, pcol, 0);
105
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
106
+
107
+ col += 4;
108
+ pcol >>= 4;
109
+ }
110
+ } while (col & 15);
111
+ }
112
+ row += 4;
113
+ prow >>= 4;
114
+ } while (row & 15);
115
+ }
116
+}
117
+
118
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
119
void *vpm, uint32_t desc)
65
{
120
{
66
NPCM7xxClass *sc = NPCM7XX_CLASS(object_class_by_name(type));
121
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
67
@@ -XXX,XX +XXX,XX @@ static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data)
122
index XXXXXXX..XXXXXXX 100644
68
mc->default_ram_size = 1 * GiB;
123
--- a/target/arm/translate-sme.c
124
+++ b/target/arm/translate-sme.c
125
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
126
return true;
69
}
127
}
70
128
71
+static void kudo_bmc_machine_class_init(ObjectClass *oc, void *data)
129
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
72
+{
130
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
73
+ NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc);
131
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
74
+ MachineClass *mc = MACHINE_CLASS(oc);
75
+
76
+ npcm7xx_set_soc_type(nmc, TYPE_NPCM730);
77
+
78
+ mc->desc = "Kudo BMC (Cortex-A9)";
79
+ mc->init = kudo_bmc_init;
80
+ mc->default_ram_size = 1 * GiB;
81
+};
82
+
83
static const TypeInfo npcm7xx_machine_types[] = {
84
{
85
.name = TYPE_NPCM7XX_MACHINE,
86
@@ -XXX,XX +XXX,XX @@ static const TypeInfo npcm7xx_machine_types[] = {
87
.name = MACHINE_TYPE_NAME("quanta-gbs-bmc"),
88
.parent = TYPE_NPCM7XX_MACHINE,
89
.class_init = gbs_bmc_machine_class_init,
90
+ }, {
91
+ .name = MACHINE_TYPE_NAME("kudo-bmc"),
92
+ .parent = TYPE_NPCM7XX_MACHINE,
93
+ .class_init = kudo_bmc_machine_class_init,
94
},
95
};
96
132
97
--
133
--
98
2.20.1
134
2.25.1
99
100
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Defined descriptors for ITS device table,collection table and ITS
3
This is SMOPA, SUMOPA, USMOPA_s, UMOPA, for both Int8 and Int16.
4
command queue entities.Implemented register read/write functions,
5
extract ITS table parameters and command queue parameters,extended
6
gicv3 common to capture qemu address space(which host the ITS table
7
platform memories required for subsequent ITS processing) and
8
initialize the same in ITS device.
9
4
10
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Reviewed-by: Eric Auger <eric.auger@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
7
Message-id: 20220708151540.18136-28-richard.henderson@linaro.org
14
Message-id: 20210910143951.92242-3-shashi.mallela@linaro.org
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
9
---
17
hw/intc/gicv3_internal.h | 29 ++
10
target/arm/helper-sme.h | 16 ++++++++
18
include/hw/intc/arm_gicv3_common.h | 3 +
11
target/arm/sme.decode | 10 +++++
19
include/hw/intc/arm_gicv3_its_common.h | 23 ++
12
target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++
20
hw/intc/arm_gicv3_its.c | 376 +++++++++++++++++++++++++
13
target/arm/translate-sme.c | 10 +++++
21
4 files changed, 431 insertions(+)
14
4 files changed, 118 insertions(+)
22
15
23
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
16
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
24
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/intc/gicv3_internal.h
18
--- a/target/arm/helper-sme.h
26
+++ b/hw/intc/gicv3_internal.h
19
+++ b/target/arm/helper-sme.h
27
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_BASER, INNERCACHE, 59, 3)
20
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
28
FIELD(GITS_BASER, INDIRECT, 62, 1)
21
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
29
FIELD(GITS_BASER, VALID, 63, 1)
22
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
30
23
void, ptr, ptr, ptr, ptr, ptr, i32)
31
+FIELD(GITS_CBASER, SIZE, 0, 8)
24
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
32
+FIELD(GITS_CBASER, SHAREABILITY, 10, 2)
25
+ void, ptr, ptr, ptr, ptr, ptr, i32)
33
+FIELD(GITS_CBASER, PHYADDR, 12, 40)
26
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
34
+FIELD(GITS_CBASER, OUTERCACHE, 53, 3)
27
+ void, ptr, ptr, ptr, ptr, ptr, i32)
35
+FIELD(GITS_CBASER, INNERCACHE, 59, 3)
28
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
36
+FIELD(GITS_CBASER, VALID, 63, 1)
29
+ void, ptr, ptr, ptr, ptr, ptr, i32)
30
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
31
+ void, ptr, ptr, ptr, ptr, ptr, i32)
32
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
33
+ void, ptr, ptr, ptr, ptr, ptr, i32)
34
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, ptr, i32)
40
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
41
index XXXXXXX..XXXXXXX 100644
42
--- a/target/arm/sme.decode
43
+++ b/target/arm/sme.decode
44
@@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
45
46
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
47
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
37
+
48
+
38
+FIELD(GITS_CREADR, STALLED, 0, 1)
49
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
39
+FIELD(GITS_CREADR, OFFSET, 5, 15)
50
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
51
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
52
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
40
+
53
+
41
+FIELD(GITS_CWRITER, RETRY, 0, 1)
54
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
42
+FIELD(GITS_CWRITER, OFFSET, 5, 15)
55
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
56
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
57
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
58
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/sme_helper.c
61
+++ b/target/arm/sme_helper.c
62
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
63
} while (row & 15);
64
}
65
}
43
+
66
+
44
+FIELD(GITS_CTLR, ENABLED, 0, 1)
67
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
45
FIELD(GITS_CTLR, QUIESCENT, 31, 1)
46
47
FIELD(GITS_TYPER, PHYSICAL, 0, 1)
48
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, PTA, 19, 1)
49
FIELD(GITS_TYPER, CIDBITS, 32, 4)
50
FIELD(GITS_TYPER, CIL, 36, 1)
51
52
+#define GITS_IDREGS 0xFFD0
53
+
68
+
54
+#define ITS_CTLR_ENABLED (1U) /* ITS Enabled */
69
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
70
+ uint8_t *pn, uint8_t *pm,
71
+ uint32_t desc, IMOPFn *fn)
72
+{
73
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
74
+ bool neg = simd_data(desc);
55
+
75
+
56
+#define GITS_BASER_RO_MASK (R_GITS_BASER_ENTRYSIZE_MASK | \
76
+ for (row = 0; row < oprsz; ++row) {
57
+ R_GITS_BASER_TYPE_MASK)
77
+ uint8_t pa = pn[H1(row)];
78
+ uint64_t *za_row = &za[tile_vslice_index(row)];
79
+ uint64_t n = zn[row];
58
+
80
+
59
#define GITS_BASER_PAGESIZE_4K 0
81
+ for (col = 0; col < oprsz; ++col) {
60
#define GITS_BASER_PAGESIZE_16K 1
82
+ uint8_t pb = pm[H1(col)];
61
#define GITS_BASER_PAGESIZE_64K 2
83
+ uint64_t *a = &za_row[col];
62
@@ -XXX,XX +XXX,XX @@ FIELD(GITS_TYPER, CIL, 36, 1)
63
#define GITS_BASER_TYPE_DEVICE 1ULL
64
#define GITS_BASER_TYPE_COLLECTION 4ULL
65
66
+#define GITS_PAGE_SIZE_4K 0x1000
67
+#define GITS_PAGE_SIZE_16K 0x4000
68
+#define GITS_PAGE_SIZE_64K 0x10000
69
+
84
+
70
+#define L1TABLE_ENTRY_SIZE 8
85
+ *a = fn(n, zm[col], *a, pa & pb, neg);
71
+
72
+#define GITS_CMDQ_ENTRY_SIZE 32
73
+
74
/**
75
* Default features advertised by this version of ITS
76
*/
77
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/hw/intc/arm_gicv3_common.h
80
+++ b/include/hw/intc/arm_gicv3_common.h
81
@@ -XXX,XX +XXX,XX @@ struct GICv3State {
82
int dev_fd; /* kvm device fd if backed by kvm vgic support */
83
Error *migration_blocker;
84
85
+ MemoryRegion *dma;
86
+ AddressSpace dma_as;
87
+
88
/* Distributor */
89
90
/* for a GIC with the security extensions the NS banked version of this
91
diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
92
index XXXXXXX..XXXXXXX 100644
93
--- a/include/hw/intc/arm_gicv3_its_common.h
94
+++ b/include/hw/intc/arm_gicv3_its_common.h
95
@@ -XXX,XX +XXX,XX @@
96
97
#define GITS_TRANSLATER 0x0040
98
99
+typedef struct {
100
+ bool valid;
101
+ bool indirect;
102
+ uint16_t entry_sz;
103
+ uint32_t page_sz;
104
+ uint32_t max_entries;
105
+ union {
106
+ uint32_t max_devids;
107
+ uint32_t max_collids;
108
+ } maxids;
109
+ uint64_t base_addr;
110
+} TableDesc;
111
+
112
+typedef struct {
113
+ bool valid;
114
+ uint32_t max_entries;
115
+ uint64_t base_addr;
116
+} CmdQDesc;
117
+
118
struct GICv3ITSState {
119
SysBusDevice parent_obj;
120
121
@@ -XXX,XX +XXX,XX @@ struct GICv3ITSState {
122
uint64_t creadr;
123
uint64_t baser[8];
124
125
+ TableDesc dt;
126
+ TableDesc ct;
127
+ CmdQDesc cq;
128
+
129
Error *migration_blocker;
130
};
131
132
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/hw/intc/arm_gicv3_its.c
135
+++ b/hw/intc/arm_gicv3_its.c
136
@@ -XXX,XX +XXX,XX @@ struct GICv3ITSClass {
137
void (*parent_reset)(DeviceState *dev);
138
};
139
140
+static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz)
141
+{
142
+ uint64_t result = 0;
143
+
144
+ switch (page_sz) {
145
+ case GITS_PAGE_SIZE_4K:
146
+ case GITS_PAGE_SIZE_16K:
147
+ result = FIELD_EX64(value, GITS_BASER, PHYADDR) << 12;
148
+ break;
149
+
150
+ case GITS_PAGE_SIZE_64K:
151
+ result = FIELD_EX64(value, GITS_BASER, PHYADDRL_64K) << 16;
152
+ result |= FIELD_EX64(value, GITS_BASER, PHYADDRH_64K) << 48;
153
+ break;
154
+
155
+ default:
156
+ break;
157
+ }
158
+ return result;
159
+}
160
+
161
+/*
162
+ * This function extracts the ITS Device and Collection table specific
163
+ * parameters (like base_addr, size etc) from GITS_BASER register.
164
+ * It is called during ITS enable and also during post_load migration
165
+ */
166
+static void extract_table_params(GICv3ITSState *s)
167
+{
168
+ uint16_t num_pages = 0;
169
+ uint8_t page_sz_type;
170
+ uint8_t type;
171
+ uint32_t page_sz = 0;
172
+ uint64_t value;
173
+
174
+ for (int i = 0; i < 8; i++) {
175
+ value = s->baser[i];
176
+
177
+ if (!value) {
178
+ continue;
179
+ }
180
+
181
+ page_sz_type = FIELD_EX64(value, GITS_BASER, PAGESIZE);
182
+
183
+ switch (page_sz_type) {
184
+ case 0:
185
+ page_sz = GITS_PAGE_SIZE_4K;
186
+ break;
187
+
188
+ case 1:
189
+ page_sz = GITS_PAGE_SIZE_16K;
190
+ break;
191
+
192
+ case 2:
193
+ case 3:
194
+ page_sz = GITS_PAGE_SIZE_64K;
195
+ break;
196
+
197
+ default:
198
+ g_assert_not_reached();
199
+ }
200
+
201
+ num_pages = FIELD_EX64(value, GITS_BASER, SIZE) + 1;
202
+
203
+ type = FIELD_EX64(value, GITS_BASER, TYPE);
204
+
205
+ switch (type) {
206
+
207
+ case GITS_BASER_TYPE_DEVICE:
208
+ memset(&s->dt, 0 , sizeof(s->dt));
209
+ s->dt.valid = FIELD_EX64(value, GITS_BASER, VALID);
210
+
211
+ if (!s->dt.valid) {
212
+ return;
213
+ }
214
+
215
+ s->dt.page_sz = page_sz;
216
+ s->dt.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT);
217
+ s->dt.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE);
218
+
219
+ if (!s->dt.indirect) {
220
+ s->dt.max_entries = (num_pages * page_sz) / s->dt.entry_sz;
221
+ } else {
222
+ s->dt.max_entries = (((num_pages * page_sz) /
223
+ L1TABLE_ENTRY_SIZE) *
224
+ (page_sz / s->dt.entry_sz));
225
+ }
226
+
227
+ s->dt.maxids.max_devids = (1UL << (FIELD_EX64(s->typer, GITS_TYPER,
228
+ DEVBITS) + 1));
229
+
230
+ s->dt.base_addr = baser_base_addr(value, page_sz);
231
+
232
+ break;
233
+
234
+ case GITS_BASER_TYPE_COLLECTION:
235
+ memset(&s->ct, 0 , sizeof(s->ct));
236
+ s->ct.valid = FIELD_EX64(value, GITS_BASER, VALID);
237
+
238
+ /*
239
+ * GITS_TYPER.HCC is 0 for this implementation
240
+ * hence writes are discarded if ct.valid is 0
241
+ */
242
+ if (!s->ct.valid) {
243
+ return;
244
+ }
245
+
246
+ s->ct.page_sz = page_sz;
247
+ s->ct.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT);
248
+ s->ct.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE);
249
+
250
+ if (!s->ct.indirect) {
251
+ s->ct.max_entries = (num_pages * page_sz) / s->ct.entry_sz;
252
+ } else {
253
+ s->ct.max_entries = (((num_pages * page_sz) /
254
+ L1TABLE_ENTRY_SIZE) *
255
+ (page_sz / s->ct.entry_sz));
256
+ }
257
+
258
+ if (FIELD_EX64(s->typer, GITS_TYPER, CIL)) {
259
+ s->ct.maxids.max_collids = (1UL << (FIELD_EX64(s->typer,
260
+ GITS_TYPER, CIDBITS) + 1));
261
+ } else {
262
+ /* 16-bit CollectionId supported when CIL == 0 */
263
+ s->ct.maxids.max_collids = (1UL << 16);
264
+ }
265
+
266
+ s->ct.base_addr = baser_base_addr(value, page_sz);
267
+
268
+ break;
269
+
270
+ default:
271
+ break;
272
+ }
86
+ }
273
+ }
87
+ }
274
+}
88
+}
275
+
89
+
276
+static void extract_cmdq_params(GICv3ITSState *s)
90
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
277
+{
91
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
278
+ uint16_t num_pages = 0;
92
+{ \
279
+ uint64_t value = s->cbaser;
93
+ uint32_t sum0 = 0, sum1 = 0; \
280
+
94
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
281
+ num_pages = FIELD_EX64(value, GITS_CBASER, SIZE) + 1;
95
+ n &= expand_pred_b(p); \
282
+
96
+ sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
283
+ memset(&s->cq, 0 , sizeof(s->cq));
97
+ sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
284
+ s->cq.valid = FIELD_EX64(value, GITS_CBASER, VALID);
98
+ sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
285
+
99
+ sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
286
+ if (s->cq.valid) {
100
+ sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
287
+ s->cq.max_entries = (num_pages * GITS_PAGE_SIZE_4K) /
101
+ sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \
288
+ GITS_CMDQ_ENTRY_SIZE;
102
+ sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
289
+ s->cq.base_addr = FIELD_EX64(value, GITS_CBASER, PHYADDR);
103
+ sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \
290
+ s->cq.base_addr <<= R_GITS_CBASER_PHYADDR_SHIFT;
104
+ if (neg) { \
291
+ }
105
+ sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \
106
+ } else { \
107
+ sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \
108
+ } \
109
+ return ((uint64_t)sum1 << 32) | sum0; \
292
+}
110
+}
293
+
111
+
294
static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset,
112
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
295
uint64_t data, unsigned size,
113
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
296
MemTxAttrs attrs)
114
+{ \
297
@@ -XXX,XX +XXX,XX @@ static bool its_writel(GICv3ITSState *s, hwaddr offset,
115
+ uint64_t sum = 0; \
298
uint64_t value, MemTxAttrs attrs)
116
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
299
{
117
+ n &= expand_pred_h(p); \
300
bool result = true;
118
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
301
+ int index;
119
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
302
120
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
303
+ switch (offset) {
121
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
304
+ case GITS_CTLR:
122
+ return neg ? a - sum : a + sum; \
305
+ s->ctlr |= (value & ~(s->ctlr));
306
+
307
+ if (s->ctlr & ITS_CTLR_ENABLED) {
308
+ extract_table_params(s);
309
+ extract_cmdq_params(s);
310
+ s->creadr = 0;
311
+ }
312
+ break;
313
+ case GITS_CBASER:
314
+ /*
315
+ * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is
316
+ * already enabled
317
+ */
318
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
319
+ s->cbaser = deposit64(s->cbaser, 0, 32, value);
320
+ s->creadr = 0;
321
+ s->cwriter = s->creadr;
322
+ }
323
+ break;
324
+ case GITS_CBASER + 4:
325
+ /*
326
+ * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is
327
+ * already enabled
328
+ */
329
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
330
+ s->cbaser = deposit64(s->cbaser, 32, 32, value);
331
+ s->creadr = 0;
332
+ s->cwriter = s->creadr;
333
+ }
334
+ break;
335
+ case GITS_CWRITER:
336
+ s->cwriter = deposit64(s->cwriter, 0, 32,
337
+ (value & ~R_GITS_CWRITER_RETRY_MASK));
338
+ break;
339
+ case GITS_CWRITER + 4:
340
+ s->cwriter = deposit64(s->cwriter, 32, 32, value);
341
+ break;
342
+ case GITS_CREADR:
343
+ if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) {
344
+ s->creadr = deposit64(s->creadr, 0, 32,
345
+ (value & ~R_GITS_CREADR_STALLED_MASK));
346
+ } else {
347
+ /* RO register, ignore the write */
348
+ qemu_log_mask(LOG_GUEST_ERROR,
349
+ "%s: invalid guest write to RO register at offset "
350
+ TARGET_FMT_plx "\n", __func__, offset);
351
+ }
352
+ break;
353
+ case GITS_CREADR + 4:
354
+ if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) {
355
+ s->creadr = deposit64(s->creadr, 32, 32, value);
356
+ } else {
357
+ /* RO register, ignore the write */
358
+ qemu_log_mask(LOG_GUEST_ERROR,
359
+ "%s: invalid guest write to RO register at offset "
360
+ TARGET_FMT_plx "\n", __func__, offset);
361
+ }
362
+ break;
363
+ case GITS_BASER ... GITS_BASER + 0x3f:
364
+ /*
365
+ * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is
366
+ * already enabled
367
+ */
368
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
369
+ index = (offset - GITS_BASER) / 8;
370
+
371
+ if (offset & 7) {
372
+ value <<= 32;
373
+ value &= ~GITS_BASER_RO_MASK;
374
+ s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(0, 32);
375
+ s->baser[index] |= value;
376
+ } else {
377
+ value &= ~GITS_BASER_RO_MASK;
378
+ s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(32, 32);
379
+ s->baser[index] |= value;
380
+ }
381
+ }
382
+ break;
383
+ case GITS_IIDR:
384
+ case GITS_IDREGS ... GITS_IDREGS + 0x2f:
385
+ /* RO registers, ignore the write */
386
+ qemu_log_mask(LOG_GUEST_ERROR,
387
+ "%s: invalid guest write to RO register at offset "
388
+ TARGET_FMT_plx "\n", __func__, offset);
389
+ break;
390
+ default:
391
+ result = false;
392
+ break;
393
+ }
394
return result;
395
}
396
397
@@ -XXX,XX +XXX,XX @@ static bool its_readl(GICv3ITSState *s, hwaddr offset,
398
uint64_t *data, MemTxAttrs attrs)
399
{
400
bool result = true;
401
+ int index;
402
403
+ switch (offset) {
404
+ case GITS_CTLR:
405
+ *data = s->ctlr;
406
+ break;
407
+ case GITS_IIDR:
408
+ *data = gicv3_iidr();
409
+ break;
410
+ case GITS_IDREGS ... GITS_IDREGS + 0x2f:
411
+ /* ID registers */
412
+ *data = gicv3_idreg(offset - GITS_IDREGS);
413
+ break;
414
+ case GITS_TYPER:
415
+ *data = extract64(s->typer, 0, 32);
416
+ break;
417
+ case GITS_TYPER + 4:
418
+ *data = extract64(s->typer, 32, 32);
419
+ break;
420
+ case GITS_CBASER:
421
+ *data = extract64(s->cbaser, 0, 32);
422
+ break;
423
+ case GITS_CBASER + 4:
424
+ *data = extract64(s->cbaser, 32, 32);
425
+ break;
426
+ case GITS_CREADR:
427
+ *data = extract64(s->creadr, 0, 32);
428
+ break;
429
+ case GITS_CREADR + 4:
430
+ *data = extract64(s->creadr, 32, 32);
431
+ break;
432
+ case GITS_CWRITER:
433
+ *data = extract64(s->cwriter, 0, 32);
434
+ break;
435
+ case GITS_CWRITER + 4:
436
+ *data = extract64(s->cwriter, 32, 32);
437
+ break;
438
+ case GITS_BASER ... GITS_BASER + 0x3f:
439
+ index = (offset - GITS_BASER) / 8;
440
+ if (offset & 7) {
441
+ *data = extract64(s->baser[index], 32, 32);
442
+ } else {
443
+ *data = extract64(s->baser[index], 0, 32);
444
+ }
445
+ break;
446
+ default:
447
+ result = false;
448
+ break;
449
+ }
450
return result;
451
}
452
453
@@ -XXX,XX +XXX,XX @@ static bool its_writell(GICv3ITSState *s, hwaddr offset,
454
uint64_t value, MemTxAttrs attrs)
455
{
456
bool result = true;
457
+ int index;
458
459
+ switch (offset) {
460
+ case GITS_BASER ... GITS_BASER + 0x3f:
461
+ /*
462
+ * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is
463
+ * already enabled
464
+ */
465
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
466
+ index = (offset - GITS_BASER) / 8;
467
+ s->baser[index] &= GITS_BASER_RO_MASK;
468
+ s->baser[index] |= (value & ~GITS_BASER_RO_MASK);
469
+ }
470
+ break;
471
+ case GITS_CBASER:
472
+ /*
473
+ * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is
474
+ * already enabled
475
+ */
476
+ if (!(s->ctlr & ITS_CTLR_ENABLED)) {
477
+ s->cbaser = value;
478
+ s->creadr = 0;
479
+ s->cwriter = s->creadr;
480
+ }
481
+ break;
482
+ case GITS_CWRITER:
483
+ s->cwriter = value & ~R_GITS_CWRITER_RETRY_MASK;
484
+ break;
485
+ case GITS_CREADR:
486
+ if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) {
487
+ s->creadr = value & ~R_GITS_CREADR_STALLED_MASK;
488
+ } else {
489
+ /* RO register, ignore the write */
490
+ qemu_log_mask(LOG_GUEST_ERROR,
491
+ "%s: invalid guest write to RO register at offset "
492
+ TARGET_FMT_plx "\n", __func__, offset);
493
+ }
494
+ break;
495
+ case GITS_TYPER:
496
+ /* RO registers, ignore the write */
497
+ qemu_log_mask(LOG_GUEST_ERROR,
498
+ "%s: invalid guest write to RO register at offset "
499
+ TARGET_FMT_plx "\n", __func__, offset);
500
+ break;
501
+ default:
502
+ result = false;
503
+ break;
504
+ }
505
return result;
506
}
507
508
@@ -XXX,XX +XXX,XX @@ static bool its_readll(GICv3ITSState *s, hwaddr offset,
509
uint64_t *data, MemTxAttrs attrs)
510
{
511
bool result = true;
512
+ int index;
513
514
+ switch (offset) {
515
+ case GITS_TYPER:
516
+ *data = s->typer;
517
+ break;
518
+ case GITS_BASER ... GITS_BASER + 0x3f:
519
+ index = (offset - GITS_BASER) / 8;
520
+ *data = s->baser[index];
521
+ break;
522
+ case GITS_CBASER:
523
+ *data = s->cbaser;
524
+ break;
525
+ case GITS_CREADR:
526
+ *data = s->creadr;
527
+ break;
528
+ case GITS_CWRITER:
529
+ *data = s->cwriter;
530
+ break;
531
+ default:
532
+ result = false;
533
+ break;
534
+ }
535
return result;
536
}
537
538
@@ -XXX,XX +XXX,XX @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
539
540
gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops);
541
542
+ address_space_init(&s->gicv3->dma_as, s->gicv3->dma,
543
+ "gicv3-its-sysmem");
544
+
545
/* set the ITS default features supported */
546
s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL,
547
GITS_TYPE_PHYSICAL);
548
@@ -XXX,XX +XXX,XX @@ static void gicv3_its_reset(DeviceState *dev)
549
GITS_CTE_SIZE - 1);
550
}
551
552
+static void gicv3_its_post_load(GICv3ITSState *s)
553
+{
554
+ if (s->ctlr & ITS_CTLR_ENABLED) {
555
+ extract_table_params(s);
556
+ extract_cmdq_params(s);
557
+ }
558
+}
123
+}
559
+
124
+
560
static Property gicv3_its_props[] = {
125
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
561
DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
126
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
562
GICv3State *),
127
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
563
@@ -XXX,XX +XXX,XX @@ static void gicv3_its_class_init(ObjectClass *klass, void *data)
128
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
564
{
129
+
565
DeviceClass *dc = DEVICE_CLASS(klass);
130
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
566
GICv3ITSClass *ic = ARM_GICV3_ITS_CLASS(klass);
131
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
567
+ GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass);
132
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
568
133
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
569
dc->realize = gicv3_arm_its_realize;
134
+
570
device_class_set_props(dc, gicv3_its_props);
135
+#define DEF_IMOPH(NAME) \
571
device_class_set_parent_reset(dc, gicv3_its_reset, &ic->parent_reset);
136
+ void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \
572
+ icc->post_load = gicv3_its_post_load;
137
+ void *vpm, uint32_t desc) \
573
}
138
+ { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); }
574
139
+
575
static const TypeInfo gicv3_its_info = {
140
+DEF_IMOPH(smopa_s)
141
+DEF_IMOPH(umopa_s)
142
+DEF_IMOPH(sumopa_s)
143
+DEF_IMOPH(usmopa_s)
144
+DEF_IMOPH(smopa_d)
145
+DEF_IMOPH(umopa_d)
146
+DEF_IMOPH(sumopa_d)
147
+DEF_IMOPH(usmopa_d)
148
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/target/arm/translate-sme.c
151
+++ b/target/arm/translate-sme.c
152
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f
153
154
/* TODO: FEAT_EBF16 */
155
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
156
+
157
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
158
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
159
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
160
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
161
+
162
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
163
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
164
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
165
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
576
--
166
--
577
2.20.1
167
2.25.1
578
579
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Read or write to uart registers when unclocked or in reset should be
3
This is an SVE instruction that operates using the SVE vector
4
ignored. Add the check there, and as a result of this, the check in
4
length but that it is present only if SME is implemented.
5
uart_write_tx_fifo() is now unnecessary.
6
5
7
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
10
Message-id: 20210901124521.30599-6-bmeng.cn@gmail.com
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
10
---
13
hw/char/cadence_uart.c | 15 ++++++++++-----
11
target/arm/sve.decode | 20 +++++++++++++
14
1 file changed, 10 insertions(+), 5 deletions(-)
12
target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
13
2 files changed, 77 insertions(+)
15
14
16
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
15
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/char/cadence_uart.c
17
--- a/target/arm/sve.decode
19
+++ b/hw/char/cadence_uart.c
18
+++ b/target/arm/sve.decode
20
@@ -XXX,XX +XXX,XX @@ static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond,
19
@@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
21
static void uart_write_tx_fifo(CadenceUARTState *s, const uint8_t *buf,
20
22
int size)
21
### SVE2 floating-point bfloat16 dot-product (indexed)
23
{
22
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
24
- /* ignore characters when unclocked or in reset */
23
+
25
- if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
24
+### SVE broadcast predicate element
26
- return;
25
+
27
- }
26
+&psel esz pd pn pm rv imm
28
-
27
+%psel_rv 16:2 !function=plus_12
29
if ((s->r[R_CR] & UART_CR_TX_DIS) || !(s->r[R_CR] & UART_CR_TX_EN)) {
28
+%psel_imm_b 22:2 19:2
30
return;
29
+%psel_imm_h 22:2 20:1
31
}
30
+%psel_imm_s 22:2
32
@@ -XXX,XX +XXX,XX @@ static MemTxResult uart_write(void *opaque, hwaddr offset,
31
+%psel_imm_d 23:1
33
{
32
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
34
CadenceUARTState *s = opaque;
33
+ &psel rv=%psel_rv
35
34
+
36
+ /* ignore access when unclocked or in reset */
35
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
37
+ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
36
+ @psel esz=0 imm=%psel_imm_b
38
+ return MEMTX_ERROR;
37
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
38
+ @psel esz=1 imm=%psel_imm_h
39
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
40
+ @psel esz=2 imm=%psel_imm_s
41
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
42
+ @psel esz=3 imm=%psel_imm_d
43
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/arm/translate-sve.c
46
+++ b/target/arm/translate-sve.c
47
@@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
48
49
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
50
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
51
+
52
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
53
+{
54
+ int vl = vec_full_reg_size(s);
55
+ int pl = pred_gvec_reg_size(s);
56
+ int elements = vl >> a->esz;
57
+ TCGv_i64 tmp, didx, dbit;
58
+ TCGv_ptr ptr;
59
+
60
+ if (!dc_isar_feature(aa64_sme, s)) {
61
+ return false;
62
+ }
63
+ if (!sve_access_check(s)) {
64
+ return true;
39
+ }
65
+ }
40
+
66
+
41
DB_PRINT(" offset:%x data:%08x\n", (unsigned)offset, (unsigned)value);
67
+ tmp = tcg_temp_new_i64();
42
offset >>= 2;
68
+ dbit = tcg_temp_new_i64();
43
if (offset >= CADENCE_UART_R_MAX) {
69
+ didx = tcg_temp_new_i64();
44
@@ -XXX,XX +XXX,XX @@ static MemTxResult uart_read(void *opaque, hwaddr offset,
70
+ ptr = tcg_temp_new_ptr();
45
CadenceUARTState *s = opaque;
71
+
46
uint32_t c = 0;
72
+ /* Compute the predicate element. */
47
73
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
48
+ /* ignore access when unclocked or in reset */
74
+ if (is_power_of_2(elements)) {
49
+ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
75
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
50
+ return MEMTX_ERROR;
76
+ } else {
77
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
51
+ }
78
+ }
52
+
79
+
53
offset >>= 2;
80
+ /* Extract the predicate byte and bit indices. */
54
if (offset >= CADENCE_UART_R_MAX) {
81
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
55
return MEMTX_DECODE_ERROR;
82
+ tcg_gen_andi_i64(dbit, tmp, 7);
83
+ tcg_gen_shri_i64(didx, tmp, 3);
84
+ if (HOST_BIG_ENDIAN) {
85
+ tcg_gen_xori_i64(didx, didx, 7);
86
+ }
87
+
88
+ /* Load the predicate word. */
89
+ tcg_gen_trunc_i64_ptr(ptr, didx);
90
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
91
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
92
+
93
+ /* Extract the predicate bit and replicate to MO_64. */
94
+ tcg_gen_shr_i64(tmp, tmp, dbit);
95
+ tcg_gen_andi_i64(tmp, tmp, 1);
96
+ tcg_gen_neg_i64(tmp, tmp);
97
+
98
+ /* Apply to either copy the source, or write zeros. */
99
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
100
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
101
+
102
+ tcg_temp_free_i64(tmp);
103
+ tcg_temp_free_i64(dbit);
104
+ tcg_temp_free_i64(didx);
105
+ tcg_temp_free_ptr(ptr);
106
+ return true;
107
+}
56
--
108
--
57
2.20.1
109
2.25.1
58
59
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is an SVE instruction that operates using the SVE vector
4
length but that it is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-30-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper-sve.h | 2 ++
12
target/arm/sve.decode | 1 +
13
target/arm/sve_helper.c | 16 ++++++++++++++++
14
target/arm/translate-sve.c | 2 ++
15
4 files changed, 21 insertions(+)
16
17
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper-sve.h
20
+++ b/target/arm/helper-sve.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
23
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
24
25
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
26
+
27
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
28
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
29
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
30
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/arm/sve.decode
33
+++ b/target/arm/sve.decode
34
@@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
35
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
36
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
37
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
38
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
39
40
# SVE vector splice (predicated, destructive)
41
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
42
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/target/arm/sve_helper.c
45
+++ b/target/arm/sve_helper.c
46
@@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
47
48
DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
49
50
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
51
+{
52
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
53
+ uint64_t *d = vd, *n = vn;
54
+ uint8_t *pg = vg;
55
+
56
+ for (i = 0; i < opr_sz; i += 2) {
57
+ if (pg[H1(i)] & 1) {
58
+ uint64_t n0 = n[i + 0];
59
+ uint64_t n1 = n[i + 1];
60
+ d[i + 0] = n1;
61
+ d[i + 1] = n0;
62
+ }
63
+ }
64
+}
65
+
66
DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
67
DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
68
DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
69
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/target/arm/translate-sve.c
72
+++ b/target/arm/translate-sve.c
73
@@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
74
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
75
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
76
77
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
78
+
79
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
80
gen_helper_sve_splice, a, a->esz)
81
82
--
83
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
This is an SVE instruction that operates using the SVE vector
4
length but that it is present only if SME is implemented.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-31-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
target/arm/helper.h | 18 +++++++
12
target/arm/sve.decode | 5 ++
13
target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++
14
target/arm/vec_helper.c | 24 +++++++++
15
4 files changed, 149 insertions(+)
16
17
diff --git a/target/arm/helper.h b/target/arm/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/helper.h
20
+++ b/target/arm/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
22
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
23
void, ptr, ptr, ptr, ptr, ptr, i32)
24
25
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
26
+ void, ptr, ptr, ptr, ptr, i32)
27
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
28
+ void, ptr, ptr, ptr, ptr, i32)
29
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
30
+ void, ptr, ptr, ptr, ptr, i32)
31
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
32
+ void, ptr, ptr, ptr, ptr, i32)
33
+
34
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
35
+ void, ptr, ptr, ptr, ptr, i32)
36
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
37
+ void, ptr, ptr, ptr, ptr, i32)
38
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
39
+ void, ptr, ptr, ptr, ptr, i32)
40
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
41
+ void, ptr, ptr, ptr, ptr, i32)
42
+
43
#ifdef TARGET_AARCH64
44
#include "helper-a64.h"
45
#include "helper-sve.h"
46
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/sve.decode
49
+++ b/target/arm/sve.decode
50
@@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
51
@psel esz=2 imm=%psel_imm_s
52
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
53
@psel esz=3 imm=%psel_imm_d
54
+
55
+### SVE clamp
56
+
57
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
58
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
59
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/target/arm/translate-sve.c
62
+++ b/target/arm/translate-sve.c
63
@@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
64
tcg_temp_free_ptr(ptr);
65
return true;
66
}
67
+
68
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
69
+{
70
+ tcg_gen_smax_i32(d, a, n);
71
+ tcg_gen_smin_i32(d, d, m);
72
+}
73
+
74
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
75
+{
76
+ tcg_gen_smax_i64(d, a, n);
77
+ tcg_gen_smin_i64(d, d, m);
78
+}
79
+
80
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
81
+ TCGv_vec m, TCGv_vec a)
82
+{
83
+ tcg_gen_smax_vec(vece, d, a, n);
84
+ tcg_gen_smin_vec(vece, d, d, m);
85
+}
86
+
87
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
88
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
89
+{
90
+ static const TCGOpcode vecop[] = {
91
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
92
+ };
93
+ static const GVecGen4 ops[4] = {
94
+ { .fniv = gen_sclamp_vec,
95
+ .fno = gen_helper_gvec_sclamp_b,
96
+ .opt_opc = vecop,
97
+ .vece = MO_8 },
98
+ { .fniv = gen_sclamp_vec,
99
+ .fno = gen_helper_gvec_sclamp_h,
100
+ .opt_opc = vecop,
101
+ .vece = MO_16 },
102
+ { .fni4 = gen_sclamp_i32,
103
+ .fniv = gen_sclamp_vec,
104
+ .fno = gen_helper_gvec_sclamp_s,
105
+ .opt_opc = vecop,
106
+ .vece = MO_32 },
107
+ { .fni8 = gen_sclamp_i64,
108
+ .fniv = gen_sclamp_vec,
109
+ .fno = gen_helper_gvec_sclamp_d,
110
+ .opt_opc = vecop,
111
+ .vece = MO_64,
112
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
113
+ };
114
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
115
+}
116
+
117
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
118
+
119
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
120
+{
121
+ tcg_gen_umax_i32(d, a, n);
122
+ tcg_gen_umin_i32(d, d, m);
123
+}
124
+
125
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
126
+{
127
+ tcg_gen_umax_i64(d, a, n);
128
+ tcg_gen_umin_i64(d, d, m);
129
+}
130
+
131
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
132
+ TCGv_vec m, TCGv_vec a)
133
+{
134
+ tcg_gen_umax_vec(vece, d, a, n);
135
+ tcg_gen_umin_vec(vece, d, d, m);
136
+}
137
+
138
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
139
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
140
+{
141
+ static const TCGOpcode vecop[] = {
142
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
143
+ };
144
+ static const GVecGen4 ops[4] = {
145
+ { .fniv = gen_uclamp_vec,
146
+ .fno = gen_helper_gvec_uclamp_b,
147
+ .opt_opc = vecop,
148
+ .vece = MO_8 },
149
+ { .fniv = gen_uclamp_vec,
150
+ .fno = gen_helper_gvec_uclamp_h,
151
+ .opt_opc = vecop,
152
+ .vece = MO_16 },
153
+ { .fni4 = gen_uclamp_i32,
154
+ .fniv = gen_uclamp_vec,
155
+ .fno = gen_helper_gvec_uclamp_s,
156
+ .opt_opc = vecop,
157
+ .vece = MO_32 },
158
+ { .fni8 = gen_uclamp_i64,
159
+ .fniv = gen_uclamp_vec,
160
+ .fno = gen_helper_gvec_uclamp_d,
161
+ .opt_opc = vecop,
162
+ .vece = MO_64,
163
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
164
+ };
165
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
166
+}
167
+
168
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
169
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/target/arm/vec_helper.c
172
+++ b/target/arm/vec_helper.c
173
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
174
}
175
clear_tail(d, opr_sz, simd_maxsz(desc));
176
}
177
+
178
+#define DO_CLAMP(NAME, TYPE) \
179
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
180
+{ \
181
+ intptr_t i, opr_sz = simd_oprsz(desc); \
182
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
183
+ TYPE aa = *(TYPE *)(a + i); \
184
+ TYPE nn = *(TYPE *)(n + i); \
185
+ TYPE mm = *(TYPE *)(m + i); \
186
+ TYPE dd = MIN(MAX(aa, nn), mm); \
187
+ *(TYPE *)(d + i) = dd; \
188
+ } \
189
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
190
+}
191
+
192
+DO_CLAMP(gvec_sclamp_b, int8_t)
193
+DO_CLAMP(gvec_sclamp_h, int16_t)
194
+DO_CLAMP(gvec_sclamp_s, int32_t)
195
+DO_CLAMP(gvec_sclamp_d, int64_t)
196
+
197
+DO_CLAMP(gvec_uclamp_b, uint8_t)
198
+DO_CLAMP(gvec_uclamp_h, uint16_t)
199
+DO_CLAMP(gvec_uclamp_s, uint32_t)
200
+DO_CLAMP(gvec_uclamp_d, uint64_t)
201
--
202
2.25.1
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
We've got SW that expects FSBL (Bootlooader) to setup clocks and
3
We can handle both exception entry and exception return by
4
resets. It's quite common that users run that SW on QEMU without
4
hooking into aarch64_sve_change_el.
5
FSBL (FSBL typically requires the Xilinx tools installed). That's
6
fine, since users can stil use -device loader to enable clocks etc.
7
5
8
To help folks understand what's going, a log (guest-error) message
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
would be helpful here. In particular with the serial port since
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
things will go very quiet if they get things wrong.
8
Message-id: 20220708151540.18136-32-richard.henderson@linaro.org
11
12
Suggested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
13
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
14
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
15
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
16
Message-id: 20210901124521.30599-7-bmeng.cn@gmail.com
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
10
---
19
hw/char/cadence_uart.c | 8 ++++++++
11
target/arm/helper.c | 15 +++++++++++++--
20
1 file changed, 8 insertions(+)
12
1 file changed, 13 insertions(+), 2 deletions(-)
21
13
22
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
14
diff --git a/target/arm/helper.c b/target/arm/helper.c
23
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/char/cadence_uart.c
16
--- a/target/arm/helper.c
25
+++ b/hw/char/cadence_uart.c
17
+++ b/target/arm/helper.c
26
@@ -XXX,XX +XXX,XX @@ static int uart_can_receive(void *opaque)
18
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
27
28
/* ignore characters when unclocked or in reset */
29
if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
30
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n",
31
+ __func__);
32
return 0;
33
}
34
35
@@ -XXX,XX +XXX,XX @@ static void uart_event(void *opaque, QEMUChrEvent event)
36
37
/* ignore characters when unclocked or in reset */
38
if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
39
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n",
40
+ __func__);
41
return;
19
return;
42
}
20
}
43
21
44
@@ -XXX,XX +XXX,XX @@ static MemTxResult uart_write(void *opaque, hwaddr offset,
22
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
45
23
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
/* ignore access when unclocked or in reset */
24
+
47
if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
25
+ /*
48
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n",
26
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
49
+ __func__);
27
+ * invoke ResetSVEState when taking an exception from, or
50
return MEMTX_ERROR;
28
+ * returning to, AArch32 state when PSTATE.SM is enabled.
51
}
29
+ */
52
30
+ if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
53
@@ -XXX,XX +XXX,XX @@ static MemTxResult uart_read(void *opaque, hwaddr offset,
31
+ arm_reset_sve_state(env);
54
32
+ return;
55
/* ignore access when unclocked or in reset */
33
+ }
56
if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) {
34
+
57
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n",
35
/*
58
+ __func__);
36
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
59
return MEMTX_ERROR;
37
* at ELx, or not available because the EL is in AArch32 state, then
60
}
38
@@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
39
* we already have the correct register contents when encountering the
40
* vq0->vq0 transition between EL0->EL1.
41
*/
42
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
43
old_len = (old_a64 && !sve_exception_el(env, old_el)
44
? sve_vqm1_for_el(env, old_el) : 0);
45
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
46
new_len = (new_a64 && !sve_exception_el(env, new_el)
47
? sve_vqm1_for_el(env, new_el) : 0);
61
48
62
--
49
--
63
2.20.1
50
2.25.1
64
65
diff view generated by jsdifflib
1
The various MPS2 boards implemented in mps2.c have multiple I2C
1
From: Richard Henderson <richard.henderson@linaro.org>
2
buses: a bus dedicated to the audio configuration, one for the LCD
3
touchscreen controller, and two which are connected to the external
4
Shield expansion connector. Mark the buses which are used only for
5
board-internal devices as 'full' so that if the user creates i2c
6
devices on the commandline without specifying a bus name then they
7
will be connected to the I2C controller used for the Shield
8
connector, where guest software will expect them.
9
2
3
Note that SME remains effectively disabled for user-only,
4
because we do not yet set CPACR_EL1.SMEN. This needs to
5
wait until the kernel ABI is implemented.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-33-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20210903151435.22379-5-peter.maydell@linaro.org
13
---
11
---
14
hw/arm/mps2.c | 12 +++++++++++-
12
docs/system/arm/emulation.rst | 4 ++++
15
1 file changed, 11 insertions(+), 1 deletion(-)
13
target/arm/cpu64.c | 11 +++++++++++
14
2 files changed, 15 insertions(+)
16
15
17
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
16
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/arm/mps2.c
18
--- a/docs/system/arm/emulation.rst
20
+++ b/hw/arm/mps2.c
19
+++ b/docs/system/arm/emulation.rst
21
@@ -XXX,XX +XXX,XX @@ static void mps2_common_init(MachineState *machine)
20
@@ -XXX,XX +XXX,XX @@ the following architecture extensions:
22
0x40023000, /* Audio */
21
- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
23
0x40029000, /* Shield0 */
22
- FEAT_SM3 (Advanced SIMD SM3 instructions)
24
0x4002a000}; /* Shield1 */
23
- FEAT_SM4 (Advanced SIMD SM4 instructions)
25
- sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL);
24
+- FEAT_SME (Scalable Matrix Extension)
26
+ DeviceState *dev;
25
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
26
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
27
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
28
- FEAT_SPECRES (Speculation restriction instructions)
29
- FEAT_SSBS (Speculative Store Bypass Safe)
30
- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
31
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/target/arm/cpu64.c
34
+++ b/target/arm/cpu64.c
35
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
36
*/
37
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
38
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
39
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
40
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
41
cpu->isar.id_aa64pfr1 = t;
42
43
@@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj)
44
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */
45
cpu->isar.id_aa64dfr0 = t;
46
47
+ t = cpu->isar.id_aa64smfr0;
48
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
49
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
50
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
51
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
52
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
53
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
54
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
55
+ cpu->isar.id_aa64smfr0 = t;
27
+
56
+
28
+ dev = sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL);
57
/* Replicate the same data to the 32-bit id registers. */
29
+ if (i < 2) {
58
aa32_max_features(cpu);
30
+ /*
31
+ * internal-only bus: mark it full to avoid user-created
32
+ * i2c devices being plugged into it.
33
+ */
34
+ BusState *qbus = qdev_get_child_bus(dev, "i2c");
35
+ qbus_mark_full(qbus);
36
+ }
37
}
38
create_unimplemented_device("i2s", 0x40024000, 0x400);
39
59
40
--
60
--
41
2.20.1
61
2.25.1
42
43
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-34-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/target_cpu.h | 5 ++++-
9
1 file changed, 4 insertions(+), 1 deletion(-)
10
11
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/target_cpu.h
14
+++ b/linux-user/aarch64/target_cpu.h
15
@@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags)
16
17
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
18
{
19
- /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
20
+ /*
21
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
22
* different from AArch32 Linux, which uses TPIDRRO.
23
*/
24
env->cp15.tpidr_el[0] = newtls;
25
+ /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
26
+ env->cp15.tpidr2_el0 = 0;
27
}
28
29
static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
30
--
31
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-35-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/cpu_loop.c | 9 +++++++++
9
1 file changed, 9 insertions(+)
10
11
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/cpu_loop.c
14
+++ b/linux-user/aarch64/cpu_loop.c
15
@@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env)
16
17
switch (trapnr) {
18
case EXCP_SWI:
19
+ /*
20
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
21
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
22
+ */
23
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
24
+ env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
25
+ arm_rebuild_hflags(env);
26
+ arm_reset_sve_state(env);
27
+ }
28
ret = do_syscall(env,
29
env->xregs[8],
30
env->xregs[0],
31
--
32
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Make sure to zero the currently reserved fields.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-36-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
linux-user/aarch64/signal.c | 9 ++++++++-
11
1 file changed, 8 insertions(+), 1 deletion(-)
12
13
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/linux-user/aarch64/signal.c
16
+++ b/linux-user/aarch64/signal.c
17
@@ -XXX,XX +XXX,XX @@ struct target_extra_context {
18
struct target_sve_context {
19
struct target_aarch64_ctx head;
20
uint16_t vl;
21
- uint16_t reserved[3];
22
+ uint16_t flags;
23
+ uint16_t reserved[2];
24
/* The actual SVE data immediately follows. It is laid out
25
* according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
26
* the original struct pointer.
27
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
28
#define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
29
(TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
30
31
+#define TARGET_SVE_SIG_FLAG_SM 1
32
+
33
struct target_rt_sigframe {
34
struct target_siginfo info;
35
struct target_ucontext uc;
36
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
37
{
38
int i, j;
39
40
+ memset(sve, 0, sizeof(*sve));
41
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
42
__put_user(size, &sve->head.size);
43
__put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl);
44
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
45
+ __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags);
46
+ }
47
48
/* Note that SVE regs are stored as a byte stream, with each byte element
49
* at a subsequent address. This corresponds to a little-endian store
50
--
51
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Fold the return value setting into the goto, so each
4
point of failure need not do both.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-37-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/signal.c | 26 +++++++++++---------------
12
1 file changed, 11 insertions(+), 15 deletions(-)
13
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/aarch64/signal.c
17
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
19
struct target_sve_context *sve = NULL;
20
uint64_t extra_datap = 0;
21
bool used_extra = false;
22
- bool err = false;
23
int vq = 0, sve_size = 0;
24
25
target_restore_general_frame(env, sf);
26
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
27
switch (magic) {
28
case 0:
29
if (size != 0) {
30
- err = true;
31
- goto exit;
32
+ goto err;
33
}
34
if (used_extra) {
35
ctx = NULL;
36
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
37
38
case TARGET_FPSIMD_MAGIC:
39
if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
40
- err = true;
41
- goto exit;
42
+ goto err;
43
}
44
fpsimd = (struct target_fpsimd_context *)ctx;
45
break;
46
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
47
break;
48
}
49
}
50
- err = true;
51
- goto exit;
52
+ goto err;
53
54
case TARGET_EXTRA_MAGIC:
55
if (extra || size != sizeof(struct target_extra_context)) {
56
- err = true;
57
- goto exit;
58
+ goto err;
59
}
60
__get_user(extra_datap,
61
&((struct target_extra_context *)ctx)->datap);
62
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
63
/* Unknown record -- we certainly didn't generate it.
64
* Did we in fact get out of sync?
65
*/
66
- err = true;
67
- goto exit;
68
+ goto err;
69
}
70
ctx = (void *)ctx + size;
71
}
72
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
73
if (fpsimd) {
74
target_restore_fpsimd_record(env, fpsimd);
75
} else {
76
- err = true;
77
+ goto err;
78
}
79
80
/* SVE data, if present, overwrites FPSIMD data. */
81
if (sve) {
82
target_restore_sve_record(env, sve, vq);
83
}
84
-
85
- exit:
86
unlock_user(extra, extra_datap, 0);
87
- return err;
88
+ return 0;
89
+
90
+ err:
91
+ unlock_user(extra, extra_datap, 0);
92
+ return 1;
93
}
94
95
static abi_ulong get_sigframe(struct target_sigaction *ka,
96
--
97
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
In parse_user_sigframe, the kernel rejects duplicate sve records,
4
or records that are smaller than the header. We were silently
5
allowing these cases to pass, dropping the record.
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-id: 20220708151540.18136-38-richard.henderson@linaro.org
10
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
---
12
linux-user/aarch64/signal.c | 5 ++++-
13
1 file changed, 4 insertions(+), 1 deletion(-)
14
15
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/signal.c
18
+++ b/linux-user/aarch64/signal.c
19
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
20
break;
21
22
case TARGET_SVE_MAGIC:
23
+ if (sve || size < sizeof(struct target_sve_context)) {
24
+ goto err;
25
+ }
26
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
27
vq = sve_vq(env);
28
sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
29
- if (!sve && size == sve_size) {
30
+ if (size == sve_size) {
31
sve = (struct target_sve_context *)ctx;
32
break;
33
}
34
--
35
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-39-richard.henderson@linaro.org
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
---
8
linux-user/aarch64/signal.c | 3 +++
9
1 file changed, 3 insertions(+)
10
11
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/linux-user/aarch64/signal.c
14
+++ b/linux-user/aarch64/signal.c
15
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
16
__get_user(extra_size,
17
&((struct target_extra_context *)ctx)->size);
18
extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
19
+ if (!extra) {
20
+ return 1;
21
+ }
22
break;
23
24
default:
25
--
26
2.25.1
diff view generated by jsdifflib
1
By default, QEMU will allow devices to be plugged into a bus up to
1
From: Richard Henderson <richard.henderson@linaro.org>
2
the bus class's device count limit. If the user creates a device on
3
the command line or via the monitor and doesn't explicitly specify
4
the bus to plug it in, QEMU will plug it into the first non-full bus
5
that it finds.
6
2
7
This is fine in most cases, but some machines have multiple buses of
3
Move the checks out of the parsing loop and into the
8
a given type, some of which are dedicated to on-board devices and
4
restore function. This more closely mirrors the code
9
some of which have an externally exposed connector for user-pluggable
5
structure in the kernel, and is slightly clearer.
10
devices. One example is I2C buses.
11
6
12
Provide a new function qbus_mark_full() so that a machine model can
7
Reject rather than silently skip incorrect VL and SVE record sizes,
13
mark this kind of "internal only" bus as 'full' after it has created
8
bringing our checks in to line with those the kernel does.
14
all the devices that should be plugged into that bus. The "find a
15
non-full bus" algorithm will then skip the internal-only bus when
16
looking for a place to plug in user-created devices.
17
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Message-id: 20220708151540.18136-40-richard.henderson@linaro.org
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Message-id: 20210903151435.22379-2-peter.maydell@linaro.org
21
---
14
---
22
include/hw/qdev-core.h | 24 ++++++++++++++++++++++++
15
linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------
23
softmmu/qdev-monitor.c | 7 ++++++-
16
1 file changed, 35 insertions(+), 16 deletions(-)
24
2 files changed, 30 insertions(+), 1 deletion(-)
25
17
26
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
18
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
27
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/qdev-core.h
20
--- a/linux-user/aarch64/signal.c
29
+++ b/include/hw/qdev-core.h
21
+++ b/linux-user/aarch64/signal.c
30
@@ -XXX,XX +XXX,XX @@ struct BusState {
22
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
31
HotplugHandler *hotplug_handler;
23
}
32
int max_index;
33
bool realized;
34
+ bool full;
35
int num_children;
36
37
/*
38
@@ -XXX,XX +XXX,XX @@ static inline bool qbus_is_hotpluggable(BusState *bus)
39
return bus->hotplug_handler;
40
}
24
}
41
25
42
+/**
26
-static void target_restore_sve_record(CPUARMState *env,
43
+ * qbus_mark_full: Mark this bus as full, so no more devices can be attached
27
- struct target_sve_context *sve, int vq)
44
+ * @bus: Bus to mark as full
28
+static bool target_restore_sve_record(CPUARMState *env,
45
+ *
29
+ struct target_sve_context *sve,
46
+ * By default, QEMU will allow devices to be plugged into a bus up
30
+ int size)
47
+ * to the bus class's device count limit. Calling this function
31
{
48
+ * marks a particular bus as full, so that no more devices can be
32
- int i, j;
49
+ * plugged into it. In particular this means that the bus will not
33
+ int i, j, vl, vq;
50
+ * be considered as a candidate for plugging in devices created by
34
51
+ * the user on the commandline or via the monitor.
35
- /* Note that SVE regs are stored as a byte stream, with each byte element
52
+ * If a machine has multiple buses of a given type, such as I2C,
36
+ if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
53
+ * where some of those buses in the real hardware are used only for
37
+ return false;
54
+ * internal devices and some are exposed via expansion ports, you
38
+ }
55
+ * can use this function to mark the internal-only buses as full
56
+ * after you have created all their internal devices. Then user
57
+ * created devices will appear on the expansion-port bus where
58
+ * guest software expects them.
59
+ */
60
+static inline void qbus_mark_full(BusState *bus)
61
+{
62
+ bus->full = true;
63
+}
64
+
39
+
65
void device_listener_register(DeviceListener *listener);
40
+ __get_user(vl, &sve->vl);
66
void device_listener_unregister(DeviceListener *listener);
41
+ vq = sve_vq(env);
67
68
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/softmmu/qdev-monitor.c
71
+++ b/softmmu/qdev-monitor.c
72
@@ -XXX,XX +XXX,XX @@ static DeviceState *qbus_find_dev(BusState *bus, char *elem)
73
74
static inline bool qbus_is_full(BusState *bus)
75
{
76
- BusClass *bus_class = BUS_GET_CLASS(bus);
77
+ BusClass *bus_class;
78
+
42
+
79
+ if (bus->full) {
43
+ /* Reject mismatched VL. */
44
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
45
+ return false;
46
+ }
47
+
48
+ /* Accept empty record -- used to clear PSTATE.SM. */
49
+ if (size <= sizeof(*sve)) {
80
+ return true;
50
+ return true;
81
+ }
51
+ }
82
+ bus_class = BUS_GET_CLASS(bus);
52
+
83
return bus_class->max_dev && bus->num_children >= bus_class->max_dev;
53
+ /* Reject non-empty but incomplete record. */
54
+ if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
55
+ return false;
56
+ }
57
+
58
+ /*
59
+ * Note that SVE regs are stored as a byte stream, with each byte element
60
* at a subsequent address. This corresponds to a little-endian load
61
* of our 64-bit hunks.
62
*/
63
@@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env,
64
}
65
}
66
}
67
+ return true;
84
}
68
}
85
69
70
static int target_restore_sigframe(CPUARMState *env,
71
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
72
struct target_sve_context *sve = NULL;
73
uint64_t extra_datap = 0;
74
bool used_extra = false;
75
- int vq = 0, sve_size = 0;
76
+ int sve_size = 0;
77
78
target_restore_general_frame(env, sf);
79
80
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
81
if (sve || size < sizeof(struct target_sve_context)) {
82
goto err;
83
}
84
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
85
- vq = sve_vq(env);
86
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
87
- if (size == sve_size) {
88
- sve = (struct target_sve_context *)ctx;
89
- break;
90
- }
91
- }
92
- goto err;
93
+ sve = (struct target_sve_context *)ctx;
94
+ sve_size = size;
95
+ break;
96
97
case TARGET_EXTRA_MAGIC:
98
if (extra || size != sizeof(struct target_extra_context)) {
99
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
100
}
101
102
/* SVE data, if present, overwrites FPSIMD data. */
103
- if (sve) {
104
- target_restore_sve_record(env, sve, vq);
105
+ if (sve && !target_restore_sve_record(env, sve, sve_size)) {
106
+ goto err;
107
}
108
unlock_user(extra, extra_datap, 0);
109
return 0;
86
--
110
--
87
2.20.1
111
2.25.1
88
89
diff view generated by jsdifflib
1
From: Bin Meng <bmeng.cn@gmail.com>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
This converts uart_read() and uart_write() to memop_with_attrs() ops.
3
Set the SM bit in the SVE record on signal delivery, create the ZA record.
4
Restore SM and ZA state according to the records present on return.
4
5
5
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Message-id: 20220708151540.18136-41-richard.henderson@linaro.org
8
Message-id: 20210901124521.30599-5-bmeng.cn@gmail.com
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
10
---
11
hw/char/cadence_uart.c | 26 +++++++++++++++-----------
11
linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++---
12
1 file changed, 15 insertions(+), 11 deletions(-)
12
1 file changed, 154 insertions(+), 13 deletions(-)
13
13
14
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
14
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/char/cadence_uart.c
16
--- a/linux-user/aarch64/signal.c
17
+++ b/hw/char/cadence_uart.c
17
+++ b/linux-user/aarch64/signal.c
18
@@ -XXX,XX +XXX,XX @@ static void uart_read_rx_fifo(CadenceUARTState *s, uint32_t *c)
18
@@ -XXX,XX +XXX,XX @@ struct target_sve_context {
19
uart_update_status(s);
19
20
#define TARGET_SVE_SIG_FLAG_SM 1
21
22
+#define TARGET_ZA_MAGIC 0x54366345
23
+
24
+struct target_za_context {
25
+ struct target_aarch64_ctx head;
26
+ uint16_t vl;
27
+ uint16_t reserved[3];
28
+ /* The actual ZA data immediately follows. */
29
+};
30
+
31
+#define TARGET_ZA_SIG_REGS_OFFSET \
32
+ QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
33
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
34
+ (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
35
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
36
+ TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
37
+
38
struct target_rt_sigframe {
39
struct target_siginfo info;
40
struct target_ucontext uc;
41
@@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end)
20
}
42
}
21
43
22
-static void uart_write(void *opaque, hwaddr offset,
44
static void target_setup_sve_record(struct target_sve_context *sve,
23
- uint64_t value, unsigned size)
45
- CPUARMState *env, int vq, int size)
24
+static MemTxResult uart_write(void *opaque, hwaddr offset,
46
+ CPUARMState *env, int size)
25
+ uint64_t value, unsigned size, MemTxAttrs attrs)
47
{
26
{
48
- int i, j;
27
CadenceUARTState *s = opaque;
49
+ int i, j, vq = sve_vq(env);
28
50
29
DB_PRINT(" offset:%x data:%08x\n", (unsigned)offset, (unsigned)value);
51
memset(sve, 0, sizeof(*sve));
30
offset >>= 2;
52
__put_user(TARGET_SVE_MAGIC, &sve->head.magic);
31
if (offset >= CADENCE_UART_R_MAX) {
53
@@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve,
32
- return;
54
}
33
+ return MEMTX_DECODE_ERROR;
34
}
35
switch (offset) {
36
case R_IER: /* ier (wts imr) */
37
@@ -XXX,XX +XXX,XX @@ static void uart_write(void *opaque, hwaddr offset,
38
break;
39
}
40
uart_update_status(s);
41
+
42
+ return MEMTX_OK;
43
}
55
}
44
56
45
-static uint64_t uart_read(void *opaque, hwaddr offset,
57
+static void target_setup_za_record(struct target_za_context *za,
46
- unsigned size)
58
+ CPUARMState *env, int size)
47
+static MemTxResult uart_read(void *opaque, hwaddr offset,
59
+{
48
+ uint64_t *value, unsigned size, MemTxAttrs attrs)
60
+ int vq = sme_vq(env);
49
{
61
+ int vl = vq * TARGET_SVE_VQ_BYTES;
50
CadenceUARTState *s = opaque;
62
+ int i, j;
51
uint32_t c = 0;
63
+
52
64
+ memset(za, 0, sizeof(*za));
53
offset >>= 2;
65
+ __put_user(TARGET_ZA_MAGIC, &za->head.magic);
54
if (offset >= CADENCE_UART_R_MAX) {
66
+ __put_user(size, &za->head.size);
55
- c = 0;
67
+ __put_user(vl, &za->vl);
56
- } else if (offset == R_TX_RX) {
68
+
57
+ return MEMTX_DECODE_ERROR;
69
+ if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
58
+ }
70
+ return;
59
+ if (offset == R_TX_RX) {
71
+ }
60
uart_read_rx_fifo(s, &c);
72
+ assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
61
} else {
73
+
62
- c = s->r[offset];
74
+ /*
63
+ c = s->r[offset];
75
+ * Note that ZA vectors are stored as a byte stream,
64
}
76
+ * with each byte element at a subsequent address.
65
77
+ */
66
DB_PRINT(" offset:%x data:%08x\n", (unsigned)(offset << 2), (unsigned)c);
78
+ for (i = 0; i < vl; ++i) {
67
- return c;
79
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
68
+ *value = c;
80
+ for (j = 0; j < vq * 2; ++j) {
69
+ return MEMTX_OK;
81
+ __put_user_e(env->zarray[i].d[j], z + j, le);
82
+ }
83
+ }
84
+}
85
+
86
static void target_restore_general_frame(CPUARMState *env,
87
struct target_rt_sigframe *sf)
88
{
89
@@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env,
90
91
static bool target_restore_sve_record(CPUARMState *env,
92
struct target_sve_context *sve,
93
- int size)
94
+ int size, int *svcr)
95
{
96
- int i, j, vl, vq;
97
+ int i, j, vl, vq, flags;
98
+ bool sm;
99
100
- if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
101
+ __get_user(vl, &sve->vl);
102
+ __get_user(flags, &sve->flags);
103
+
104
+ sm = flags & TARGET_SVE_SIG_FLAG_SM;
105
+
106
+ /* The cpu must support Streaming or Non-streaming SVE. */
107
+ if (sm
108
+ ? !cpu_isar_feature(aa64_sme, env_archcpu(env))
109
+ : !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
110
return false;
111
}
112
113
- __get_user(vl, &sve->vl);
114
- vq = sve_vq(env);
115
+ /*
116
+ * Note that we cannot use sve_vq() because that depends on the
117
+ * current setting of PSTATE.SM, not the state to be restored.
118
+ */
119
+ vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
120
121
/* Reject mismatched VL. */
122
if (vl != vq * TARGET_SVE_VQ_BYTES) {
123
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
124
return false;
125
}
126
127
+ *svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
128
+
129
/*
130
* Note that SVE regs are stored as a byte stream, with each byte element
131
* at a subsequent address. This corresponds to a little-endian load
132
@@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env,
133
return true;
70
}
134
}
71
135
72
static const MemoryRegionOps uart_ops = {
136
+static bool target_restore_za_record(CPUARMState *env,
73
- .read = uart_read,
137
+ struct target_za_context *za,
74
- .write = uart_write,
138
+ int size, int *svcr)
75
+ .read_with_attrs = uart_read,
139
+{
76
+ .write_with_attrs = uart_write,
140
+ int i, j, vl, vq;
77
.endianness = DEVICE_NATIVE_ENDIAN,
141
+
78
};
142
+ if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
79
143
+ return false;
144
+ }
145
+
146
+ __get_user(vl, &za->vl);
147
+ vq = sme_vq(env);
148
+
149
+ /* Reject mismatched VL. */
150
+ if (vl != vq * TARGET_SVE_VQ_BYTES) {
151
+ return false;
152
+ }
153
+
154
+ /* Accept empty record -- used to clear PSTATE.ZA. */
155
+ if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
156
+ return true;
157
+ }
158
+
159
+ /* Reject non-empty but incomplete record. */
160
+ if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
161
+ return false;
162
+ }
163
+
164
+ *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1);
165
+
166
+ for (i = 0; i < vl; ++i) {
167
+ uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
168
+ for (j = 0; j < vq * 2; ++j) {
169
+ __get_user_e(env->zarray[i].d[j], z + j, le);
170
+ }
171
+ }
172
+ return true;
173
+}
174
+
175
static int target_restore_sigframe(CPUARMState *env,
176
struct target_rt_sigframe *sf)
177
{
178
struct target_aarch64_ctx *ctx, *extra = NULL;
179
struct target_fpsimd_context *fpsimd = NULL;
180
struct target_sve_context *sve = NULL;
181
+ struct target_za_context *za = NULL;
182
uint64_t extra_datap = 0;
183
bool used_extra = false;
184
int sve_size = 0;
185
+ int za_size = 0;
186
+ int svcr = 0;
187
188
target_restore_general_frame(env, sf);
189
190
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
191
sve_size = size;
192
break;
193
194
+ case TARGET_ZA_MAGIC:
195
+ if (za || size < sizeof(struct target_za_context)) {
196
+ goto err;
197
+ }
198
+ za = (struct target_za_context *)ctx;
199
+ za_size = size;
200
+ break;
201
+
202
case TARGET_EXTRA_MAGIC:
203
if (extra || size != sizeof(struct target_extra_context)) {
204
goto err;
205
@@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env,
206
}
207
208
/* SVE data, if present, overwrites FPSIMD data. */
209
- if (sve && !target_restore_sve_record(env, sve, sve_size)) {
210
+ if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) {
211
goto err;
212
}
213
+ if (za && !target_restore_za_record(env, za, za_size, &svcr)) {
214
+ goto err;
215
+ }
216
+ if (env->svcr != svcr) {
217
+ env->svcr = svcr;
218
+ arm_rebuild_hflags(env);
219
+ }
220
unlock_user(extra, extra_datap, 0);
221
return 0;
222
223
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
224
.total_size = offsetof(struct target_rt_sigframe,
225
uc.tuc_mcontext.__reserved),
226
};
227
- int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0;
228
+ int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0;
229
+ int sve_size = 0, za_size = 0;
230
struct target_rt_sigframe *frame;
231
struct target_rt_frame_record *fr;
232
abi_ulong frame_addr, return_addr;
233
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
234
&layout);
235
236
/* SVE state needs saving only if it exists. */
237
- if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
238
- vq = sve_vq(env);
239
- sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
240
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env)) ||
241
+ cpu_isar_feature(aa64_sme, env_archcpu(env))) {
242
+ sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16);
243
sve_ofs = alloc_sigframe_space(sve_size, &layout);
244
}
245
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
246
+ /* ZA state needs saving only if it is enabled. */
247
+ if (FIELD_EX64(env->svcr, SVCR, ZA)) {
248
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env));
249
+ } else {
250
+ za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0);
251
+ }
252
+ za_ofs = alloc_sigframe_space(za_size, &layout);
253
+ }
254
255
if (layout.extra_ofs) {
256
/* Reserve space for the extra end marker. The standard end marker
257
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
258
target_setup_end_record((void *)frame + layout.extra_end_ofs);
259
}
260
if (sve_ofs) {
261
- target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size);
262
+ target_setup_sve_record((void *)frame + sve_ofs, env, sve_size);
263
+ }
264
+ if (za_ofs) {
265
+ target_setup_za_record((void *)frame + za_ofs, env, za_size);
266
}
267
268
/* Set up the stack frame for unwinding. */
269
@@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka,
270
env->btype = 2;
271
}
272
273
+ /*
274
+ * Invoke the signal handler with both SM and ZA disabled.
275
+ * When clearing SM, ResetSVEState, per SMSTOP.
276
+ */
277
+ if (FIELD_EX64(env->svcr, SVCR, SM)) {
278
+ arm_reset_sve_state(env);
279
+ }
280
+ if (env->svcr) {
281
+ env->svcr = 0;
282
+ arm_rebuild_hflags(env);
283
+ }
284
+
285
if (info) {
286
tswap_siginfo(&frame->info, info);
287
env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
80
--
288
--
81
2.20.1
289
2.25.1
82
83
diff view generated by jsdifflib
1
The mps2-tz boards use a data-driven structure to create the devices
1
From: Richard Henderson <richard.henderson@linaro.org>
2
that sit behind peripheral protection controllers. Currently the
3
functions which create these devices are passed an 'opaque' pointer
4
which is always the address within the machine struct of the device
5
to create, and some "all devices need this" information like irqs and
6
addresses.
7
2
8
If a specific device needs more information than this, it is
3
Add "sve" to the sve prctl functions, to distinguish
9
currently not possible to pass that through from the PPCInfo
4
them from the coming "sme" prctls with similar names.
10
data structure. Add support for passing an extra data parameter,
11
so that we can more flexibly handle the needs of specific
12
device types. To provide some type-safety we make this extra
13
parameter a pointer to a union (which initially has no members).
14
5
15
In particular, we would like to be able to indicate which of the
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
16
i2c controllers are for on-board devices only and which are
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
17
connected to the external 'shield' expansion port; a subsequent
8
Message-id: 20220708151540.18136-42-richard.henderson@linaro.org
18
patch will use this mechanism for that purpose.
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 8 ++++----
12
linux-user/syscall.c | 12 ++++++------
13
2 files changed, 10 insertions(+), 10 deletions(-)
19
14
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-id: 20210903151435.22379-3-peter.maydell@linaro.org
23
---
24
hw/arm/mps2-tz.c | 35 ++++++++++++++++++++++-------------
25
1 file changed, 22 insertions(+), 13 deletions(-)
26
27
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
28
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
29
--- a/hw/arm/mps2-tz.c
17
--- a/linux-user/aarch64/target_prctl.h
30
+++ b/hw/arm/mps2-tz.c
18
+++ b/linux-user/aarch64/target_prctl.h
31
@@ -XXX,XX +XXX,XX @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno)
19
@@ -XXX,XX +XXX,XX @@
20
#ifndef AARCH64_TARGET_PRCTL_H
21
#define AARCH64_TARGET_PRCTL_H
22
23
-static abi_long do_prctl_get_vl(CPUArchState *env)
24
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
25
{
26
ARMCPU *cpu = env_archcpu(env);
27
if (cpu_isar_feature(aa64_sve, cpu)) {
28
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env)
32
}
29
}
30
return -TARGET_EINVAL;
33
}
31
}
34
32
-#define do_prctl_get_vl do_prctl_get_vl
35
+/* Union describing the device-specific extra data we pass to the devfn. */
33
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
36
+typedef union PPCExtraData {
34
37
+} PPCExtraData;
35
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
38
+
36
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
39
/* Most of the devices in the AN505 FPGA image sit behind
40
* Peripheral Protection Controllers. These data structures
41
* define the layout of which devices sit behind which PPCs.
42
@@ -XXX,XX +XXX,XX @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno)
43
*/
44
typedef MemoryRegion *MakeDevFn(MPS2TZMachineState *mms, void *opaque,
45
const char *name, hwaddr size,
46
- const int *irqs);
47
+ const int *irqs,
48
+ const PPCExtraData *extradata);
49
50
typedef struct PPCPortInfo {
51
const char *name;
52
@@ -XXX,XX +XXX,XX @@ typedef struct PPCPortInfo {
53
hwaddr addr;
54
hwaddr size;
55
int irqs[3]; /* currently no device needs more IRQ lines than this */
56
+ PPCExtraData extradata; /* to pass device-specific info to the devfn */
57
} PPCPortInfo;
58
59
typedef struct PPCInfo {
60
@@ -XXX,XX +XXX,XX @@ typedef struct PPCInfo {
61
static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms,
62
void *opaque,
63
const char *name, hwaddr size,
64
- const int *irqs)
65
+ const int *irqs,
66
+ const PPCExtraData *extradata)
67
{
68
/* Initialize, configure and realize a TYPE_UNIMPLEMENTED_DEVICE,
69
* and return a pointer to its MemoryRegion.
70
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms,
71
72
static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque,
73
const char *name, hwaddr size,
74
- const int *irqs)
75
+ const int *irqs, const PPCExtraData *extradata)
76
{
77
/* The irq[] array is tx, rx, combined, in that order */
78
MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms);
79
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque,
80
81
static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque,
82
const char *name, hwaddr size,
83
- const int *irqs)
84
+ const int *irqs, const PPCExtraData *extradata)
85
{
86
MPS2SCC *scc = opaque;
87
DeviceState *sccdev;
88
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque,
89
90
static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque,
91
const char *name, hwaddr size,
92
- const int *irqs)
93
+ const int *irqs, const PPCExtraData *extradata)
94
{
95
MPS2FPGAIO *fpgaio = opaque;
96
MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms);
97
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque,
98
99
static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque,
100
const char *name, hwaddr size,
101
- const int *irqs)
102
+ const int *irqs,
103
+ const PPCExtraData *extradata)
104
{
105
SysBusDevice *s;
106
NICInfo *nd = &nd_table[0];
107
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque,
108
109
static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque,
110
const char *name, hwaddr size,
111
- const int *irqs)
112
+ const int *irqs,
113
+ const PPCExtraData *extradata)
114
{
37
{
115
/*
38
/*
116
* The AN524 makes the ethernet and USB share a PPC port.
39
* We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
117
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque,
40
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
118
41
}
119
static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque,
42
return -TARGET_EINVAL;
120
const char *name, hwaddr size,
43
}
121
- const int *irqs)
44
-#define do_prctl_set_vl do_prctl_set_vl
122
+ const int *irqs, const PPCExtraData *extradata)
45
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
46
47
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
123
{
48
{
124
TZMPC *mpc = opaque;
49
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
125
int i = mpc - &mms->mpc[0];
50
index XXXXXXX..XXXXXXX 100644
126
@@ -XXX,XX +XXX,XX @@ static void remap_irq_fn(void *opaque, int n, int level)
51
--- a/linux-user/syscall.c
127
52
+++ b/linux-user/syscall.c
128
static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque,
53
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
129
const char *name, hwaddr size,
54
#ifndef do_prctl_set_fp_mode
130
- const int *irqs)
55
#define do_prctl_set_fp_mode do_prctl_inval1
131
+ const int *irqs, const PPCExtraData *extradata)
56
#endif
132
{
57
-#ifndef do_prctl_get_vl
133
/* The irq[] array is DMACINTR, DMACINTERR, DMACINTTC, in that order */
58
-#define do_prctl_get_vl do_prctl_inval0
134
PL080State *dma = opaque;
59
+#ifndef do_prctl_sve_get_vl
135
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque,
60
+#define do_prctl_sve_get_vl do_prctl_inval0
136
61
#endif
137
static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque,
62
-#ifndef do_prctl_set_vl
138
const char *name, hwaddr size,
63
-#define do_prctl_set_vl do_prctl_inval1
139
- const int *irqs)
64
+#ifndef do_prctl_sve_set_vl
140
+ const int *irqs, const PPCExtraData *extradata)
65
+#define do_prctl_sve_set_vl do_prctl_inval1
141
{
66
#endif
142
/*
67
#ifndef do_prctl_reset_keys
143
* The AN505 has five PL022 SPI controllers.
68
#define do_prctl_reset_keys do_prctl_inval1
144
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque,
69
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
145
70
case PR_SET_FP_MODE:
146
static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque,
71
return do_prctl_set_fp_mode(env, arg2);
147
const char *name, hwaddr size,
72
case PR_SVE_GET_VL:
148
- const int *irqs)
73
- return do_prctl_get_vl(env);
149
+ const int *irqs, const PPCExtraData *extradata)
74
+ return do_prctl_sve_get_vl(env);
150
{
75
case PR_SVE_SET_VL:
151
ArmSbconI2CState *i2c = opaque;
76
- return do_prctl_set_vl(env, arg2);
152
SysBusDevice *s;
77
+ return do_prctl_sve_set_vl(env, arg2);
153
@@ -XXX,XX +XXX,XX @@ static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque,
78
case PR_PAC_RESET_KEYS:
154
79
if (arg3 || arg4 || arg5) {
155
static MemoryRegion *make_rtc(MPS2TZMachineState *mms, void *opaque,
80
return -TARGET_EINVAL;
156
const char *name, hwaddr size,
157
- const int *irqs)
158
+ const int *irqs, const PPCExtraData *extradata)
159
{
160
PL031State *pl031 = opaque;
161
SysBusDevice *s;
162
@@ -XXX,XX +XXX,XX @@ static void mps2tz_common_init(MachineState *machine)
163
}
164
165
mr = pinfo->devfn(mms, pinfo->opaque, pinfo->name, pinfo->size,
166
- pinfo->irqs);
167
+ pinfo->irqs, &pinfo->extradata);
168
portname = g_strdup_printf("port[%d]", port);
169
object_property_set_link(OBJECT(ppc), portname, OBJECT(mr),
170
&error_fatal);
171
--
81
--
172
2.20.1
82
2.25.1
173
174
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
These prctl set the Streaming SVE vector length, which may
4
be completely different from the Normal SVE vector length.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-id: 20220708151540.18136-43-richard.henderson@linaro.org
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
10
---
11
linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++
12
linux-user/syscall.c | 16 +++++++++
13
2 files changed, 70 insertions(+)
14
15
diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/linux-user/aarch64/target_prctl.h
18
+++ b/linux-user/aarch64/target_prctl.h
19
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
20
{
21
ARMCPU *cpu = env_archcpu(env);
22
if (cpu_isar_feature(aa64_sve, cpu)) {
23
+ /* PSTATE.SM is always unset on syscall entry. */
24
return sve_vq(env) * 16;
25
}
26
return -TARGET_EINVAL;
27
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
28
&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
29
uint32_t vq, old_vq;
30
31
+ /* PSTATE.SM is always unset on syscall entry. */
32
old_vq = sve_vq(env);
33
34
/*
35
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
36
}
37
#define do_prctl_sve_set_vl do_prctl_sve_set_vl
38
39
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
40
+{
41
+ ARMCPU *cpu = env_archcpu(env);
42
+ if (cpu_isar_feature(aa64_sme, cpu)) {
43
+ return sme_vq(env) * 16;
44
+ }
45
+ return -TARGET_EINVAL;
46
+}
47
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
48
+
49
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
50
+{
51
+ /*
52
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
53
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
54
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
55
+ */
56
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))
57
+ && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
58
+ int vq, old_vq;
59
+
60
+ old_vq = sme_vq(env);
61
+
62
+ /*
63
+ * Bound the value of vq, so that we know that it fits into
64
+ * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared
65
+ * on syscall entry, we are not modifying the current SVE
66
+ * vector length.
67
+ */
68
+ vq = MAX(arg2 / 16, 1);
69
+ vq = MIN(vq, 16);
70
+ env->vfp.smcr_el[1] =
71
+ FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
72
+
73
+ /* Delay rebuilding hflags until we know if ZA must change. */
74
+ vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
75
+
76
+ if (vq != old_vq) {
77
+ /*
78
+ * PSTATE.ZA state is cleared on any change to SVL.
79
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
80
+ * cleared on syscall entry, so this hasn't changed VL.
81
+ */
82
+ env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
83
+ arm_rebuild_hflags(env);
84
+ }
85
+ return vq * 16;
86
+ }
87
+ return -TARGET_EINVAL;
88
+}
89
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
90
+
91
static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
92
{
93
ARMCPU *cpu = env_archcpu(env);
94
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/linux-user/syscall.c
97
+++ b/linux-user/syscall.c
98
@@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
99
#ifndef PR_SET_SYSCALL_USER_DISPATCH
100
# define PR_SET_SYSCALL_USER_DISPATCH 59
101
#endif
102
+#ifndef PR_SME_SET_VL
103
+# define PR_SME_SET_VL 63
104
+# define PR_SME_GET_VL 64
105
+# define PR_SME_VL_LEN_MASK 0xffff
106
+# define PR_SME_VL_INHERIT (1 << 17)
107
+#endif
108
109
#include "target_prctl.h"
110
111
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
112
#ifndef do_prctl_set_unalign
113
#define do_prctl_set_unalign do_prctl_inval1
114
#endif
115
+#ifndef do_prctl_sme_get_vl
116
+#define do_prctl_sme_get_vl do_prctl_inval0
117
+#endif
118
+#ifndef do_prctl_sme_set_vl
119
+#define do_prctl_sme_set_vl do_prctl_inval1
120
+#endif
121
122
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
123
abi_long arg3, abi_long arg4, abi_long arg5)
124
@@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
125
return do_prctl_sve_get_vl(env);
126
case PR_SVE_SET_VL:
127
return do_prctl_sve_set_vl(env, arg2);
128
+ case PR_SME_GET_VL:
129
+ return do_prctl_sme_get_vl(env);
130
+ case PR_SME_SET_VL:
131
+ return do_prctl_sme_set_vl(env, arg2);
132
case PR_PAC_RESET_KEYS:
133
if (arg3 || arg4 || arg5) {
134
return -TARGET_EINVAL;
135
--
136
2.25.1
diff view generated by jsdifflib
New patch
1
From: Richard Henderson <richard.henderson@linaro.org>
1
2
3
There's no reason to set CPACR_EL1.ZEN if SVE disabled.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-44-richard.henderson@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
10
target/arm/cpu.c | 7 +++----
11
1 file changed, 3 insertions(+), 4 deletions(-)
12
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/cpu.c
16
+++ b/target/arm/cpu.c
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
18
/* and to the FP/Neon instructions */
19
env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
20
CPACR_EL1, FPEN, 3);
21
- /* and to the SVE instructions */
22
- env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
23
- CPACR_EL1, ZEN, 3);
24
- /* with reasonable vector length */
25
+ /* and to the SVE instructions, with default vector length */
26
if (cpu_isar_feature(aa64_sve, cpu)) {
27
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
28
+ CPACR_EL1, ZEN, 3);
29
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
30
}
31
/*
32
--
33
2.25.1
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Updated expected IORT files applicable with latest GICv3
3
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.
4
ITS changes.
5
4
6
Full diff of new file disassembly:
7
8
/*
9
* Intel ACPI Component Architecture
10
* AML/ASL+ Disassembler version 20180629 (64-bit version)
11
* Copyright (c) 2000 - 2018 Intel Corporation
12
*
13
* Disassembly of tests/data/acpi/virt/IORT.pxb, Tue Jun 29 17:35:38 2021
14
*
15
* ACPI Data Table [IORT]
16
*
17
* Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
18
*/
19
20
[000h 0000 4] Signature : "IORT" [IO Remapping Table]
21
[004h 0004 4] Table Length : 0000007C
22
[008h 0008 1] Revision : 00
23
[009h 0009 1] Checksum : 07
24
[00Ah 0010 6] Oem ID : "BOCHS "
25
[010h 0016 8] Oem Table ID : "BXPC "
26
[018h 0024 4] Oem Revision : 00000001
27
[01Ch 0028 4] Asl Compiler ID : "BXPC"
28
[020h 0032 4] Asl Compiler Revision : 00000001
29
30
[024h 0036 4] Node Count : 00000002
31
[028h 0040 4] Node Offset : 00000030
32
[02Ch 0044 4] Reserved : 00000000
33
34
[030h 0048 1] Type : 00
35
[031h 0049 2] Length : 0018
36
[033h 0051 1] Revision : 00
37
[034h 0052 4] Reserved : 00000000
38
[038h 0056 4] Mapping Count : 00000000
39
[03Ch 0060 4] Mapping Offset : 00000000
40
41
[040h 0064 4] ItsCount : 00000001
42
[044h 0068 4] Identifiers : 00000000
43
44
[048h 0072 1] Type : 02
45
[049h 0073 2] Length : 0034
46
[04Bh 0075 1] Revision : 00
47
[04Ch 0076 4] Reserved : 00000000
48
[050h 0080 4] Mapping Count : 00000001
49
[054h 0084 4] Mapping Offset : 00000020
50
51
[058h 0088 8] Memory Properties : [IORT Memory Access Properties]
52
[058h 0088 4] Cache Coherency : 00000001
53
[05Ch 0092 1] Hints (decoded below) : 00
54
Transient : 0
55
Write Allocate : 0
56
Read Allocate : 0
57
Override : 0
58
[05Dh 0093 2] Reserved : 0000
59
[05Fh 0095 1] Memory Flags (decoded below) : 03
60
Coherency : 1
61
Device Attribute : 1
62
[060h 0096 4] ATS Attribute : 00000000
63
[064h 0100 4] PCI Segment Number : 00000000
64
[068h 0104 1] Memory Size Limit : 00
65
[069h 0105 3] Reserved : 000000
66
67
[068h 0104 4] Input base : 00000000
68
[06Ch 0108 4] ID Count : 0000FFFF
69
[070h 0112 4] Output Base : 00000000
70
[074h 0116 4] Output Reference : 00000030
71
[078h 0120 4] Flags (decoded below) : 00000000
72
Single Mapping : 0
73
74
Raw Table Data: Length 124 (0x7C)
75
76
0000: 49 4F 52 54 7C 00 00 00 00 07 42 4F 43 48 53 20 // IORT|.....BOCHS
77
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC
78
0020: 01 00 00 00 02 00 00 00 30 00 00 00 00 00 00 00 // ........0.......
79
0030: 00 18 00 00 00 00 00 00 00 00 00 00 00 00 00 00 // ................
80
0040: 01 00 00 00 00 00 00 00 02 34 00 00 00 00 00 00 // .........4......
81
0050: 01 00 00 00 20 00 00 00 01 00 00 00 00 00 00 03 // .... ...........
82
0060: 00 00 00 00 00 00 00 00 00 00 00 00 FF FF 00 00 // ................
83
0070: 00 00 00 00 30 00 00 00 00 00 00 00 // ....0.......
84
85
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
86
Acked-by: Igor Mammedov <imammedo@redhat.com>
87
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
88
Message-id: 20210910143951.92242-10-shashi.mallela@linaro.org
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Message-id: 20220708151540.18136-45-richard.henderson@linaro.org
89
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
90
---
9
---
91
tests/qtest/bios-tables-test-allowed-diff.h | 4 ----
10
target/arm/cpu.c | 11 +++++++++++
92
tests/data/acpi/virt/IORT | Bin 0 -> 124 bytes
11
1 file changed, 11 insertions(+)
93
tests/data/acpi/virt/IORT.memhp | Bin 0 -> 124 bytes
94
tests/data/acpi/virt/IORT.numamem | Bin 0 -> 124 bytes
95
tests/data/acpi/virt/IORT.pxb | Bin 0 -> 124 bytes
96
5 files changed, 4 deletions(-)
97
12
98
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
13
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
99
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
100
--- a/tests/qtest/bios-tables-test-allowed-diff.h
15
--- a/target/arm/cpu.c
101
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
16
+++ b/target/arm/cpu.c
102
@@ -1,5 +1 @@
17
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev)
103
/* List of comma-separated changed AML files to ignore */
18
CPACR_EL1, ZEN, 3);
104
-"tests/data/acpi/virt/IORT",
19
env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
105
-"tests/data/acpi/virt/IORT.memhp",
20
}
106
-"tests/data/acpi/virt/IORT.numamem",
21
+ /* and for SME instructions, with default vector length, and TPIDR2 */
107
-"tests/data/acpi/virt/IORT.pxb",
22
+ if (cpu_isar_feature(aa64_sme, cpu)) {
108
diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT
23
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
109
index XXXXXXX..XXXXXXX 100644
24
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
110
GIT binary patch
25
+ CPACR_EL1, SMEN, 3);
111
literal 124
26
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
112
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
27
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
113
QRGb+i3L*dhhtM#y0PN=p0RR91
28
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
114
29
+ SMCR, FA64, 1);
115
literal 0
30
+ }
116
HcmV?d00001
31
+ }
117
32
/*
118
diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp
33
* Enable 48-bit address space (TODO: take reserved_va into account).
119
index XXXXXXX..XXXXXXX 100644
34
* Enable TBI0 but not TBI1.
120
GIT binary patch
121
literal 124
122
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
123
QRGb+i3L*dhhtM#y0PN=p0RR91
124
125
literal 0
126
HcmV?d00001
127
128
diff --git a/tests/data/acpi/virt/IORT.numamem b/tests/data/acpi/virt/IORT.numamem
129
index XXXXXXX..XXXXXXX 100644
130
GIT binary patch
131
literal 124
132
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
133
QRGb+i3L*dhhtM#y0PN=p0RR91
134
135
literal 0
136
HcmV?d00001
137
138
diff --git a/tests/data/acpi/virt/IORT.pxb b/tests/data/acpi/virt/IORT.pxb
139
index XXXXXXX..XXXXXXX 100644
140
GIT binary patch
141
literal 124
142
zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
143
QRGb+i3L*dhhtM#y0PN=p0RR91
144
145
literal 0
146
HcmV?d00001
147
148
--
35
--
149
2.20.1
36
2.25.1
150
151
diff view generated by jsdifflib
1
From: Shashi Mallela <shashi.mallela@linaro.org>
1
From: Richard Henderson <richard.henderson@linaro.org>
2
2
3
Added expected IORT files applicable with latest GICv3
4
ITS changes.Temporarily differences in these files are
5
okay.
6
7
Signed-off-by: Shashi Mallela <shashi.mallela@linaro.org>
8
Acked-by: Igor Mammedov <imammedo@redhat.com>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Message-id: 20210910143951.92242-8-shashi.mallela@linaro.org
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-id: 20220708151540.18136-46-richard.henderson@linaro.org
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
7
---
13
tests/qtest/bios-tables-test-allowed-diff.h | 4 ++++
8
linux-user/elfload.c | 20 ++++++++++++++++++++
14
tests/data/acpi/virt/IORT | 0
9
1 file changed, 20 insertions(+)
15
tests/data/acpi/virt/IORT.memhp | 0
16
tests/data/acpi/virt/IORT.numamem | 0
17
tests/data/acpi/virt/IORT.pxb | 0
18
5 files changed, 4 insertions(+)
19
create mode 100644 tests/data/acpi/virt/IORT
20
create mode 100644 tests/data/acpi/virt/IORT.memhp
21
create mode 100644 tests/data/acpi/virt/IORT.numamem
22
create mode 100644 tests/data/acpi/virt/IORT.pxb
23
10
24
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
25
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
26
--- a/tests/qtest/bios-tables-test-allowed-diff.h
13
--- a/linux-user/elfload.c
27
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
14
+++ b/linux-user/elfload.c
28
@@ -1 +1,5 @@
15
@@ -XXX,XX +XXX,XX @@ enum {
29
/* List of comma-separated changed AML files to ignore */
16
ARM_HWCAP2_A64_RNG = 1 << 16,
30
+"tests/data/acpi/virt/IORT",
17
ARM_HWCAP2_A64_BTI = 1 << 17,
31
+"tests/data/acpi/virt/IORT.memhp",
18
ARM_HWCAP2_A64_MTE = 1 << 18,
32
+"tests/data/acpi/virt/IORT.numamem",
19
+ ARM_HWCAP2_A64_ECV = 1 << 19,
33
+"tests/data/acpi/virt/IORT.pxb",
20
+ ARM_HWCAP2_A64_AFP = 1 << 20,
34
diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT
21
+ ARM_HWCAP2_A64_RPRES = 1 << 21,
35
new file mode 100644
22
+ ARM_HWCAP2_A64_MTE3 = 1 << 22,
36
index XXXXXXX..XXXXXXX
23
+ ARM_HWCAP2_A64_SME = 1 << 23,
37
diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp
24
+ ARM_HWCAP2_A64_SME_I16I64 = 1 << 24,
38
new file mode 100644
25
+ ARM_HWCAP2_A64_SME_F64F64 = 1 << 25,
39
index XXXXXXX..XXXXXXX
26
+ ARM_HWCAP2_A64_SME_I8I32 = 1 << 26,
40
diff --git a/tests/data/acpi/virt/IORT.numamem b/tests/data/acpi/virt/IORT.numamem
27
+ ARM_HWCAP2_A64_SME_F16F32 = 1 << 27,
41
new file mode 100644
28
+ ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
42
index XXXXXXX..XXXXXXX
29
+ ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
43
diff --git a/tests/data/acpi/virt/IORT.pxb b/tests/data/acpi/virt/IORT.pxb
30
+ ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
44
new file mode 100644
31
};
45
index XXXXXXX..XXXXXXX
32
33
#define ELF_HWCAP get_elf_hwcap()
34
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void)
35
GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
36
GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
37
GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
38
+ GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
39
+ ARM_HWCAP2_A64_SME_F32F32 |
40
+ ARM_HWCAP2_A64_SME_B16F32 |
41
+ ARM_HWCAP2_A64_SME_F16F32 |
42
+ ARM_HWCAP2_A64_SME_I8I32));
43
+ GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
44
+ GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
45
+ GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
46
47
return hwcaps;
48
}
46
--
49
--
47
2.20.1
50
2.25.1
48
49
diff view generated by jsdifflib